273 lines
9.9 KiB
Text
273 lines
9.9 KiB
Text
|
|
#!/usr/bin/python3

#### Config
# This file is a Jinja2 template: everything between the config markers is
# rendered into plain Python before the script ever runs.  Each configured
# repository becomes a dict carrying its restic URL, password ("key"),
# the hosts allowed to store backups in it, and its cleanup policy.
repos = []
{% for r in config.repos %}
# Hostnames whose backups may live in this repository.
_hosts=[]
{% for h in r.hosts %}
_hosts.append("{{ h }}")
{% endfor %}
repos.append({
"name": "{{ r.name }}",
"url": "{{ r.url }}",
"key": "{{ r.key }}",
"hosts": _hosts,
# Per-repo override, falling back to the global default at render time.
"cleanup_parameters": "{{ r.cleanup_parameters | default(config.default_cleanup_parameters) }}",
# Rendered as a Python boolean literal (True / False).
"check_repo": {% if r.check_repo %}True{% else %}False{% endif %},
})
{% endfor %}
|
||
|
|
|
||
|
|
# Render an arbitrary (possibly nested) mapping from the template config
# into a Python dict literal: booleans become True/False, nested mappings
# recurse, everything else is serialized through Jinja's tojson filter.
{% macro to_python(d) %}{
{% for key, value in d.items() %}
"{{ key }}": {% if value is boolean %}{% if value %}True{% else %}False{% endif %}{% elif value is mapping %}{{ to_python(value) }}{% else %}{{ value | tojson }}{% endif %},
{% endfor %}
}
{% endmacro %}

# Per-host settings keyed by hostname (the code below reads an "idle" flag
# and a "timeout" value from each entry).
hosts = {{ to_python(config.hosts) }}
|
||
|
|
#### End config
|
||
|
|
|
||
|
|
import subprocess
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import json
|
||
|
|
import datetime
|
||
|
|
import time
|
||
|
|
|
||
|
|
import fcntl
|
||
|
|
|
||
|
|
# Single-instance guard: hold an exclusive, non-blocking flock on a lock
# file in the (template-provided) home directory.  The fd is deliberately
# kept open for the whole run; the lock is released when the process exits.
lockfd = open("{{ homedir }}/.backupmanager.lock", "w")
try:
    fcntl.flock(lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except BlockingIOError:
    # LOCK_NB makes flock() raise instead of waiting when another
    # instance already holds the lock.
    print("Another instance is running.")
    sys.exit(1)
|
||
|
|
|
||
|
|
def run_command(command, env=None):
    """Run *command* through the shell with extra environment variables.

    The child process inherits a copy of ``os.environ``, optionally
    overlaid with the mapping *env*.  Output is not captured (it goes
    straight to our stdout/stderr).

    Raises RuntimeError when the command exits with a non-zero status.
    """
    merged_env = dict(os.environ)
    if env is not None:
        merged_env.update(env)
    result = subprocess.run(command, shell=True, env=merged_env)
    rc = result.returncode
    if rc != 0:
        raise RuntimeError(f"Command '{command}' failed with return code {rc}, environment is {env}")
|
||
|
|
|
||
|
|
def get_command(command, env=None, full_return=False):
    """Run *command* through the shell and capture its output.

    With ``full_return=True`` the tuple ``(stdout, stderr, returncode)``
    is returned with stdout/stderr whitespace-stripped and no error
    checking performed.  Otherwise any stderr text is echoed to our own
    stderr, a non-zero exit status raises RuntimeError, and the raw
    (unstripped) stdout text is returned.
    """
    merged_env = dict(os.environ)
    if env is not None:
        merged_env.update(env)
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env=merged_env)
    out_bytes, err_bytes = proc.communicate()
    rc = proc.poll()
    if full_return:
        return out_bytes.decode().strip(), err_bytes.decode().strip(), rc
    if err_bytes:
        # Surface the child's diagnostics even on success.
        print(err_bytes.decode().strip(), file=sys.stderr)
    if rc != 0:
        raise RuntimeError(f"Command '{command}' failed with return code {rc}, environment is {env}")
    return out_bytes.decode()
|
||
|
|
|
||
|
|
def get_snapshot_info(repo):
    """Return the parsed snapshot list for *repo*.

    Runs ``restic snapshots --json`` with the repository URL and password
    taken from the repo dict, and decodes restic's JSON reply.
    """
    env = {
        "RESTIC_REPOSITORY": repo["url"],
        "RESTIC_PASSWORD": repo["key"],
    }
    # Plain string literal: the original used an f-string with no placeholders.
    data = get_command("restic snapshots --json", env=env)
    return json.loads(data)
|
||
|
|
|
||
|
|
|
||
|
|
# Path of the JSON file that persists per-task status values between runs
# ({{ homedir }} is substituted when the template renders).
STATUS_FILE = "{{ homedir }}/status.json"

def get_status(name):
    """Return the stored status value for *name*, or False when unknown.

    False is also returned when the status file is missing, empty or
    corrupted - callers treat that as "never ran".
    """
    # EAFP: open and parse directly instead of an exists() pre-check,
    # which was racy and needed an extra stat().
    try:
        with open(STATUS_FILE, 'r') as file:
            statuses = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        return False
    return statuses.get(name, False)
|
||
|
|
|
||
|
|
def set_status(name, value):
    """Persist *value* under *name* in STATUS_FILE, keeping other entries.

    A missing, empty or corrupt status file is silently replaced with a
    fresh one.  NOTE(review): the write is not atomic - a crash mid-write
    can corrupt the file, which is then reset on the next update.
    """
    # EAFP: attempt the read directly instead of an exists() pre-check.
    try:
        with open(STATUS_FILE, 'r') as file:
            statuses = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        statuses = {}  # start over with an empty mapping
    statuses[name] = value
    with open(STATUS_FILE, 'w') as file:
        json.dump(statuses, file, indent=4)
|
||
|
|
|
||
|
|
|
||
|
|
# Sanity pass over the configured repositories: every repo must be
# reachable with its password, and all reachable repos must share one
# chunker polynomial (otherwise cross-repo `restic copy` de-duplication
# is broken).  Any failure aborts with actionable instructions.
repos_in_error = []
repos_ok = []
polynomial = ""
polynomial_ok = True
for r in repos:
    print(f"Checking repo {r['name']}")
    access_env = {"RESTIC_REPOSITORY": r['url'], "RESTIC_PASSWORD": r['key']}
    stdout_text, stderr_text, status = get_command("restic cat config --json", access_env, full_return=True)
    if status != 0:
        repos_in_error.append(r)
        continue
    repoconfig = json.loads(stdout_text)
    if polynomial == "":
        # First reachable repo establishes the reference polynomial.
        polynomial = repoconfig["chunker_polynomial"]
    elif polynomial != repoconfig["chunker_polynomial"]:
        polynomial_ok = False
    repos_ok.append({"repo": r, "config": repoconfig})

if not repos_ok:
    print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
    sys.exit(1)

if not polynomial_ok:
    print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...")
    for r in repos_ok:
        print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}")
    sys.exit(1)

if repos_in_error:
    print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:")
    # Seed every init from the first healthy repo so the chunker
    # parameters are copied and stay identical everywhere.
    repo_from = repos_ok[0]['repo']
    for r in repos_in_error:
        print()
        print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}")
    sys.exit(1)
|
||
|
|
|
||
|
|
|
||
|
|
# Fetch the snapshot list of every repository once, up front.
for r in repos:
    print(f"Getting snapshot list for repo {r['name']}")
    r['snapshots'] = get_snapshot_info(r)

# Verify that we have the correct backups stored: every snapshot in a
# repository must belong to one of that repository's configured hosts.
allhosts = set()
hostsok = True
for r in repos:
    wronghosts = set()
    allhosts.update(r['hosts'])
    for s in r['snapshots']:
        if s['hostname'] not in r['hosts']:
            # BUGFIX: was wronghosts.update(s['hostname']), which adds the
            # individual *characters* of the hostname string to the set.
            wronghosts.add(s['hostname'])
    if wronghosts:
        print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list")
        hostsok = False
if not hostsok:
    print("Host information not ok, aborting")
    sys.exit(1)
|
||
|
|
|
||
|
|
# Per-host replication: find each host's most recent snapshot across all
# repositories, then make sure every other repository configured for that
# host holds a snapshot with the same timestamp, copying it when missing.
for host in allhosts:
    print(f"Syncing hostname {host}")
    most_recent_backup_ts = 0    # unix timestamp of the newest snapshot seen
    most_recent_backup_str = ""  # its original restic time string (used as identity)
    most_recent_backup_on = {}   # the repo dict it lives in
    most_recent_backup_id = ""   # its restic snapshot id
    for r in repos:
        for s in r['snapshots']:
            if s['hostname']!=host:
                continue
            time_string = s["time"]
            # Python does not accept a : in the timezone, yet go's code places it. Fix this...
            # time_string = time_string[:-6] + time_string[-6:].replace(":","")
            # Python only accepts 6 digits in the fractals for the seconds....
            # Extract the fractional-seconds digits; assumes restic emits a
            # trailing 'Z' (UTC) suffix - TODO confirm for offset timezones.
            frac_seconds_str = time_string.split('.')[-1].rstrip('Z')
            frac_seconds_len = len(frac_seconds_str)
            # Truncate or pad the fractional seconds string as needed
            if frac_seconds_len >= 4:
                frac_seconds_str = frac_seconds_str[:4]
                # NOTE(review): frac_seconds_len does not count the stripped
                # 'Z', so this slice leaves one extra fractional digit in
                # place; the result still parses (%f accepts up to 6 digits)
                # but the sub-second value is slightly off - confirm intended.
                time_string = time_string[:-frac_seconds_len] + frac_seconds_str + 'Z'
            unix_time = datetime.datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S.%f%z").timestamp()

            if unix_time>most_recent_backup_ts:
                most_recent_backup_ts = unix_time
                most_recent_backup_str = s["time"]
                most_recent_backup_on = r
                most_recent_backup_id = s['id']
    if most_recent_backup_ts == 0:
        print(f"WARNING: There are no backups for {host}")
        continue

    # We now know the most recent backup. See if a backup is present on all targets that carry this hostname.
    have_a_copy=False
    for r in repos:
        if host not in r['hosts']:
            continue
        if r['url']==most_recent_backup_on['url']:
            # The source repository itself does not count as an extra copy.
            continue
        # There is at least one other repo configured for this host; a copy
        # either exists already or is created just below.
        have_a_copy = True
        copy_exists = False
        for s in r['snapshots']:
            # Snapshots are matched purely by their original time string.
            if s['time']==most_recent_backup_str:
                copy_exists = True
        if copy_exists:
            continue

        # Copy!
        print(f"Copying backup {most_recent_backup_id} from {most_recent_backup_on['name']} to {r['name']}")
        e = {}
        e["RESTIC_REPOSITORY"]=r["url"]
        e["RESTIC_PASSWORD"]=r["key"]
        e["RESTIC_FROM_REPOSITORY"]=most_recent_backup_on["url"]
        e["RESTIC_FROM_PASSWORD"]=most_recent_backup_on["key"]
        run_command(f"restic copy {most_recent_backup_id}", env=e)

    # Warn when a host has no second copy or its newest backup is stale,
    # unless the host is marked "idle" in the rendered hosts mapping.
    idle = False
    timeout = 10  # default age limit; multiplied by 24*3600 below, so presumably days
    if host in hosts:
        idle = hosts[host]["idle"]
        timeout = hosts[host]["timeout"]
    if not idle:
        if not have_a_copy:
            print(f"WARNING: We do not have a copy for {host}")
        if most_recent_backup_ts < time.time()-(timeout*24*3600):
            print(f"WARNING: Last backup for {host} is too old")
|
||
|
|
|
||
|
|
# Periodic integrity checking.  Per repo: at most one check every 12 h;
# if the previous check succeeded, only a random 5% of the data is read
# (cheap), otherwise a full 100% read is done.  The "ok" flag is cleared
# before checking so an interrupted or failed check forces a full scan
# next time.
for r in repos:
    e = {}
    e["RESTIC_REPOSITORY"] = r["url"]
    e["RESTIC_PASSWORD"] = r["key"]

    name = "repo_check_" + r["name"]      # status key: timestamp of last check
    name2 = "repo_check_ok_" + r["name"]  # status key: did the last check pass

    if r["check_repo"]:
        current_time = int(time.time())  # Get the current Unix timestamp
        last_execution_time = get_status(name)

        # only check once every 12h. If it is ok, we check 5%, so on average it takes 20 days
        # to read the entire repo if we are extreamly lucky with the random. But scanning 100%
        # is very expensive, so we don't do it, unless we are not sure it is all ok.
        # So always scan 100% unless we know it is ok already from a previous scan.
        # if a scan fails, we reset the ok flag before scanning, so next time it will be back 100%
        if last_execution_time is False or (current_time - last_execution_time) > (12 * 60 * 60):
            repo_ok = get_status(name2)
            set_status(name2, False)
            if repo_ok:
                print(f"Checking random part of the backups for repo {r['name']}....")
                # Plain string literal: the original had a pointless f-prefix.
                run_command("restic check --read-data-subset=5%", env=e)
            else:
                print(f"Checking full backup of the backups for repo {r['name']}....")
                run_command("restic check", env=e)
            set_status(name2, True)
            set_status(name, current_time)
|
||
|
|
|
||
|
|
# Retention: every 15 days per repository, forget snapshots according to
# the configured cleanup parameters and prune unreferenced data.
for r in repos:
    restic_env = {
        "RESTIC_REPOSITORY": r["url"],
        "RESTIC_PASSWORD": r["key"],
    }
    status_key = "repo_purge_" + r["name"]

    now = int(time.time())  # current Unix timestamp
    last_run = get_status(status_key)

    # Skip repos purged within the last 15 days (False means "never ran").
    fifteen_days = 15 * 24 * 60 * 60
    if last_run is not False and (now - last_run) <= fifteen_days:
        continue

    print(f"forgetting old backups for repo {r['name']}....")
    run_command(f"restic forget {r['cleanup_parameters']}", env=restic_env)

    print(f"Pruning old backups for repo {r['name']}....")
    run_command("restic prune --max-unused 10%", env=restic_env)

    set_status(status_key, now)
|