#!/usr/bin/python3 #### Config repos = [] {% for r in config.repos %} _hosts=[] {% for h in r.hosts %} _hosts.append("{{ h }}") {% endfor %} repos.append({ "name": "{{ r.name }}", "url": "{{ r.url }}", "key": "{{ r.key }}", "hosts": _hosts, "cleanup_parameters": "{{ r.cleanup_parameters | default(config.default_cleanup_parameters) }}", "check_repo": {% if r.check_repo %}True{% else %}False{% endif %}, }) {% endfor %} {% macro to_python(d) %}{ {% for key, value in d.items() %} "{{ key }}": {% if value is boolean %}{% if value %}True{% else %}False{% endif %}{% elif value is mapping %}{{ to_python(value) }}{% else %}{{ value | tojson }}{% endif %}, {% endfor %} } {% endmacro %} hosts = {{ to_python(config.hosts) }} #### End config import subprocess import os import sys import json import datetime import time import fcntl lockfd = open("{{ homedir }}/.backupmanager.lock", "w") try: fcntl.flock(lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB) except BlockingIOError: print("Another instance is running.") sys.exit(1) def run_command(command, env=None): current_env = os.environ.copy() if env is not None: current_env.update(env) process = subprocess.run(command, shell=True, env=current_env) if process.returncode != 0: raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}") def get_command(command, env=None, full_return=False): current_env = os.environ.copy() if env is not None: current_env.update(env) process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env=current_env) output, error = process.communicate() return_code = process.poll() if full_return: return output.decode().strip(), error.decode().strip(), return_code if error: print(error.decode().strip(), file=sys.stderr) if return_code != 0: raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}") return output.decode() def get_snapshot_info(repo): e = {} e["RESTIC_REPOSITORY"]=repo["url"] e["RESTIC_PASSWORD"]=repo["key"] data = get_command(f"restic snapshots --json", env=e) return json.loads(data) # Define the path to the JSON file STATUS_FILE = "{{ homedir }}/status.json" def get_status(name): if not os.path.exists(STATUS_FILE): return False with open(STATUS_FILE, 'r') as file: try: statuses = json.load(file) except json.JSONDecodeError: return False # Return False if the file is empty or corrupted return statuses.get(name, False) def set_status(name, value): statuses = {} if os.path.exists(STATUS_FILE): with open(STATUS_FILE, 'r') as file: try: statuses = json.load(file) except json.JSONDecodeError: pass # Ignore errors and start with an empty dictionary statuses[name] = value with open(STATUS_FILE, 'w') as file: json.dump(statuses, file, indent=4) # Check that we can open the repositories properly repos_in_error=[] repos_ok=[] polynomial="" polynomial_ok=True for r in repos: print(f"Checking repo {r['name']}") output,error,rc = get_command("restic cat config --json",{"RESTIC_REPOSITORY": r['url'],"RESTIC_PASSWORD": r['key'] }, full_return=True) if rc!=0: repos_in_error.append(r) else: repoconfig=json.loads(output) if polynomial=="": polynomial=repoconfig["chunker_polynomial"] else: if polynomial!=repoconfig["chunker_polynomial"]: polynomial_ok=False repos_ok.append({"repo":r,"config":repoconfig}) if len(repos_ok)==0: print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands") sys.exit(1) if polynomial_ok==False: print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...") for r in repos_ok: print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}") sys.exit(1) if len(repos_in_error)!=0: print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:") repo_from=repos_ok[0]['repo'] for r in repos_in_error: print() print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}") sys.exit(1) for r in repos: print(f"Getting snapshot list for repo {r['name']}") r['snapshots'] = get_snapshot_info(r) # Verify that we have the correct backups stored allhosts = set() hostsok = True for r in repos: wronghosts = set() allhosts.update(r['hosts']) for s in r['snapshots']: if s['hostname'] not in r['hosts']: wronghosts.update(s['hostname']) if wronghosts: print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list") hostsok = False if not hostsok: print("Host information not ok, aborting") sys.exit(1) for host in allhosts: print(f"Syncing hostname {host}") most_recent_backup_ts = 0 most_recent_backup_str = "" most_recent_backup_on = {} most_recent_backup_id = "" for r in repos: for s in r['snapshots']: if s['hostname']!=host: continue time_string = s["time"] # Python does not accept a : in the timezone, yet go's code places it. Fix this... # time_string = time_string[:-6] + time_string[-6:].replace(":","") # Python only accepts 6 digits in the fractals for the seconds.... frac_seconds_str = time_string.split('.')[-1].rstrip('Z') frac_seconds_len = len(frac_seconds_str) # Truncate or pad the fractional seconds string as needed if frac_seconds_len >= 4: frac_seconds_str = frac_seconds_str[:4] time_string = time_string[:-frac_seconds_len] + frac_seconds_str + 'Z' unix_time = datetime.datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S.%f%z").timestamp() if unix_time>most_recent_backup_ts: most_recent_backup_ts = unix_time most_recent_backup_str = s["time"] most_recent_backup_on = r most_recent_backup_id = s['id'] if most_recent_backup_ts == 0: print(f"WARNING: There are no backups for {host}") continue # We now know the most recent backup. See if a backup is present on all targets that carry this hostname. have_a_copy=False for r in repos: if host not in r['hosts']: continue if r['url']==most_recent_backup_on['url']: continue have_a_copy = True copy_exists = False for s in r['snapshots']: if s['time']==most_recent_backup_str: copy_exists = True if copy_exists: continue # Copy! print(f"Copying backup {most_recent_backup_id} from {most_recent_backup_on['name']} to {r['name']}") e = {} e["RESTIC_REPOSITORY"]=r["url"] e["RESTIC_PASSWORD"]=r["key"] e["RESTIC_FROM_REPOSITORY"]=most_recent_backup_on["url"] e["RESTIC_FROM_PASSWORD"]=most_recent_backup_on["key"] run_command(f"restic copy {most_recent_backup_id}", env=e) idle = False timeout = 10 if host in hosts: idle = hosts[host]["idle"] timeout = hosts[host]["timeout"] if not idle: if not have_a_copy: print(f"WARNING: We do not have a copy for {host}") if most_recent_backup_ts < time.time()-(timeout*24*3600): print(f"WARNING: Last backup for {host} is too old") for r in repos: e = {} e["RESTIC_REPOSITORY"]=r["url"] e["RESTIC_PASSWORD"]=r["key"] name = "repo_check_" + r["name"] name2 = "repo_check_ok_" + r["name"] if r["check_repo"]: current_time = int(time.time()) # Get the current Unix timestamp last_execution_time = get_status(name) # only check once every 12h. If it is ok, we check 5%, so on average it takes 20 days # to read the entire repo if we are extreamly lucky with the random. But scanning 100% # is very expensive, so we don't do it, unless we are not sure it is all ok. # So always scan 100% unless we know it is ok already from a previous scan. # if a scan fails, we reset the ok flag before scanning, so next time it will be back 100% if last_execution_time is False or (current_time - last_execution_time) > (12 * 60 * 60): repo_ok = get_status(name2) set_status(name2, False) if repo_ok: print(f"Checking random part of the backups for repo {r['name']}....") run_command(f"restic check --read-data-subset=5%", env=e) else: print(f"Checking full backup of the backups for repo {r['name']}....") run_command("restic check", env=e) set_status(name2, True) set_status(name, current_time) for r in repos: e = {} e["RESTIC_REPOSITORY"]=r["url"] e["RESTIC_PASSWORD"]=r["key"] name = "repo_purge_" + r["name"] current_time = int(time.time()) # Get the current Unix timestamp last_execution_time = get_status(name) if last_execution_time is False or (current_time - last_execution_time) > (15 * 24 * 60 * 60): print(f"forgetting old backups for repo {r['name']}....") run_command(f"restic forget {r['cleanup_parameters']}", env=e) print(f"Pruning old backups for repo {r['name']}....") run_command("restic prune --max-unused 10%", env=e) set_status(name, current_time)