diff --git a/appinfo.yml b/appinfo.yml index 6eb491a..d16b1be 100644 --- a/appinfo.yml +++ b/appinfo.yml @@ -6,6 +6,16 @@ templatefiles: - src: backupmanager.j2 dest: ~/backupmanager mode: "0755" + - src: monitor@.service.j2 + dest: ~/.config/systemd/user/monitor@.service + - src: monitor.socket.j2 + dest: ~/.config/systemd/user/monitor.socket + - src: monitor.sh.j2 + dest: ~/monitor.sh + mode: "0755" + - src: monitor-test.sh.j2 + dest: ~/monitor-test.sh + mode: "0755" backuptype: none configdefinition: "$id": "backup manager config" @@ -72,3 +82,22 @@ configdefinition: description: Repositories where the latest backup is older then timeout days will give a warning that there are no recent backups required: - repos +exports: + monitoring: + checks: + - name: main + message: Backup manager activated + interval: 3600 + type: string + okvalue: active + - name: lastrun + message: Errors last run + interval: 3600 + type: string + okvalue: "" + - name: lastrunrecent + message: Backup manager did not run recently + interval: 3600 + type: string + okvalue: "OK" + diff --git a/backupmanager.j2 b/backupmanager.j2 index b5f6fdf..063e48b 100644 --- a/backupmanager.j2 +++ b/backupmanager.j2 @@ -43,6 +43,23 @@ except BlockingIOError: print("Another instance is running.") sys.exit(1) +# This file contains all problems with the last run. Empty when we ran successfully, +# otherwise what needs attention... 
+logfd = open("{{ homedir }}/.backupmanager.errors", "w") +logfd2 = open("{{ homedir }}/.backupmanager.errors-debug", "w") + +def output_warning(msg, sensitive=""): + print(f"WARN: {msg} {sensitive}") + logfd.write(f"WARN: {msg}\n") + logfd2.write(f"WARN: {msg} {sensitive}\n") + +def output_fatal(msg, sensitive="", dontquityet=False): + print(f"FATAL: {msg} {sensitive}") + logfd.write(f"FATAL: {msg}\n") + logfd2.write(f"FATAL: {msg} {sensitive}\n") + if not dontquityet: + sys.exit(1) + def run_command(command, env=None): current_env = os.environ.copy() if env is not None: @@ -53,7 +70,7 @@ def run_command(command, env=None): sys.stdout.flush() sys.stderr.flush() if process.returncode != 0: - raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}") + output_fatal(f"Command '{command}' failed with return code {process.returncode}", sensitive=f"environment is {env}") def get_command(command, env=None, full_return=False): current_env = os.environ.copy() @@ -71,7 +88,7 @@ def get_command(command, env=None, full_return=False): if error: print(error.decode().strip(), file=sys.stderr) if return_code != 0: - raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}") + output_fatal(f"Command '{command}' failed with return code {return_code}", sensitive=f"environment is {env}") return output.decode() def get_snapshot_info(repo): @@ -128,23 +145,22 @@ for r in repos: repos_ok.append({"repo":r,"config":repoconfig}) if len(repos_ok)==0: - print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands") - sys.exit(1) + output_fatal("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands") if polynomial_ok==False: - print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. 
Please delete the repositories you can rebuild...") + msg = "Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...\n" + s = "" for r in repos_ok: - print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}") - sys.exit(1) + s = s + f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}\n" + output_fatal(msg, sensitive=s) if len(repos_in_error)!=0: - print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:") + msg = "Could not open all repositories. Check that they are accessible and that the passwords are correct." + s = "If they are not yet initialized, use the following commands:\n\n" repo_from=repos_ok[0]['repo'] for r in repos_in_error: - print() - print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}") - sys.exit(1) - + s = s + f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}\n" + output_fatal(msg, sensitive=s) for r in repos: print(f"Getting snapshot list for repo {r['name']}") @@ -160,11 +176,10 @@ for r in repos: if s['hostname'] not in r['hosts']: wronghosts.update(s['hostname']) if wronghosts: - print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list") + output_fatal(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list", dontquityet=True) hostsok = False if not hostsok: - print("Host information not ok, aborting") - sys.exit(1) + output_fatal("Host information not ok, aborting") for host in allhosts: 
print(f"Syncing hostname {host}") @@ -194,7 +209,7 @@ for host in allhosts: most_recent_backup_on = r most_recent_backup_id = s['id'] if most_recent_backup_ts == 0: - print(f"WARNING: There are no backups for {host}") + output_warning(f"There are no backups for {host}") continue # We now know the most recent backup. See if a backup is present on all targets that carry this hostname. @@ -228,9 +243,9 @@ for host in allhosts: timeout = hosts[host]["timeout"] if not idle: if not have_a_copy: - print(f"WARNING: We do not have a copy for {host}") + output_warning(f"We do not have a copy for {host}") if most_recent_backup_ts < time.time()-(timeout*24*3600): - print(f"WARNING: Last backup for {host} is too old") + output_warning(f"Last backup for {host} is too old") for r in repos: e = {} diff --git a/monitor-test.sh.j2 b/monitor-test.sh.j2 new file mode 100644 index 0000000..a1a125e --- /dev/null +++ b/monitor-test.sh.j2 @@ -0,0 +1,16 @@ +#!/bin/bash + +# Usage: ./monitor-test.sh COMMAND + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +command="$1" + +# Connect to socket and send command +output=$(nc -U {{ statedir }}/{{ name }}.monitoring <<< "$command") + +# Print output +echo "$output" diff --git a/monitor.sh.j2 b/monitor.sh.j2 new file mode 100644 index 0000000..0e8e932 --- /dev/null +++ b/monitor.sh.j2 @@ -0,0 +1,18 @@ +#!/bin/bash +read -r command +case $command in + main) + systemctl is-active --user backup-manager.timer + ;; + lastrun) + cat {{ homedir }}/.backupmanager.errors + ;; + lastrunrecent) + [ -f {{ homedir }}/.backupmanager.errors ] && [ $(find {{ homedir }}/.backupmanager.errors -mtime -2) ] && echo "OK" || echo "outdated" + ;; + *) + echo "Parameter unknown" + ;; +esac + + diff --git a/monitor.socket.j2 b/monitor.socket.j2 new file mode 100644 index 0000000..17163d1 --- /dev/null +++ b/monitor.socket.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Execute Command Socket + +[Socket] +ListenStream={{ statedir }}/{{ name }}.monitoring +Accept=yes + 
+[Install] +WantedBy=sockets.target diff --git a/monitor@.service.j2 b/monitor@.service.j2 new file mode 100644 index 0000000..55ee896 --- /dev/null +++ b/monitor@.service.j2 @@ -0,0 +1,15 @@ +[Unit] +Description=Execute Command Service +After=network.target +Requires=monitor.socket + +[Service] +Type=simple +ExecStart={{ homedir }}/monitor.sh +StandardInput=socket +StandardOutput=socket +TimeoutStopSec=5 +RuntimeMaxSec=10 + +[Install] +WantedBy=sockets.target \ No newline at end of file