Compare commits

..

No commits in common. "8c603d6ebd1e78829f2e9547dea056d6ab62aafe" and "7756236e1c6327ea2b836fd1201de125aed8d28e" have entirely different histories.

6 changed files with 18 additions and 120 deletions

View file

@ -6,16 +6,6 @@ templatefiles:
- src: backupmanager.j2 - src: backupmanager.j2
dest: ~/backupmanager dest: ~/backupmanager
mode: "0755" mode: "0755"
- src: monitor@.service.j2
dest: ~/.config/systemd/user/monitor@.service
- src: monitor.socket.j2
dest: ~/.config/systemd/user/monitor.socket
- src: monitor.sh.j2
dest: ~/monitor.sh
mode: "0755"
- src: monitor-test.sh.j2
dest: ~/monitor-test.sh
mode: "0755"
backuptype: none backuptype: none
configdefinition: configdefinition:
"$id": "backup manager config" "$id": "backup manager config"
@ -82,22 +72,3 @@ configdefinition:
description: Repositories where the latest backup is older then timeout days will give a warning that there are no recent backups description: Repositories where the latest backup is older then timeout days will give a warning that there are no recent backups
required: required:
- repos - repos
exports:
monitoring:
checks:
- name: main
message: Backup manager activated
interval: 3600
type: string
okvalue: active
- name: lastrun
message: Errors last run
interval: 3600
type: string
okvalue: ""
- name: lastrunrecent
message: Backup manager did not run recently
interval: 3600
type: string
okvalue: "OK"

View file

@ -43,23 +43,6 @@ except BlockingIOError:
print("Another instance is running.") print("Another instance is running.")
sys.exit(1) sys.exit(1)
# This file contains all problems from the last run. It is empty when the run was successful;
# otherwise it lists what needs attention.
logfd = open("{{ homedir }}/.backupmanager.errors", "w")
logfd2 = open("{{ homedir }}/.backupmanager.errors-debug", "w")
def output_warning(msg, sensitive=""):
    """Report a non-fatal problem on stdout and in both error logs.

    msg is written to every destination. sensitive is appended on stdout
    and in the debug log (logfd2) only; the regular error log (logfd)
    receives msg without it.
    """
    summary = f"WARN: {msg}"
    detailed = f"WARN: {msg} {sensitive}"
    print(detailed)
    logfd.write(f"{summary}\n")
    logfd2.write(f"{detailed}\n")
def output_fatal(msg, sensitive="", dontquityet=False):
    """Report a fatal problem and, unless told otherwise, exit with status 1.

    msg is written to stdout and both error logs. sensitive is appended on
    stdout and in the debug log (logfd2) only; the regular error log (logfd)
    receives msg without it.

    When dontquityet is true the function returns after reporting, so the
    caller can report further problems before aborting itself.
    """
    summary = f"FATAL: {msg}"
    detailed = f"FATAL: {msg} {sensitive}"
    print(detailed)
    logfd.write(f"{summary}\n")
    logfd2.write(f"{detailed}\n")
    if dontquityet:
        return
    sys.exit(1)
def run_command(command, env=None): def run_command(command, env=None):
current_env = os.environ.copy() current_env = os.environ.copy()
if env is not None: if env is not None:
@ -70,7 +53,7 @@ def run_command(command, env=None):
sys.stdout.flush() sys.stdout.flush()
sys.stderr.flush() sys.stderr.flush()
if process.returncode != 0: if process.returncode != 0:
output_fatal(f"Command '{command}' failed with return code {process.returncode}", sensitive=f"environment is {env}") raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}")
def get_command(command, env=None, full_return=False): def get_command(command, env=None, full_return=False):
current_env = os.environ.copy() current_env = os.environ.copy()
@ -88,7 +71,7 @@ def get_command(command, env=None, full_return=False):
if error: if error:
print(error.decode().strip(), file=sys.stderr) print(error.decode().strip(), file=sys.stderr)
if return_code != 0: if return_code != 0:
output_fatal(f"Command '{command}' failed with return code {return_code}", sensitive=f"environment is {env}") raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}")
return output.decode() return output.decode()
def get_snapshot_info(repo): def get_snapshot_info(repo):
@ -145,22 +128,23 @@ for r in repos:
repos_ok.append({"repo":r,"config":repoconfig}) repos_ok.append({"repo":r,"config":repoconfig})
if len(repos_ok)==0: if len(repos_ok)==0:
output_fatal("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands") print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
sys.exit(1)
if polynomial_ok==False: if polynomial_ok==False:
msg = "Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...\n" print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...")
s = ""
for r in repos_ok: for r in repos_ok:
s = s + f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}\n" print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}")
output_fatal(msg, sensitive=s) sys.exit(1)
if len(repos_in_error)!=0: if len(repos_in_error)!=0:
msg = "Could not open all repositories. Check that they are accessible and that the passwords are correct." print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:")
s = "If they are not yet initialized, use the following commands:\n\n"
repo_from=repos_ok[0]['repo'] repo_from=repos_ok[0]['repo']
for r in repos_in_error: for r in repos_in_error:
s = s + f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}\n" print()
output_fatal(msg, sensitive=s) print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}")
sys.exit(1)
for r in repos: for r in repos:
print(f"Getting snapshot list for repo {r['name']}") print(f"Getting snapshot list for repo {r['name']}")
@ -176,10 +160,11 @@ for r in repos:
if s['hostname'] not in r['hosts']: if s['hostname'] not in r['hosts']:
wronghosts.update(s['hostname']) wronghosts.update(s['hostname'])
if wronghosts: if wronghosts:
output_fatal(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list", dontquityet=True) print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list")
hostsok = False hostsok = False
if not hostsok: if not hostsok:
output_fatal("Host information not ok, aborting") print("Host information not ok, aborting")
sys.exit(1)
for host in allhosts: for host in allhosts:
print(f"Syncing hostname {host}") print(f"Syncing hostname {host}")
@ -209,7 +194,7 @@ for host in allhosts:
most_recent_backup_on = r most_recent_backup_on = r
most_recent_backup_id = s['id'] most_recent_backup_id = s['id']
if most_recent_backup_ts == 0: if most_recent_backup_ts == 0:
output_warning(f"There are no backups for {host}") print(f"WARNING: There are no backups for {host}")
continue continue
# We now know the most recent backup. See if a backup is present on all targets that carry this hostname. # We now know the most recent backup. See if a backup is present on all targets that carry this hostname.
@ -243,9 +228,9 @@ for host in allhosts:
timeout = hosts[host]["timeout"] timeout = hosts[host]["timeout"]
if not idle: if not idle:
if not have_a_copy: if not have_a_copy:
output_warning(f"We do not have a copy for {host}") print(f"WARNING: We do not have a copy for {host}")
if most_recent_backup_ts < time.time()-(timeout*24*3600): if most_recent_backup_ts < time.time()-(timeout*24*3600):
output_warning(f"Last backup for {host} is too old") print(f"WARNING: Last backup for {host} is too old")
for r in repos: for r in repos:
e = {} e = {}

View file

@ -1,16 +0,0 @@
#!/bin/bash
# Client for the monitoring socket: sends one command and prints the reply.
# Usage: ./execute_command_client.sh <command>

if [ $# -ne 1 ]; then
    echo "Usage: $0 <command>"
    exit 1
fi

command="$1"

# Keep the rendered path in a quoted variable so a state directory that
# contains whitespace or glob characters reaches nc as a single argument.
socket_path="{{ statedir }}/{{ name }}.monitoring"

# Connect to socket and send command
output=$(nc -U "$socket_path" <<< "$command")

# Print output
echo "$output"

View file

@ -1,18 +0,0 @@
#!/bin/bash
# Handles one monitoring request: reads a check name from stdin and prints
# the current value of that check on stdout.

# Error report file queried by the "lastrun" and "lastrunrecent" checks.
# Held in a quoted variable so a home directory containing whitespace is
# not split into several words.
errors_file="{{ homedir }}/.backupmanager.errors"

read -r command

case "$command" in
    main)
        # Prints the state of the timer unit (for example "active").
        systemctl is-active --user backup-manager.timer
        ;;
    lastrun)
        cat "$errors_file"
        ;;
    lastrunrecent)
        # "OK" only when the file exists and find still matches it with
        # -mtime -2. The -n test on the quoted substitution replaces the
        # bare `[ $(find ...) ]`, which errors out when the path splits
        # into more than one word.
        if [ -f "$errors_file" ] && [ -n "$(find "$errors_file" -mtime -2)" ]; then
            echo "OK"
        else
            echo "outdated"
        fi
        ;;
    *)
        echo "Parameter unknown"
        ;;
esac

View file

@ -1,9 +0,0 @@
# Socket unit that exposes the monitoring endpoint.
[Unit]
Description=Execute Command Socket
[Socket]
# Stream socket at the rendered path; presumably an absolute filesystem path
# (Unix domain socket) - confirm against the values of statedir and name.
ListenStream={{ statedir }}/{{ name }}.monitoring
# Accept=yes makes systemd accept each connection itself and start one
# instance of the matching template service per connection.
Accept=yes
[Install]
WantedBy=sockets.target

View file

@ -1,15 +0,0 @@
# Service unit that runs monitor.sh with its stdin and stdout attached to a socket.
[Unit]
Description=Execute Command Service
# NOTE(review): confirm network.target is meaningful in the systemd instance
# this unit is installed into.
After=network.target
Requires=monitor.socket
[Service]
Type=simple
ExecStart={{ homedir }}/monitor.sh
# The script reads its command from the socket and writes the reply back to it.
StandardInput=socket
StandardOutput=socket
# Bound how long a single request may take: at most 10 seconds of runtime,
# and at most 5 seconds to stop.
TimeoutStopSec=5
RuntimeMaxSec=10
[Install]
WantedBy=sockets.target