Compare commits
2 commits
7756236e1c
...
8c603d6ebd
| Author | SHA1 | Date | |
|---|---|---|---|
| 8c603d6ebd | |||
| c51e72686e |
6 changed files with 120 additions and 18 deletions
29
appinfo.yml
29
appinfo.yml
|
|
@ -6,6 +6,16 @@ templatefiles:
|
|||
- src: backupmanager.j2
|
||||
dest: ~/backupmanager
|
||||
mode: "0755"
|
||||
- src: monitor@.service.j2
|
||||
dest: ~/.config/systemd/user/monitor@.service
|
||||
- src: monitor.socket.j2
|
||||
dest: ~/.config/systemd/user/monitor.socket
|
||||
- src: monitor.sh.j2
|
||||
dest: ~/monitor.sh
|
||||
mode: "0755"
|
||||
- src: monitor-test.sh.j2
|
||||
dest: ~/monitor-test.sh
|
||||
mode: "0755"
|
||||
backuptype: none
|
||||
configdefinition:
|
||||
"$id": "backup manager config"
|
||||
|
|
@ -72,3 +82,22 @@ configdefinition:
|
|||
description: Repositories where the latest backup is older than timeout days will give a warning that there are no recent backups
|
||||
required:
|
||||
- repos
|
||||
exports:
|
||||
monitoring:
|
||||
checks:
|
||||
- name: main
|
||||
message: Backup manager activated
|
||||
interval: 3600
|
||||
type: string
|
||||
okvalue: active
|
||||
- name: lastrun
|
||||
message: Errors last run
|
||||
interval: 3600
|
||||
type: string
|
||||
okvalue: ""
|
||||
- name: lastrunrecent
|
||||
message: Backup manager did not run recently
|
||||
interval: 3600
|
||||
type: string
|
||||
okvalue: "OK"
|
||||
|
||||
|
|
|
|||
|
|
@ -43,6 +43,23 @@ except BlockingIOError:
|
|||
print("Another instance is running.")
|
||||
sys.exit(1)
|
||||
|
||||
# This file contains all problems with the last run. Empty when we ran successfully,
|
||||
# otherwise what needs attention...
|
||||
# Summary log: receives only the non-sensitive part of each message.
logfd = open("{{ homedir }}/.backupmanager.errors", "w")
# The debug log additionally receives the `sensitive` details (repository
# passwords, command environments), so keep it readable by the owner only:
# create it with mode 0600 and also tighten a pre-existing, laxer file.
logfd2 = open(
    "{{ homedir }}/.backupmanager.errors-debug",
    "w",
    opener=lambda path, flags: os.open(path, flags, 0o600),
)
os.fchmod(logfd2.fileno(), 0o600)
|
||||
|
||||
def output_warning(msg, sensitive=""):
    """Report a non-fatal problem and keep running.

    Args:
        msg: Description of the problem. Printed to stdout and written to
            both log files.
        sensitive: Extra detail that may contain secrets. It is printed and
            written to the debug log (``logfd2``) only, never to ``logfd``.
    """
    print(f"WARN: {msg} {sensitive}")
    logfd.write(f"WARN: {msg}\n")
    logfd2.write(f"WARN: {msg} {sensitive}\n")
    # Flush immediately: the script keeps running after a warning, and
    # buffered text would be invisible to anyone reading the log files in the
    # meantime and lost entirely if the process is killed.
    logfd.flush()
    logfd2.flush()
|
||||
|
||||
def output_fatal(msg, sensitive="", dontquityet=False):
    """Report a fatal problem and, by default, exit with status 1.

    Args:
        msg: Description of the problem. Printed to stdout and written to
            both log files.
        sensitive: Extra detail that may contain secrets. It is printed and
            written to the debug log (``logfd2``) only, never to ``logfd``.
        dontquityet: When true, only record the problem and return, so the
            caller can report further problems before aborting itself.

    Raises:
        SystemExit: With code 1, unless ``dontquityet`` is true.
    """
    print(f"FATAL: {msg} {sensitive}")
    logfd.write(f"FATAL: {msg}\n")
    logfd2.write(f"FATAL: {msg} {sensitive}\n")
    # Flush immediately: with dontquityet=True the script keeps running, and
    # buffered text would be invisible to anyone reading the log files in the
    # meantime and lost entirely if the process is killed.
    logfd.flush()
    logfd2.flush()
    if not dontquityet:
        sys.exit(1)
|
||||
|
||||
def run_command(command, env=None):
|
||||
current_env = os.environ.copy()
|
||||
if env is not None:
|
||||
|
|
@ -53,7 +70,7 @@ def run_command(command, env=None):
|
|||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
if process.returncode != 0:
|
||||
raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}")
|
||||
output_fatal(f"Command '{command}' failed with return code {process.returncode}", sensitive=f"environment is {env}")
|
||||
|
||||
def get_command(command, env=None, full_return=False):
|
||||
current_env = os.environ.copy()
|
||||
|
|
@ -71,7 +88,7 @@ def get_command(command, env=None, full_return=False):
|
|||
if error:
|
||||
print(error.decode().strip(), file=sys.stderr)
|
||||
if return_code != 0:
|
||||
raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}")
|
||||
output_fatal(f"Command '{command}' failed with return code {return_code}", sensitive=f"environment is {env}")
|
||||
return output.decode()
|
||||
|
||||
def get_snapshot_info(repo):
|
||||
|
|
@ -128,23 +145,22 @@ for r in repos:
|
|||
repos_ok.append({"repo":r,"config":repoconfig})
|
||||
|
||||
if len(repos_ok)==0:
|
||||
print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
|
||||
sys.exit(1)
|
||||
output_fatal("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
|
||||
|
||||
if polynomial_ok==False:
|
||||
print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...")
|
||||
msg = "Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...\n"
|
||||
s = ""
|
||||
for r in repos_ok:
|
||||
print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}")
|
||||
sys.exit(1)
|
||||
s = s + f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}\n"
|
||||
output_fatal(msg, sensitive=s)
|
||||
|
||||
if len(repos_in_error)!=0:
|
||||
print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:")
|
||||
msg = "Could not open all repositories. Check that they are accessible and that the passwords are correct."
|
||||
s = "If they are not yet initialized, use the following commands:\n\n"
|
||||
repo_from=repos_ok[0]['repo']
|
||||
for r in repos_in_error:
|
||||
print()
|
||||
print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}")
|
||||
sys.exit(1)
|
||||
|
||||
s = s + f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}\n"
|
||||
output_fatal(msg, sensitive=s)
|
||||
|
||||
for r in repos:
|
||||
print(f"Getting snapshot list for repo {r['name']}")
|
||||
|
|
@ -160,11 +176,10 @@ for r in repos:
|
|||
if s['hostname'] not in r['hosts']:
    # add(), not update(): update() iterates its argument, so a hostname
    # string would be inserted one character at a time.
    # NOTE(review): assumes wronghosts is a set; its definition is outside
    # this hunk, so confirm before merging.
    wronghosts.add(s['hostname'])
|
||||
if wronghosts:
|
||||
print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list")
|
||||
output_fatal(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list", dontquityet=True)
|
||||
hostsok = False
|
||||
if not hostsok:
|
||||
print("Host information not ok, aborting")
|
||||
sys.exit(1)
|
||||
output_fatal("Host information not ok, aborting")
|
||||
|
||||
for host in allhosts:
|
||||
print(f"Syncing hostname {host}")
|
||||
|
|
@ -194,7 +209,7 @@ for host in allhosts:
|
|||
most_recent_backup_on = r
|
||||
most_recent_backup_id = s['id']
|
||||
if most_recent_backup_ts == 0:
|
||||
print(f"WARNING: There are no backups for {host}")
|
||||
output_warning(f"There are no backups for {host}")
|
||||
continue
|
||||
|
||||
# We now know the most recent backup. See if a backup is present on all targets that carry this hostname.
|
||||
|
|
@ -228,9 +243,9 @@ for host in allhosts:
|
|||
timeout = hosts[host]["timeout"]
|
||||
if not idle:
|
||||
if not have_a_copy:
|
||||
print(f"WARNING: We do not have a copy for {host}")
|
||||
output_warning(f"We do not have a copy for {host}")
|
||||
if most_recent_backup_ts < time.time()-(timeout*24*3600):
|
||||
print(f"WARNING: Last backup for {host} is too old")
|
||||
output_warning(f"Last backup for {host} is too old")
|
||||
|
||||
for r in repos:
|
||||
e = {}
|
||||
|
|
|
|||
16
monitor-test.sh.j2
Normal file
16
monitor-test.sh.j2
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

# Usage: ./monitor-test.sh <command>
#
# Sends a single command to the monitoring socket and prints the reply.

if [ $# -ne 1 ]; then
    echo "Usage: $0 <command>"
    exit 1
fi

command="$1"

# Connect to the socket and send the command. The path is quoted so that a
# state directory or name containing whitespace is still passed to nc as one
# argument.
output=$(nc -U "{{ statedir }}/{{ name }}.monitoring" <<< "$command")

# Print the reply verbatim; printf (unlike echo) does not interpret a reply
# that happens to start with an option such as -n.
printf '%s\n' "$output"
|
||||
18
monitor.sh.j2
Normal file
18
monitor.sh.j2
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash
# Reads one check name from stdin and prints that check's value on stdout.
read -r command
case "$command" in
    main)
        # Activation state of the backup timer, as printed by systemctl.
        systemctl is-active --user backup-manager.timer
        ;;
    lastrun)
        # Problems recorded by the last run; prints nothing if the file is empty.
        cat "{{ homedir }}/.backupmanager.errors"
        ;;
    lastrunrecent)
        # "OK" only when the error file exists and find reports it as
        # modified less than two days ago. The path and the command
        # substitution are quoted so the test stays well-formed for paths
        # containing whitespace.
        errors_file="{{ homedir }}/.backupmanager.errors"
        if [ -f "$errors_file" ] && [ -n "$(find "$errors_file" -mtime -2)" ]; then
            echo "OK"
        else
            echo "outdated"
        fi
        ;;
    *)
        echo "Parameter unknown"
        ;;
esac
|
||||
|
||||
|
||||
9
monitor.socket.j2
Normal file
9
monitor.socket.j2
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
[Unit]
|
||||
Description=Execute Command Socket
|
||||
|
||||
[Socket]
|
||||
ListenStream={{ statedir }}/{{ name }}.monitoring
|
||||
Accept=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=sockets.target
|
||||
15
monitor@.service.j2
Normal file
15
monitor@.service.j2
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[Unit]
|
||||
Description=Execute Command Service
|
||||
After=network.target
|
||||
Requires=monitor.socket
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart={{ homedir }}/monitor.sh
|
||||
StandardInput=socket
|
||||
StandardOutput=socket
|
||||
TimeoutStopSec=5
|
||||
RuntimeMaxSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=sockets.target
|
||||
Loading…
Add table
Add a link
Reference in a new issue