Compare commits
2 commits
7756236e1c
...
8c603d6ebd
| Author | SHA1 | Date | |
|---|---|---|---|
| 8c603d6ebd | |||
| c51e72686e |
6 changed files with 120 additions and 18 deletions
29
appinfo.yml
29
appinfo.yml
|
|
@ -6,6 +6,16 @@ templatefiles:
|
||||||
- src: backupmanager.j2
|
- src: backupmanager.j2
|
||||||
dest: ~/backupmanager
|
dest: ~/backupmanager
|
||||||
mode: "0755"
|
mode: "0755"
|
||||||
|
- src: monitor@.service.j2
|
||||||
|
dest: ~/.config/systemd/user/monitor@.service
|
||||||
|
- src: monitor.socket.j2
|
||||||
|
dest: ~/.config/systemd/user/monitor.socket
|
||||||
|
- src: monitor.sh.j2
|
||||||
|
dest: ~/monitor.sh
|
||||||
|
mode: "0755"
|
||||||
|
- src: monitor-test.sh.j2
|
||||||
|
dest: ~/monitor-test.sh
|
||||||
|
mode: "0755"
|
||||||
backuptype: none
|
backuptype: none
|
||||||
configdefinition:
|
configdefinition:
|
||||||
"$id": "backup manager config"
|
"$id": "backup manager config"
|
||||||
|
|
@ -72,3 +82,22 @@ configdefinition:
|
||||||
description: Repositories where the latest backup is older than timeout days will give a warning that there are no recent backups
|
description: Repositories where the latest backup is older than timeout days will give a warning that there are no recent backups
|
||||||
required:
|
required:
|
||||||
- repos
|
- repos
|
||||||
|
exports:
|
||||||
|
monitoring:
|
||||||
|
checks:
|
||||||
|
- name: main
|
||||||
|
message: Backup manager activated
|
||||||
|
interval: 3600
|
||||||
|
type: string
|
||||||
|
okvalue: active
|
||||||
|
- name: lastrun
|
||||||
|
message: Errors last run
|
||||||
|
interval: 3600
|
||||||
|
type: string
|
||||||
|
okvalue: ""
|
||||||
|
- name: lastrunrecent
|
||||||
|
message: Backup manager did not run recently
|
||||||
|
interval: 3600
|
||||||
|
type: string
|
||||||
|
okvalue: "OK"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,23 @@ except BlockingIOError:
|
||||||
print("Another instance is running.")
|
print("Another instance is running.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# This file contains all problems with the last run. Empty when we ran successfully,
|
||||||
|
# otherwise what needs attention...
|
||||||
|
logfd = open("{{ homedir }}/.backupmanager.errors", "w")
|
||||||
|
logfd2 = open("{{ homedir }}/.backupmanager.errors-debug", "w")
|
||||||
|
|
||||||
|
def output_warning(msg, sensitive=""):
    """Report a non-fatal problem on stdout and in both error files.

    Args:
        msg: Text of the warning. Written to stdout, ``logfd`` and ``logfd2``.
        sensitive: Extra detail appended on stdout and in ``logfd2`` only;
            it is never written to ``logfd``.
    """
    print(f"WARN: {msg} {sensitive}")
    logfd.write(f"WARN: {msg}\n")
    logfd2.write(f"WARN: {msg} {sensitive}\n")
    # Flush immediately: the run continues after a warning, and buffered
    # entries would be lost if the process is killed before it exits normally.
    logfd.flush()
    logfd2.flush()
|
||||||
|
|
||||||
|
def output_fatal(msg, sensitive="", dontquityet=False):
    """Report a fatal problem and, by default, terminate the run.

    Args:
        msg: Text of the error. Written to stdout, ``logfd`` and ``logfd2``.
        sensitive: Extra detail appended on stdout and in ``logfd2`` only;
            it is never written to ``logfd``.
        dontquityet: When true, only record the error and return so the
            caller can report further problems before aborting.

    Raises:
        SystemExit: With status 1, unless ``dontquityet`` is true.
    """
    print(f"FATAL: {msg} {sensitive}")
    logfd.write(f"FATAL: {msg}\n")
    logfd2.write(f"FATAL: {msg} {sensitive}\n")
    # Flush immediately: with dontquityet=True the run continues, and buffered
    # entries would be lost if the process is killed before it exits normally.
    logfd.flush()
    logfd2.flush()
    if not dontquityet:
        sys.exit(1)
|
||||||
|
|
||||||
def run_command(command, env=None):
|
def run_command(command, env=None):
|
||||||
current_env = os.environ.copy()
|
current_env = os.environ.copy()
|
||||||
if env is not None:
|
if env is not None:
|
||||||
|
|
@ -53,7 +70,7 @@ def run_command(command, env=None):
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
if process.returncode != 0:
|
if process.returncode != 0:
|
||||||
raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}")
|
output_fatal(f"Command '{command}' failed with return code {process.returncode}", sensitive=f"environment is {env}")
|
||||||
|
|
||||||
def get_command(command, env=None, full_return=False):
|
def get_command(command, env=None, full_return=False):
|
||||||
current_env = os.environ.copy()
|
current_env = os.environ.copy()
|
||||||
|
|
@ -71,7 +88,7 @@ def get_command(command, env=None, full_return=False):
|
||||||
if error:
|
if error:
|
||||||
print(error.decode().strip(), file=sys.stderr)
|
print(error.decode().strip(), file=sys.stderr)
|
||||||
if return_code != 0:
|
if return_code != 0:
|
||||||
raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}")
|
output_fatal(f"Command '{command}' failed with return code {return_code}", sensitive=f"environment is {env}")
|
||||||
return output.decode()
|
return output.decode()
|
||||||
|
|
||||||
def get_snapshot_info(repo):
|
def get_snapshot_info(repo):
|
||||||
|
|
@ -128,23 +145,22 @@ for r in repos:
|
||||||
repos_ok.append({"repo":r,"config":repoconfig})
|
repos_ok.append({"repo":r,"config":repoconfig})
|
||||||
|
|
||||||
if len(repos_ok)==0:
|
if len(repos_ok)==0:
|
||||||
print("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
|
output_fatal("None of the repositories can be accessed. At least one must be reachable for me to output repository init commands")
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if polynomial_ok==False:
|
if polynomial_ok==False:
|
||||||
print("Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...")
|
msg = "Not all repositories have the same chunker polynomial configured. This can ONLY be configured when the repository is first created. Please delete the repositories you can rebuild...\n"
|
||||||
|
s = ""
|
||||||
for r in repos_ok:
|
for r in repos_ok:
|
||||||
print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}")
|
s = s + f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}\n"
|
||||||
sys.exit(1)
|
output_fatal(msg, sensitive=s)
|
||||||
|
|
||||||
if len(repos_in_error)!=0:
|
if len(repos_in_error)!=0:
|
||||||
print("Could not open all repositories. Check that they are accessible and that the passwords are correct. If they are not yet initialized, use the following commands:")
|
msg = "Could not open all repositories. Check that they are accessible and that the passwords are correct."
|
||||||
|
s = "If they are not yet initialized, use the following commands:\n\n"
|
||||||
repo_from=repos_ok[0]['repo']
|
repo_from=repos_ok[0]['repo']
|
||||||
for r in repos_in_error:
|
for r in repos_in_error:
|
||||||
print()
|
s = s + f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}\n"
|
||||||
print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}")
|
output_fatal(msg, sensitive=s)
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
for r in repos:
|
for r in repos:
|
||||||
print(f"Getting snapshot list for repo {r['name']}")
|
print(f"Getting snapshot list for repo {r['name']}")
|
||||||
|
|
@ -160,11 +176,10 @@ for r in repos:
|
||||||
if s['hostname'] not in r['hosts']:
|
if s['hostname'] not in r['hosts']:
|
||||||
wronghosts.update(s['hostname'])
|
wronghosts.update(s['hostname'])
|
||||||
if wronghosts:
|
if wronghosts:
|
||||||
print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list")
|
output_fatal(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list", dontquityet=True)
|
||||||
hostsok = False
|
hostsok = False
|
||||||
if not hostsok:
|
if not hostsok:
|
||||||
print("Host information not ok, aborting")
|
output_fatal("Host information not ok, aborting")
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
for host in allhosts:
|
for host in allhosts:
|
||||||
print(f"Syncing hostname {host}")
|
print(f"Syncing hostname {host}")
|
||||||
|
|
@ -194,7 +209,7 @@ for host in allhosts:
|
||||||
most_recent_backup_on = r
|
most_recent_backup_on = r
|
||||||
most_recent_backup_id = s['id']
|
most_recent_backup_id = s['id']
|
||||||
if most_recent_backup_ts == 0:
|
if most_recent_backup_ts == 0:
|
||||||
print(f"WARNING: There are no backups for {host}")
|
output_warning(f"There are no backups for {host}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# We now know the most recent backup. See if a backup is present on all targets that carry this hostname.
|
# We now know the most recent backup. See if a backup is present on all targets that carry this hostname.
|
||||||
|
|
@ -228,9 +243,9 @@ for host in allhosts:
|
||||||
timeout = hosts[host]["timeout"]
|
timeout = hosts[host]["timeout"]
|
||||||
if not idle:
|
if not idle:
|
||||||
if not have_a_copy:
|
if not have_a_copy:
|
||||||
print(f"WARNING: We do not have a copy for {host}")
|
output_warning(f"We do not have a copy for {host}")
|
||||||
if most_recent_backup_ts < time.time()-(timeout*24*3600):
|
if most_recent_backup_ts < time.time()-(timeout*24*3600):
|
||||||
print(f"WARNING: Last backup for {host} is too old")
|
output_warning(f"Last backup for {host} is too old")
|
||||||
|
|
||||||
for r in repos:
|
for r in repos:
|
||||||
e = {}
|
e = {}
|
||||||
|
|
|
||||||
16
monitor-test.sh.j2
Normal file
16
monitor-test.sh.j2
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
#!/bin/bash

# Usage: ./monitor-test.sh <command>
#
# Sends a single command line to the monitoring socket and prints the reply.

if [ $# -ne 1 ]; then
    # Usage errors belong on stderr so they are not mistaken for a reply.
    echo "Usage: $0 <command>" >&2
    exit 1
fi

command="$1"

# Connect to socket and send command. The path is quoted so that a state
# directory or name containing whitespace is still passed as a single word.
output=$(nc -U "{{ statedir }}/{{ name }}.monitoring" <<< "$command")

# Print output. printf is used instead of echo so that a reply that looks
# like an echo option (for example "-n") is printed literally.
printf '%s\n' "$output"
|
||||||
18
monitor.sh.j2
Normal file
18
monitor.sh.j2
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/bin/bash

# Answers one monitoring query per invocation: reads a check name from stdin
# and writes the value of that check to stdout.

# File that is both printed by "lastrun" and age-checked by "lastrunrecent".
errorfile="{{ homedir }}/.backupmanager.errors"

read -r command
case "$command" in
    main)
        # Prints the state of the timer unit (e.g. "active").
        systemctl is-active --user backup-manager.timer
        ;;
    lastrun)
        cat "$errorfile"
        ;;
    lastrunrecent)
        # find prints the path only when the file was modified less than two
        # days ago. The substitution is quoted and tested with -n so that a
        # path containing whitespace cannot break the test expression.
        if [ -f "$errorfile" ] && [ -n "$(find "$errorfile" -mtime -2)" ]; then
            echo "OK"
        else
            echo "outdated"
        fi
        ;;
    *)
        echo "Parameter unknown"
        ;;
esac
|
||||||
|
|
||||||
|
|
||||||
9
monitor.socket.j2
Normal file
9
monitor.socket.j2
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Execute Command Socket
|
||||||
|
|
||||||
|
[Socket]
|
||||||
|
ListenStream={{ statedir }}/{{ name }}.monitoring
|
||||||
|
Accept=yes
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=sockets.target
|
||||||
15
monitor@.service.j2
Normal file
15
monitor@.service.j2
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Execute Command Service
|
||||||
|
After=network.target
|
||||||
|
Requires=monitor.socket
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
ExecStart={{ homedir }}/monitor.sh
|
||||||
|
StandardInput=socket
|
||||||
|
StandardOutput=socket
|
||||||
|
TimeoutStopSec=5
|
||||||
|
RuntimeMaxSec=10
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=sockets.target
|
||||||
Loading…
Add table
Add a link
Reference in a new issue