From c51e72686e7c66bbbbdd42dae5e746a7dccdcdd5 Mon Sep 17 00:00:00 2001
From: Peter Leurs <peter@pfoe.be>
Date: Fri, 15 Aug 2025 23:09:26 +0200
Subject: [PATCH] Rewrite logging logic

Now log everything to a file, but split it up into multiple files.
The -debug variant can contain sensitive data, but the regular file
cannot.  This way we can use the non-debug variant for the monitoring
later on.

Note that the idea is that those files are empty if the last run was
successful, and contain the error when something went wrong.
---
 backupmanager.j2 | 51 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/backupmanager.j2 b/backupmanager.j2
index b5f6fdf..063e48b 100644
--- a/backupmanager.j2
+++ b/backupmanager.j2
@@ -43,6 +43,23 @@ except BlockingIOError:
     print("Another instance is running.")
     sys.exit(1)
 
+# This file contains all problems with the last run.  Empty when we ran successfully,
+# otherwise what needs attention...
+logfd = open("{{ homedir }}/.backupmanager.errors", "w")
+logfd2 = open("{{ homedir }}/.backupmanager.errors-debug", "w")
+
+def output_warning(msg, sensitive=""):  # Report a non-fatal problem; `sensitive` is extra detail kept out of the regular errors file.
+    print(f"WARN: {msg} {sensitive}")  # stdout gets the full message, sensitive part included
+    logfd.write(f"WARN: {msg}\n")  # regular errors file: message only, no sensitive detail
+    logfd2.write(f"WARN: {msg} {sensitive}\n")  # -debug errors file: message plus sensitive detail
+
+def output_fatal(msg, sensitive="", dontquityet=False):  # Report a fatal problem, then exit with status 1 unless dontquityet is true.
+    print(f"FATAL: {msg} {sensitive}")  # stdout gets the full message, sensitive part included
+    logfd.write(f"FATAL: {msg}\n")  # regular errors file: message only, no sensitive detail
+    logfd2.write(f"FATAL: {msg} {sensitive}\n")  # -debug errors file: message plus sensitive detail
+    if not dontquityet:  # with dontquityet=True the function returns and the caller decides when to abort
+        sys.exit(1)  # raises SystemExit, so nothing after the call runs in the caller
+
 def run_command(command, env=None):
     current_env = os.environ.copy()
     if env is not None:
@@ -53,7 +70,7 @@ def run_command(command, env=None):
     sys.stdout.flush()
     sys.stderr.flush()
     if process.returncode != 0:
-        raise RuntimeError(f"Command '{command}' failed with return code {process.returncode}, environment is {env}")
+        output_fatal(f"Command '{command}' failed with return code {process.returncode}", sensitive=f"environment is {env}")
 
 def get_command(command, env=None, full_return=False):
     current_env = os.environ.copy()
@@ -71,7 +88,7 @@ def get_command(command, env=None, full_return=False):
     if error:
         print(error.decode().strip(), file=sys.stderr)
     if return_code != 0:
-        raise RuntimeError(f"Command '{command}' failed with return code {return_code}, environment is {env}")
+        output_fatal(f"Command '{command}' failed with return code {return_code}", sensitive=f"environment is {env}")
     return output.decode()
 
 def get_snapshot_info(repo):
@@ -128,23 +145,22 @@ for r in repos:
         repos_ok.append({"repo":r,"config":repoconfig})
 
 if len(repos_ok)==0:
-    print("None of the repositories can be accessed.  At least one must be reachable for me to output repository init commands")
-    sys.exit(1)
+    output_fatal("None of the repositories can be accessed.  At least one must be reachable for me to output repository init commands")
 
 if polynomial_ok==False:
-    print("Not all repositories have the same chunker polynomial configured.  This can ONLY be configured when the repository is first created.  Please delete the repositories you can rebuild...")
+    msg = "Not all repositories have the same chunker polynomial configured.  This can ONLY be configured when the repository is first created.  Please delete the repositories you can rebuild...\n"
+    s = ""
     for r in repos_ok:
-        print(f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}")
-    sys.exit(1)
+        s = s + f"Repo: {r['repo']['url']} polynomial: {r['config']['chunker_polynomial']}\n"
+    output_fatal(msg, sensitive=s)
 
 if len(repos_in_error)!=0:
-    print("Could not open all repositories.  Check that they are accessible and that the passwords are correct.  If they are not yet initialized, use the following commands:")
+    msg = "Could not open all repositories.  Check that they are accessible and that the passwords are correct."
+    s = "If they are not yet initialized, use the following commands:\n\n"
     repo_from=repos_ok[0]['repo']
     for r in repos_in_error:
-        print()
-        print(f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}")
-    sys.exit(1)
-
+        s = s + f"RESTIC_PASSWORD={r['key']} RESTIC_FROM_PASSWORD={repo_from['key']} restic init --copy-chunker-params=true --from-repo {repo_from['url']} -r {r['url']}\n"
+    output_fatal(msg, sensitive=s)
 
 for r in repos:
     print(f"Getting snapshot list for repo {r['name']}")
@@ -160,11 +176,10 @@ for r in repos:
         if s['hostname'] not in r['hosts']:
             wronghosts.update(s['hostname'])
     if wronghosts:
-        print(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list")
+        output_fatal(f"The repository {r['name']} contains backups for unknown hosts {wronghosts}, either delete the backups, or add the hosts to the list", dontquityet=True)
         hostsok = False
 if not hostsok:
-    print("Host information not ok, aborting")
-    sys.exit(1)
+    output_fatal("Host information not ok, aborting")
 
 for host in allhosts:
     print(f"Syncing hostname {host}")
@@ -194,7 +209,7 @@ for host in allhosts:
                 most_recent_backup_on = r
                 most_recent_backup_id = s['id']
     if most_recent_backup_ts == 0:
-        print(f"WARNING: There are no backups for {host}")
+        output_warning(f"There are no backups for {host}")
         continue
 
     # We now know the most recent backup.  See if a backup is present on all targets that carry this hostname.
@@ -228,9 +243,9 @@ for host in allhosts:
         timeout = hosts[host]["timeout"]
     if not idle:
         if not have_a_copy:
-            print(f"WARNING: We do not have a copy for {host}")
+            output_warning(f"We do not have a copy for {host}")
         if most_recent_backup_ts < time.time()-(timeout*24*3600):
-            print(f"WARNING: Last backup for {host} is too old")
+            output_warning(f"Last backup for {host} is too old")
 
 for r in repos:
     e = {}