From e6edd4c256c22ef07299b6ace9efb1f29edb37c1 Mon Sep 17 00:00:00 2001
From: Jens Steidl <Jens.Steidl@slub-dresden.de>
Date: Wed, 3 Apr 2024 10:36:09 +0200
Subject: [PATCH] - init script

---
 backup_am_github_repos_and_issues.sh | 74 ++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 backup_am_github_repos_and_issues.sh

diff --git a/backup_am_github_repos_and_issues.sh b/backup_am_github_repos_and_issues.sh
new file mode 100644
index 0000000..4935801
--- /dev/null
+++ b/backup_am_github_repos_and_issues.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# This script clones/updates/saves all Archivematica related GitHub repositories and issues in the current directory.
+
+# var init
+orgnames="archivematica artefactual artefactual-labs" # GitHub organisations
+page=1 # starting page
+perpage=100 # HINT: 100 already per page max
+maxpage=3 # HINT: should be future proof, currently <200 AM-related repos
+local_repo_dir="archivematica_related_repos" # local subdir for repos
+local_issue_dir="archivematica_related_issues" # local subdir for issues
+issue_base_url="https://github.com/archivematica/Issues/issues/" # base URL for GitHub issues
+min_issue_count=1672 # issues known to exist as of 2024-04-02 (skip existence probe up to here)
+fails=() # set of failed commands
+
+# backup github repos
+mkdir -p "./$local_repo_dir"
+pushd "$local_repo_dir" || exit 1
+until [ "$page" -gt "$maxpage" ]; do
+  for org in $orgnames; do # unquoted on purpose: word-split into one org per iteration
+    while read -r repo_url; do
+      if [ -n "$repo_url" ]; then
+        repo_name=$(echo "$repo_url" | sed 's#^.*/\([^/]*\)\.git$#\1#g') # strip path prefix and .git suffix
+        echo "############"
+        if [ -d "./$repo_name" ]; then
+          echo "update repo: $repo_name"
+          cmd="git -C ./$repo_name pull --recurse-submodules" # update local repo
+        else
+          echo "clone repo : $repo_name"
+          cmd="git clone --recurse-submodules $repo_url" # create local repo
+        fi
+        $cmd # run command (unquoted on purpose: split string into words)
+        result=$?
+        if [ "$result" -ne 0 ]; then
+          fails+=("$cmd") # remember fails
+        fi
+      fi
+    done < <(curl -sS "https://api.github.com/orgs/$org/repos?page=$page&per_page=$perpage" | grep -e 'clone_url.*' | cut -d \" -f 4 | xargs -L1 echo) # HINT: use process substitution to remember $fails
+  done
+  ((page += 1)) # next page
+done
+popd || exit 1
+
+# backup github issues
+mkdir -p "./$local_issue_dir"
+pushd "$local_issue_dir" || exit 1
+for n in {1..100000}; do
+  url="${issue_base_url}${n}.html"
+  if [ "$n" -gt "$min_issue_count" ]; then
+    if ! wget --spider "${url}" 2>/dev/null; then # probe: stop at the first missing issue
+      echo "stop: issue ${n} does not exist."
+      break
+    fi
+  fi
+  echo "save issue: ${n}"
+  cmd="wget -q -N -E -K ${url}"; $cmd # FIXME: broken layout (BUGFIX: keep command in $cmd so a failure is logged correctly below)
+  # cmd="wget -q -N -E -K -k -p -H ${url}"; $cmd # ALTERNATIVE: still broken layout but offline images
+  result=$?
+  if [ "$result" -ne 0 ]; then
+    fails+=("$cmd") # remember fails (previously logged the stale git command from the repo loop)
+  fi
+done
+popd || exit 1
+
+# print fails (if any) and exit
+if [ ${#fails[@]} -eq 0 ]; then
+  exit 0 # no error during execution
+else
+  echo "################"
+  echo "FAILED COMMANDS:"
+  for fail in "${fails[@]}"; do
+    echo "$fail" # log failed command
+  done
+  exit 1 # errors during execution
+fi
-- 
GitLab