From e6edd4c256c22ef07299b6ace9efb1f29edb37c1 Mon Sep 17 00:00:00 2001
From: Jens Steidl <Jens.Steidl@slub-dresden.de>
Date: Wed, 3 Apr 2024 10:36:09 +0200
Subject: [PATCH] - init script

---
 backup_am_github_repos_and_issues.sh | 74 ++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 backup_am_github_repos_and_issues.sh

diff --git a/backup_am_github_repos_and_issues.sh b/backup_am_github_repos_and_issues.sh
new file mode 100644
index 0000000..4935801
--- /dev/null
+++ b/backup_am_github_repos_and_issues.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# This script clones/updates/saves all Archivematica-related GitHub repositories and issues in the current directory.
+
+# var init
+orgnames="archivematica artefactual artefactual-labs" # GitHub organisations
+page=1 # starting page
+perpage=100 # HINT: 100 is the per-page maximum of the GitHub API
+maxpage=3 # HINT: should be future-proof, currently <200 AM-related repos
+local_repo_dir="archivematica_related_repos" # local subdir for repos
+local_issue_dir="archivematica_related_issues" # local subdir for issues
+issue_base_url="https://github.com/archivematica/Issues/issues/" # base URL for GitHub issues
+min_issue_count=1672 # as of 2024-04-02
+fails=() # set of failed commands
+
+# backup github repos
+mkdir -p ./$local_repo_dir
+pushd $local_repo_dir || exit 1
+until [ $page -gt $maxpage ]; do
+    for org in $orgnames; do
+        while read -r repo_url; do
+            if [ -n "$repo_url" ]; then
+                repo_name=$(echo "$repo_url" | sed 's#^.*/\([^/]*\)\.git$#\1#g') # get repo name
+                echo "############"
+                if [ -d "./$repo_name" ]; then
+                    echo "update repo: $repo_name"
+                    cmd="git -C ./$repo_name pull --recurse-submodules" # update local repo
+                else
+                    echo "clone repo : $repo_name"
+                    cmd="git clone --recurse-submodules $repo_url" # create local repo
+                fi
+                $cmd # run command
+                result=$?
+                if [ "$result" -ne 0 ]; then
+                    fails+=("$cmd") # remember fails
+                fi
+            fi
+        done < <(curl -sS "https://api.github.com/orgs/$org/repos?page=$page&per_page=$perpage" | grep -e 'clone_url.*' | cut -d \" -f 4 | xargs -L1 echo) # HINT: process substitution keeps the while loop in the current shell, so additions to $fails are not lost
+    done
+    ((page += 1)) # next page
+done
+popd || exit 1
+
+# backup github issues
+mkdir -p ./$local_issue_dir
+pushd $local_issue_dir || exit 1
+for n in {1..100000}; do
+    url="${issue_base_url}${n}.html"
+    if [ $n -gt $min_issue_count ]; then
+        if ! wget --spider "${url}" 2>/dev/null; then
+            echo "stop: issue ${n} does not exist."
+            break
+        fi
+    fi
+    echo "save issue: ${n}"
+    wget -q -N -E -K "${url}" # FIXME: broken layout
+    # wget -q -N -E -K -k -p -H "${url}" # ALTERNATIVE: still broken layout but offline images
+    result=$?
+    if [ "$result" -ne 0 ]; then
+        fails+=("wget -q -N -E -K ${url}") # remember fails (record the wget call, not the leftover git command)
+    fi
+done
+popd || exit 1
+
+# print fails (if any) and exit
+if [ ${#fails[@]} -eq 0 ]; then
+    exit 0 # no error during execution
+else
+    echo "################"
+    echo "FAILED COMMANDS:"
+    for fail in "${fails[@]}"; do
+        echo "$fail" # log failed command
+    done
+    exit 1 # errors during execution
+fi
--
GitLab
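
Side note on the repository listing step: the script extracts the clone URLs from the raw API JSON with grep/cut, which works on any host that only has curl and standard GNU tools. If jq happens to be available, the same step could be expressed as follows; this is a minimal sketch under that assumption and is not part of the patch:

    # assumption: jq is installed; prints one clone URL per line,
    # equivalent to the grep/cut/xargs pipeline used in the script
    curl -sS "https://api.github.com/orgs/$org/repos?page=$page&per_page=$perpage" \
        | jq -r '.[].clone_url'

The grep/cut variant in the patch avoids the extra jq dependency, at the cost of relying on the current formatting of the API response.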