diff --git a/backup_am_github_repos_and_issues.sh b/backup_am_github_repos_and_issues.sh
index 647ee08fc345a6ceef92f87912d7fb0fad90c27e..4891a98f84816d9ff63d6f7e9c17eb7eff267243 100644
--- a/backup_am_github_repos_and_issues.sh
+++ b/backup_am_github_repos_and_issues.sh
@@ -1,17 +1,67 @@
-#!/bin/bash
-# This script clones/updates/saves all Archivematica related GitHub repositories and issues in the current directory.
+#!/usr/bin/env bash
+# This script clones/updates/saves all Archivematica related GitHub repositories and issues.
 
-# var init
+# functions
+get_cli_args(){
+while [[ $# -gt 0 ]]; do
+  case ${1} in
+    -b|--backup_dir|--backup-dir|--backupdir)
+      if [[ -d "${2}" && -w "${2}" ]]; then backup_dir="${2}"; else echo "'${2}' is not a writable directory. Exiting."; exit 1; fi
+      shift; shift;
+      ;;
+    *)
+      echo "'${1}' is not a valid parameter. Exiting."
+      exit 1
+      ;;
+  esac
+done
+}
+
+print_fails(){
+# print fails (if any); return instead of exit so later backup phases still run
+if [[ ${#fails[@]} -eq 0 ]]; then
+  return 0 # no error during this phase
+else
+  echo "################"
+  echo "FAILED COMMANDS:"
+  for fail in "${fails[@]}"; do
+    echo "${fail}" # log failed command
+  done
+  fails=() # reset list of failed commands for the next phase
+  return 1 # errors during this phase
+fi
+}
+
+
+# defaults
 orgnames="archivematica artefactual artefactual-labs" # GitHub organisations
 page=1 # starting page
 perpage=100 # HINT: 100 already per page max
 maxpage=3 # HINT: should be future proof, currently <200 AM-related repos
-local_repo_dir="../archivematica_related_repos" # local subdir for repos
-local_issue_dir="../archivematica_related_issues" # local subdir for issues
 orgs_base_url="https://api.github.com/orgs/" # base API URL for GitHub organisations
 issue_base_url="https://github.com/archivematica/Issues/issues/" # base URL for GitHub issues
-min_issue_count=1672 # as of 2024-04-02
+min_issue_count=1709 # as of 2024-09-05
 fails=() # set of failed commands
+RED=$'\e[31m' # real ESC control char (ANSI-C quoting) so plain echo renders colour
+WHITE=$'\e[0m'
+ERROR="${RED}[ERROR]"$'\t'"${WHITE}"
+backup_dir=".." # default backup location (parent directory)
+
+# get CLI arguments
+get_cli_args "${@}"
+
+# var init
+local_repo_dir="${backup_dir}/archivematica_related_repos" # local subdir for repos
+local_issue_dir="${backup_dir}/archivematica_related_issues" # local subdir for issues
+
+# list external required binaries here (space separated)
+REQUIREMENTS="basename bash curl cut git grep wget xargs"
+for REQUIREMENT in ${REQUIREMENTS}; do
+  command -v "${REQUIREMENT}" >/dev/null 2>&1 || { echo -e "${ERROR} '${REQUIREMENT}' required but not installed. Aborting." >&2; exit 1; }
+done
+
+### MAIN
+
 
 # backup github repos
 [[ ! -d ${local_repo_dir} ]] && mkdir -p ${local_repo_dir}
@@ -19,21 +69,17 @@ pushd ${local_repo_dir} || exit 1
 until [ ${page} -gt ${maxpage} ]; do
   for org in ${orgnames}; do
     while read -r repo_url; do
-      if [ -n "${repo_url}" ]; then
-        repo_name=$(echo "${repo_url}" | sed 's#^.*/\([^/]*\)\.git$#\1#g') # get repo name
+      if [[ -n "${repo_url}" ]]; then
+        repo_name=$( basename -s ".git" "${repo_url}" ) # get repo name
         echo "############"
-        if [ -d "./${repo_name}" ]; then
+        if [[ -d "./${repo_name}" ]]; then
           echo "update repo: ${repo_name}"
           cmd="git -C ./${repo_name} pull --recurse-submodules" # update local repo
         else
           echo "clone repo : ${repo_name}"
           cmd="git clone --recurse-submodules ${repo_url}" # create local repo
         fi
-        $cmd # run command
-        result=$?
-        if [ "${result}" -ne 0 ]; then
-          fails+=("${cmd}") # remember fails
-        fi
+        ${cmd} || fails+=("${cmd}") # remember fails
      fi
     done < <(curl -sS "${orgs_base_url}${org}/repos?page=${page}&per_page=${perpage}" | grep -e 'clone_url.*' | cut -d \" -f 4 | xargs -L1 echo) # HINT: use process substitution to remember $fails
   done
@@ -41,35 +87,23 @@
 done
 popd || exit 1
 
+print_fails
+
 # backup github issues
 [[ ! -d ${local_issue_dir} ]] && mkdir -p ${local_issue_dir}
 pushd ${local_issue_dir} || exit 1
 for n in {1..100000}; do
   url="${issue_base_url}${n}.html"
-  if [ ${n} -gt ${min_issue_count} ]; then
+  if [[ ${n} -gt ${min_issue_count} ]]; then
     if ! wget --spider "${url}" 2>/dev/null; then
       echo "stop: issue ${n} does not exist."
       break
     fi
   fi
   echo "save issue: ${n}"
-  wget -q -N -E -K "${issue_base_url}${n}.html" # FIXME: broken layout
+  wget -q -N -E -K "${url}" || fails+=("wget -q -N -E -K ${url}") # FIXME: broken layout
   # wget -q -N -E -K -k -p -H "${issue_base_url}${n}.html" # ALTERNATIVE: still broken layout but offline images
-  result=$?
-  if [ "${result}" -ne 0 ]; then
-    fails+=("${cmd}") # remember fails
-  fi
 done
 popd || exit 1
 
-# print fails (if any) and exit
-if [ ${#fails[@]} -eq 0 ]; then
-  exit 0 # no error during execution
-else
-  echo "################"
-  echo "FAILED COMMANDS:"
-  for fail in "${fails[@]}"; do
-    echo "${fail}" # log failed command
-  done
-  exit 1 # errors during execution
-fi
+print_fails