Skip to content
Snippets Groups Projects
Commit e6edd4c2 authored by Jens Steidl's avatar Jens Steidl :baby_chick:
Browse files

- init script

parent fdc015ad
Branches
No related tags found
No related merge requests found
#!/bin/bash
# This script clones/updates/saves all Archivematica related GitHub repositories and issues in the current directory.
# --- configuration and run state ---
# GitHub organisations whose repositories are backed up (space-separated list)
orgnames='archivematica artefactual artefactual-labs'
# paging of the GitHub repository listing
page=1      # starting page
perpage=100 # HINT: 100 is already the per-page maximum
maxpage=3   # HINT: should be future proof, currently <200 AM-related repos
# local subdirectories for the cloned repos and the saved issues
local_repo_dir='archivematica_related_repos'
local_issue_dir='archivematica_related_issues'
# base URL for GitHub issues; the issue number and ".html" are appended to it
issue_base_url='https://github.com/archivematica/Issues/issues/'
# issue count as of 2024-04-02; only issue numbers above it are probed for
# existence before being downloaded
min_issue_count=1672
# commands that failed during the run, reported at the end of the script
declare -a fails=()
# backup github repos
# For every result page (from $page up to $maxpage) and every organisation in
# $orgnames, list the organisation's repositories via the GitHub REST API and
# clone each one into $local_repo_dir, or pull it if a local copy already
# exists. Every command that fails is appended to $fails.
mkdir -p "./$local_repo_dir"
pushd "$local_repo_dir" || exit 1
while ((page <= maxpage)); do
  for org in $orgnames; do # intentionally unquoted: space-separated list
    api_url="https://api.github.com/orgs/$org/repos?page=$page&per_page=$perpage"
    # Fetch the listing first: -f makes curl fail on HTTP errors (e.g. rate
    # limiting), so a broken request is recorded instead of being silently
    # treated as "this organisation has no repositories".
    if ! response=$(curl -fsS "$api_url"); then
      fails+=("curl -fsS $api_url") # remember fails
      continue
    fi
    while read -r repo_url; do
      [ -n "$repo_url" ] || continue
      # get repo name: last path component of the clone URL without ".git"
      repo_name=${repo_url##*/}
      repo_name=${repo_name%.git}
      echo "############"
      # Build the command as an array so every argument stays one word.
      if [ -d "./$repo_name" ]; then
        echo "update repo: $repo_name"
        git_cmd=(git -C "./$repo_name" pull --recurse-submodules) # update local repo
      else
        echo "clone repo : $repo_name"
        git_cmd=(git clone --recurse-submodules "$repo_url") # create local repo
      fi
      cmd="${git_cmd[*]}" # printable form of the command for the failure list
      if ! "${git_cmd[@]}"; then
        fails+=("$cmd") # remember fails
      fi
    done < <(printf '%s\n' "$response" | grep -e 'clone_url.*' | cut -d \" -f 4) # HINT: use process substitution to remember $fails
  done
  ((page += 1)) # next page
done
popd || exit 1
# backup github issues
# Save the HTML page of every issue, numbered from 1 upwards, into
# $local_issue_dir. Issue numbers up to $min_issue_count are downloaded
# directly; above that, each number is probed first and the loop stops at the
# first one that cannot be reached. Failed downloads are appended to $fails.
mkdir -p "./$local_issue_dir"
pushd "$local_issue_dir" || exit 1
for ((n = 1; n <= 100000; n++)); do
  url="${issue_base_url}${n}.html"
  if ((n > min_issue_count)); then
    # wget's own diagnostics are discarded on purpose; only the exit status
    # of the existence probe matters here.
    if ! wget --spider "$url" 2>/dev/null; then
      echo "stop: issue ${n} does not exist."
      break
    fi
  fi
  echo "save issue: ${n}"
  wget_cmd=(wget -q -N -E -K "$url") # FIXME: broken layout
  # wget_cmd=(wget -q -N -E -K -k -p -H "$url") # ALTERNATIVE: still broken layout but offline images
  if ! "${wget_cmd[@]}"; then
    fails+=("${wget_cmd[*]}") # remember the wget command that actually failed
  fi
done
popd || exit 1
# Final report: exit 0 when no command was recorded in $fails; otherwise list
# every recorded command, one per line, and exit 1.
if [ "${#fails[@]}" -gt 0 ]; then
  echo "################"
  echo "FAILED COMMANDS:"
  for failed_cmd in "${fails[@]}"; do
    echo "$failed_cmd" # log failed command
  done
  exit 1 # errors during execution
fi
exit 0 # no error during execution
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment