-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcopy_github_repos.sh
executable file
·346 lines (273 loc) · 9.08 KB
/
copy_github_repos.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#!/usr/bin/env bash
# ---------------------------------------------------------------
# User-tunable settings: adjust these to suit your needs.
# ---------------------------------------------------------------
# Root directory under which timestamped backup directories are created.
BACKUPS="${HOME}/github-backups"
# Directory that receives one log file per run.
LOGS_DIR="${BACKUPS}/logs"
# Maximum number of API pages to fetch. Pages hold 100 repos each, so 10
# covers up to 1000 repos; raise it if you have more than that.
MAX=10
# Age cut-off for incremental backups, written as a relative date
# expression understood by `date -d`.
DAYS_AGO="-30 days"
###############################################
# DO NOT EDIT BELOW THIS LINE
# Counts the command-line options seen by the option parser.
option_count=0
# Timestamp of this run; used in backup directory and log file names.
CURRENT_DATE="$(date '+%Y-%m-%d_%H-%M-%S')"
# Cut-off instant as epoch seconds (now shifted by DAYS_AGO; 30 days is
# 2592000 seconds). Repos last updated before this are skipped unless a
# full backup is requested.
CURRENT_DATE_MINUS_N_DAYS="$(date -d "${DAYS_AGO}" '+%s')"
check_environment() {
  # Confirm that every external program the script relies on resolves to an
  # executable file. The first missing one is reported on stderr and the
  # script exits with status 1.
  local tool
  for tool in bash curl jq git; do
    if [ -x "$(command -v "$tool")" ]; then
      continue
    fi
    echo "Error: $tool is not installed." >&2
    exit 1
  done
}
usage() {
  # Print the command-line synopsis and examples, then exit.
  # Arguments: $1 - optional exit status (defaults to 1, which is what every
  #                 argument-less call gets)
  # Outputs:   the one-line synopsis goes to stderr, the explanatory text
  #            and examples go to stdout
  # Accepted options: -e/--envfile, -p/--prefix and -w/--workdir take a
  # value; -r/--run, -h/--help and -f/--full are flags.
  local status="${1:-1}"
  echo "Usage: $0 [-e|--envfile <string>] [-p|--prefix <string>] [-w | --workdir <string>] [-h|--help] [-r | --run] [-f | --full]" 1>&2
  # Unquoted delimiter on purpose: $0 must expand inside the examples.
  cat <<EOF
<env_file> is the name of the file to read environment variables from.
Example 1:
$0 -e env/youruser -p youruser --run --full
backs up all repos it can find up to 10 pages of 100 repos per page
If the env file is not in the current directory, provide the full path to the file.
Example 2:
$0 -e env/youruser -p youruser --run
backs up repos that have been updated in the last 30 days
Example env file contents
GIT_USERNAME=<your github username>
GIT_TOKEN=ghp_<your github personal access token>
EOF
  exit "$status"
}
check_environment
# Parse options using getopt.
# Colons after e, p and w mean those options take a value; r, h and f have no
# colon and are therefore boolean flags.
# getopt returns non-zero when it meets an unknown option or a missing value,
# so stop with the usage text instead of carrying on with a partially parsed
# command line (which could start a backup the user did not ask for).
if ! TEMP=$(getopt -o e:p:w:rhf --long envfile:,prefix:,workdir:,run,help,full -n "$0" -- "$@"); then
  usage
fi
# Load getopt's normalised, quoted option list as the positional parameters.
eval set -- "$TEMP"
while true; do
  case "$1" in
    -e | --envfile)
      e=$2
      shift 2
      ;;
    -p | --prefix)
      p=$2
      shift 2
      ;;
    -w | --workdir)
      workdir=$2
      shift 2
      ;;
    -r | --run)
      r=true
      shift
      ;;
    -f | --full)
      f=true
      shift
      ;;
    -h | --help)
      h=true
      # usage exits the script, so nothing after it in this arm would run.
      usage
      ;;
    --)
      # End-of-options marker added by getopt.
      shift
      break
      ;;
    *)
      usage
      ;;
  esac
  # Count every option processed; the "--" arm breaks out before this line.
  option_count=$((option_count + 1))
done
# Check if no options were provided
if [ "$option_count" -eq 0 ]; then
  echo ""
  usage
fi
# -e and -p are both optional: fall back to ".env" and to no prefix.
if [ -z "${e}" ] && [ -z "${p}" ]; then
  echo "No values provided for environment file -e and prefix -p, using default values or fallback logic"
  e=".env"
  p=""
elif [ -z "${e}" ]; then
  echo "No value provided for environment file -e, using default value or fallback logic"
  e=".env"
elif [ -z "${p}" ]; then
  echo "No value provided for prefix -p, using default value or fallback logic"
  p=""
fi
# Clone one repository and check out each of its remote branches locally.
# Globals:   repo (read) - "owner/name" of the repository to clone
#            GIT_USERNAME, GIT_TOKEN (read) - credentials placed in the URL
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   0 on success; 1 if the clone fails or the clone directory (or
#            the starting directory) cannot be entered. The working
#            directory is left where it was on entry whenever it can be.
function clone_repo() {
  local start_dir repo_name line branch_name
  start_dir=$PWD
  echo "Cloning $repo..."
  # NOTE(review): the credentials embedded in this URL are presumably kept in
  # the clone's configuration as the origin URL - confirm, and consider a
  # credential helper if the backups are shared.
  if ! git clone "https://$GIT_USERNAME:$GIT_TOKEN@github.com/$repo.git"; then
    echo "Error: failed to clone $repo, skipping." >&2
    return 1
  fi
  # Change into the cloned repository directory. Bail out if that fails:
  # otherwise the git commands below would run in the wrong directory and
  # the final directory change would leave the backup directory.
  repo_name=$(basename "$repo")
  if ! cd "$repo_name"; then
    echo "Error: cannot enter $repo_name after cloning $repo, skipping." >&2
    return 1
  fi
  # Fetch the latest changes from the remote repository
  git fetch --prune
  # Iterate through remote branches. The list is read on fd 4 so that git
  # keeps the script's own stdin.
  while IFS= read -r line <&4; do
    # Strip the leading whitespace that "git branch -r" puts on each line.
    line="${line#"${line%%[![:space:]]*}"}"
    # Skip blank lines and symbolic refs such as "origin/HEAD -> origin/main".
    case "$line" in
      '' | *' -> '*) continue ;;
    esac
    branch_name="${line#origin/}"
    echo "Checking out [$branch_name]..."
    git checkout "$branch_name"
  done 4< <(git branch -r)
  # Return to the directory we started in.
  cd "$start_dir" || return 1
  echo "Done cloning $repo."
  echo
}
# Process one page of the repository listing and back up the selected repos.
# Globals:   JSON (read) - JSON array for the current page; each element's
#                          full_name and updated_at fields are used
#            f (read) - non-empty means full backup (every repo is cloned)
#            CURRENT_DATE_MINUS_N_DAYS (read) - cut-off in epoch seconds
#            repo (written) - "owner/name" handed to clone_repo
# Outputs:   one progress line per repository that gets cloned
function backup_each_repo_in_page() {
  local datetime epoch_timestamp
  # One "full_name|updated_at" line per repository. IFS is changed only for
  # the read itself, so the caller's word splitting is left alone. The list
  # is read on fd 3 so clone_repo keeps the script's own stdin.
  while IFS='|' read -r repo datetime <&3; do
    [ -n "$repo" ] || continue
    if [ -z "${f}" ]; then
      # Incremental mode: skip repos not updated in the last N days.
      if epoch_timestamp=$(date -d "$datetime" +%s) && [ -n "$epoch_timestamp" ]; then
        if [ "$epoch_timestamp" -lt "$CURRENT_DATE_MINUS_N_DAYS" ]; then
          continue
        fi
      else
        # An unreadable timestamp must not cause a repo to be dropped.
        echo "Warning: cannot parse update time [$datetime] of $repo, backing it up anyway" >&2
      fi
      echo "recently modified repo $repo datetime is $datetime"
    else
      echo "repo is $repo [full backup in progress]"
    fi
    clone_repo
  done 3< <(printf '%s\n' "$JSON" | jq -r '.[] | "\(.full_name)|\(.updated_at)"')
}
# Fetch the authenticated user's repository list page by page (100 repos per
# page, at most MAX pages) and hand each page to backup_each_repo_in_page.
# Globals:   GIT_TOKEN, MAX, workdir (read)
#            BACKUP_DIR (read; replaced by workdir when workdir is set)
#            JSON, REPO_COUNT (written) - current page and its element count
# Exits:     1 on bad credentials, an unusable response, an inaccessible
#            backup directory, or when the very first page is empty
function curl_github_api() {
  local page=1
  # Enter the backup directory once, before paging starts. Doing this inside
  # the loop would re-create and re-enter a relative workdir on every page,
  # nesting it one level deeper each time.
  if [ -n "${workdir}" ]; then
    echo "workdir is not empty, setting BACKUP_DIR to workdir"
    BACKUP_DIR="${workdir}"
    mkdir -p "$BACKUP_DIR"
  fi
  if ! cd "$BACKUP_DIR"; then
    echo "Error: cannot change into backup directory $BACKUP_DIR" >&2
    exit 1
  fi
  while [ "$page" -le "$MAX" ]; do
    echo "Getting page $page of repos..."
    JSON=$(curl -s -L \
      -H "Accept: application/vnd.github+json" \
      -H "Authorization: Bearer $GIT_TOKEN" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      "https://api.github.com/user/repos?per_page=100&page=$page")
    # check if response contains the string "Requires authentication" or "Bad credentials"
    if [[ $JSON == *"Requires authentication"* ]] || [[ $JSON == *"Bad credentials"* ]]; then
      echo "Invalid username or token. Please set the correct username and token in the script."
      exit 1
    fi
    # Only an array is a repository list; anything else (an error object, an
    # empty body from a failed request) yields a non-numeric count here.
    REPO_COUNT=$(printf '%s\n' "$JSON" | jq 'if type == "array" then length else -1 end')
    if ! [[ "$REPO_COUNT" =~ ^[0-9]+$ ]]; then
      echo "Error: unexpected response from the GitHub API on page $page." >&2
      exit 1
    fi
    if [ "$REPO_COUNT" -eq 0 ]; then
      if [ "$page" -eq 1 ]; then
        echo "No repositories found."
        exit 1
      fi
      # An empty later page only means the previous page was exactly full:
      # the listing is complete, which is not an error.
      break
    fi
    echo "Found $REPO_COUNT objects."
    # process the response
    backup_each_repo_in_page
    echo ""
    echo "... repositories have been cloned to $BACKUP_DIR"
    # fewer than 100 objects means this was the last page
    if [ "$REPO_COUNT" -lt 100 ]; then
      break
    fi
    page=$((page + 1))
  done
  return 0
}
# Report the parsed arguments, load the environment file and, when --run was
# given, set up the backup and log directories and start the backup.
# Globals:   e, p, r, f, workdir, BACKUPS, CURRENT_DATE (read)
#            GIT_USERNAME (read; expected to be set by the sourced env file)
#            prefix, BACKUP_DIR, LOGS_DIR, LOG_FILE (written)
# Exits:     1 if the environment file is missing or the directories cannot
#            be created, otherwise 0
function main() {
  echo ""
  echo "using the following arguments:"
  echo ""
  echo "envfile = ${e}"
  echo " prefix = ${p}"
  echo " run = ${r}"
  echo " full = ${f}"
  echo "workdir = ${workdir}"
  # if the environment file doesn't exist, print error and exit
  if [ ! -f "$e" ]; then
    echo "environment file $e does not exist"
    exit 1
  fi
  source "$e"
  prefix=""
  if [ -z "${p}" ]; then
    echo "no prefix set"
  else
    prefix="${p}_"
    echo "prefix set to [$prefix]"
  fi
  # without --run there is nothing more to do
  if [ -z "${r}" ]; then
    echo "not running anything as --run is not present in the arguments"
    exit 0
  fi
  if [ -n "${workdir}" ]; then
    # An explicit workdir holds both the clones and the logs. The default
    # timestamped directory is not created in this case, so no empty
    # directory is left behind under BACKUPS.
    echo "workdir is not empty, setting BACKUP_DIR to workdir"
    BACKUP_DIR="${workdir}"
    LOGS_DIR="${workdir}/logs"
  else
    # default: a backup directory with the prefix and timestamp in its name
    BACKUP_DIR="${BACKUPS}/${prefix}${CURRENT_DATE}"
  fi
  if ! mkdir -p "$BACKUP_DIR" "$LOGS_DIR"; then
    echo "Error: cannot create $BACKUP_DIR or $LOGS_DIR" >&2
    exit 1
  fi
  LOG_FILE="$LOGS_DIR/$GIT_USERNAME-$CURRENT_DATE.log"
  # From here on stdout is shown on screen and appended to the log file.
  exec > >(tee -a "$LOG_FILE")
  echo "run is true, runing curl_github_api"
  echo "backups is $BACKUPS"
  echo "prefix is $prefix"
  echo "CURRENT_DATE is $CURRENT_DATE"
  echo "BACKUP_DIR is $BACKUP_DIR"
  curl_github_api
  exit 0
}
# Script entry point. The command line has already been parsed at file scope,
# so main is called without arguments and reads the parsed values (e, p, r,
# f, workdir) from globals.
main