Skip to content

Commit

Permalink
Merge pull request #383 from ipdgroup/master
Browse files Browse the repository at this point in the history
Implementing incremental by files, safer version of incremental backup.
  • Loading branch information
josegonzalez authored Feb 1, 2025
2 parents 3a4aebb + 0f34ecb commit 095b712
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ CLI Help output::
log level to use (default: info, possible levels:
debug, info, warning, error, critical)
-i, --incremental incremental backup
--incremental-by-files incremental backup using modified time of files
--starred include JSON output of starred repositories in backup
--all-starred include starred repositories in backup [*]
--watched include JSON output of watched repositories in backup
Expand Down Expand Up @@ -239,6 +240,12 @@ Using (``-i, --incremental``) will only request new data from the API **since th

This means any blocking errors on previous runs can cause a large amount of missing data in backups.

Using (``--incremental-by-files``) will request new data from the API **based on when the backup file was last modified on the filesystem**. Note that if you modify a backup file yourself, its newer timestamp may cause subsequent updates to be skipped.

This is still safer than the timestamp-file-based ``-i, --incremental`` option, since a failed run cannot leave a misleading "last run" marker.

Currently, only issues and pull requests are handled this way.

Known blocking errors
---------------------

Expand Down
29 changes: 25 additions & 4 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ def parse_args(args=None):
dest="incremental",
help="incremental backup",
)
parser.add_argument(
"--incremental-by-files",
action="store_true",
dest="incremental_by_files",
help="incremental backup based on modification date of files",
)
parser.add_argument(
"--starred",
action="store_true",
Expand Down Expand Up @@ -1114,16 +1120,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
comments_template = _issue_template + "/{0}/comments"
events_template = _issue_template + "/{0}/events"
for number, issue in list(issues.items()):
issue_file = "{0}/{1}.json".format(issue_cwd, number)
if args.incremental_by_files and os.path.isfile(issue_file):
modified = os.path.getmtime(issue_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > issue["updated_at"]:
logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number))
continue

if args.include_issue_comments or args.include_everything:
template = comments_template.format(number)
issues[number]["comment_data"] = retrieve_data(args, template)
if args.include_issue_events or args.include_everything:
template = events_template.format(number)
issues[number]["event_data"] = retrieve_data(args, template)

issue_file = "{0}/{1}.json".format(issue_cwd, number)
with codecs.open(issue_file, "w", encoding="utf-8") as f:
with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f:
json_dump(issue, f)
os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic


def backup_pulls(args, repo_cwd, repository, repos_template):
Expand Down Expand Up @@ -1176,6 +1190,13 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
comments_template = _pulls_template + "/{0}/comments"
commits_template = _pulls_template + "/{0}/commits"
for number, pull in list(pulls.items()):
pull_file = "{0}/{1}.json".format(pulls_cwd, number)
if args.incremental_by_files and os.path.isfile(pull_file):
modified = os.path.getmtime(pull_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > pull["updated_at"]:
logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number))
continue
if args.include_pull_comments or args.include_everything:
template = comments_regular_template.format(number)
pulls[number]["comment_regular_data"] = retrieve_data(args, template)
Expand All @@ -1185,9 +1206,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
template = commits_template.format(number)
pulls[number]["commit_data"] = retrieve_data(args, template)

pull_file = "{0}/{1}.json".format(pulls_cwd, number)
with codecs.open(pull_file, "w", encoding="utf-8") as f:
with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f:
json_dump(pull, f)
os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic


def backup_milestones(args, repo_cwd, repository, repos_template):
Expand Down

0 comments on commit 095b712

Please # to comment.