# From 8f6bdde6662aa8050a71eadbdb7bd5a3b079a56d Mon Sep 17 00:00:00 2001
# From: Alex Goodman
# Date: Fri, 20 Oct 2023 13:00:15 -0400
# Subject: [PATCH] Label PRs when the json schema changes (#2240)
#
# * label PRs when the json schema changes
#
# Signed-off-by: Alex Goodman
#
# * moderate pr comments
#
# Signed-off-by: Alex Goodman
#
# * be more strict about processing file names
#
# Signed-off-by: Alex Goodman
#
# ---------
#
# Signed-off-by: Alex Goodman
# ---
#  .github/scripts/labeler.py      | 224 ++++++++++++++++++++++++++++++++
#  .github/scripts/labeler_test.py |  65 +++++++++
#  .github/workflows/labeler.yaml  |  54 ++++++++
#  .gitignore                      |   5 +
#  DEVELOPING.md                   |   1 +
#  Makefile                        |   6 +-
#  6 files changed, 354 insertions(+), 1 deletion(-)
#  create mode 100644 .github/scripts/labeler.py
#  create mode 100644 .github/scripts/labeler_test.py
#  create mode 100644 .github/workflows/labeler.yaml
#
# diff --git a/.github/scripts/labeler.py b/.github/scripts/labeler.py
# new file mode 100644
# index 00000000000..7ddeee7b895
# --- /dev/null
# +++ b/.github/scripts/labeler.py
# @@ -0,0 +1,224 @@
from __future__ import annotations

import sys
import glob
import subprocess
import os
import re

DRY_RUN = False


def main(changed_files: str | None = None, merge_base_schema_files: str | None = None):
    """Label a PR (and queue a warning comment) when JSON schema files change.

    Reads ``GITHUB_PR_NUMBER`` and ``CI_COMMENT_FILE`` from the environment.
    ``CI_COMMENT_FILE`` is mandatory; the process exits with status 1 if it
    is unset.  ``DRY_RUN`` is switched on when no PR number is available or
    when ``changed_files`` is supplied.

    Args:
        changed_files: Optional path to a text file listing, one per line,
            the files changed by the PR.  Intended for local testing; when
            given, no CI/PR lookups are performed.
        merge_base_schema_files: Path to a text file listing, one per line,
            the schema files present at the merge base.  Required whenever
            ``changed_files`` is given.

    Exits with status 1 when: ``CI_COMMENT_FILE`` is unset;
    ``changed_files`` is given without ``merge_base_schema_files``; the
    script is not running in CI or not for a PR (non-local mode); or no
    schema files are found at the merge base.
    """
    global DRY_RUN

    pr_number = os.environ.get("GITHUB_PR_NUMBER")
    comment_file_path = os.environ.get("CI_COMMENT_FILE")

    if not comment_file_path:
        print("CI_COMMENT_FILE not set")
        sys.exit(1)

    if not pr_number:
        DRY_RUN = True

    if changed_files:
        DRY_RUN = True

        # Both listings are needed in local mode; without this guard a missing
        # second path surfaces as an opaque TypeError from open(None).
        if not merge_base_schema_files:
            print("merge_base_schema_files is required when changed_files is given")
            sys.exit(1)

        # read lines from file... this is useful for local testing
        with open(changed_files) as f:
            pr_changed_files = f.read().splitlines()

        with open(merge_base_schema_files) as f:
            og_json_schema_files = sort_json_schema_files(f.read().splitlines())

    else:
        if not is_ci():
            print("Not in CI")
            sys.exit(1)

        if not pr_number:
            print("Not a PR")
            sys.exit(1)

        pr_changed_files = get_pr_changed_files(pr_number)
        # since we are running this in the context of the pull_request_target, the checkout is the merge base..
        # that is the main branch of the original repo, NOT the branch in the forked repo (or branch in the target
        # repo for non-forked PRs). This means we just need to list the currently checked-out files to get a sense
        # of the state before a merge.
        og_json_schema_files = list_json_schema_files()

    pr_json_schema_files = filter_to_schema_files(pr_changed_files)

    # print("schema files in pr: ", summarize_schema_files(pr_json_schema_files))
    # print("og schema files: ", summarize_schema_files(og_json_schema_files))

    if not og_json_schema_files:
        print("No schema files found in merge base")
        sys.exit(1)

    # pr_json_schema_files = the schema files the PR added, removed, or changed.
    # Anything not already present at the merge base is new; the remainder
    # existed before and was therefore removed or modified.
    new_schema_files = set(pr_json_schema_files) - set(og_json_schema_files)
    removed_or_modified_schema_files = set(pr_json_schema_files) - set(new_schema_files)

    print("new schemas: ", summarize_schema_files(new_schema_files))
    print("removed or modified schemas:", summarize_schema_files(removed_or_modified_schema_files))

    # if there is a new or modified schema, we should add the "json-schema" label to the PR...
    if new_schema_files or removed_or_modified_schema_files:
        print("\nAdding json-schema label...")
        add_label(pr_number, "json-schema")
    else:
        remove_label(pr_number, "json-schema")

    # new schema files should be scrutinized, comparing the latest and added versions to see if it's a breaking
    # change (major version bump). Warn about it on the PR via adding a breaking-change label...
    # NOTE(review): the [-1] lookup presumes og_json_schema_files is ordered oldest-to-newest — confirm that
    # list_json_schema_files() returns a sorted list as sort_json_schema_files() does.
    if is_breaking_change(new_schema_files, og_json_schema_files[-1]):
        print("\nBreaking change detected...")
        add_label(pr_number, "breaking-change")
    else:
        remove_label(pr_number, "breaking-change")

    # modifying an existing schema could be a breaking change, we should warn about it on the PR via a comment...
    # removing schema files should never be allowed, we should warn about it on the PR via a comment...
    if removed_or_modified_schema_files:
        print("\nRemoved or modified schema detected...")
        schemas = sort_json_schema_files(list(removed_or_modified_schema_files))
        schemas_str = "\n".join([f" - {schema}" for schema in schemas])
        add_comment(comment_file_path, f"Detected modification or removal of existing json schemas:\n{schemas_str}", warning=True)


def add_comment(comment_file_path: str, comment: str, warning: bool = False, important: bool = False):
    """Write ``comment`` to ``comment_file_path``, optionally as a Markdown alert.

    When ``warning`` or ``important`` is set, every line of the comment is
    block-quoted (``"> "`` prefix) and a ``> [!WARNING]`` or ``> [!IMPORTANT]``
    header line is prepended; ``warning`` wins if both flags are set.  The
    file is overwritten, and the final text is echoed to stdout.

    Args:
        comment_file_path: Destination file.  Missing parent directories are
            created; a bare file name (no directory part) is written to the
            current working directory.
        comment: The comment text; may span multiple lines.
        warning: Render the comment as a ``[!WARNING]`` alert.
        important: Render the comment as an ``[!IMPORTANT]`` alert.
    """
    if warning or important:
        quoted_lines = [f"> {line}" for line in comment.splitlines()]
        comment = "\n".join(quoted_lines)

    if warning:
        comment = f"> [!WARNING]\n{comment}"
    elif important:
        comment = f"> [!IMPORTANT]\n{comment}"

    # create any parent directories if they don't exist; dirname() is "" for a
    # bare file name and os.makedirs("") raises FileNotFoundError, so skip it then
    parent_dir = os.path.dirname(comment_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(comment_file_path, "w") as f:
        f.write(comment)

    print(f"Comment file contents: {comment_file_path}")
    print(comment)


def add_label(pr_number: str, label: str):
    # run "gh pr edit --add-label
    ...  # NOTE(review): this definition continues beyond the reviewed excerpt; placeholder only keeps the excerpt parseable