| #!/usr/bin/env python3 |
| # ===-- commit-access-review.py --------------------------------------------===# |
| # |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| # |
| # ===------------------------------------------------------------------------===# |
| # |
| # ===------------------------------------------------------------------------===# |
| |
| import datetime |
| import github |
| import re |
| import requests |
| import time |
| import sys |
| import re |
| |
| |
class User:
    """
    Activity counters for a single user under review.

    ``triage_list`` is the shared mapping (user name -> ``User``) of everyone
    still under review.  As soon as the combined authored + merged + reviewed
    count reaches ``THRESHOLD`` the user is removed from that mapping.
    """

    # Combined number of contributions needed to leave the triage list.
    THRESHOLD = 5

    def __init__(self, name, triage_list):
        self.name = name
        self.authored = 0
        self.merged = 0
        self.reviewed = 0
        self.triage_list = triage_list

    def _drop_if_threshold_met(self, kind):
        """
        Remove this user from ``triage_list`` once the threshold is met.

        ``kind`` names the counter that was just updated and is only used in
        the log message.  The membership check makes the removal idempotent:
        updating a user that was already removed must not raise ``KeyError``.
        """
        if self.meets_threshold() and self.name in self.triage_list:
            print(self.name, f"meets the threshold with {kind} commits")
            del self.triage_list[self.name]

    def add_authored(self, val=1):
        """Add ``val`` to the authored counter and re-check the threshold."""
        self.authored += val
        self._drop_if_threshold_met("authored")

    def set_authored(self, val):
        """Replace the authored counter with ``val`` and re-check the threshold."""
        self.authored = 0
        self.add_authored(val)

    def add_merged(self, val=1):
        """Add ``val`` to the merged counter and re-check the threshold."""
        self.merged += val
        self._drop_if_threshold_met("merged")

    def add_reviewed(self, val=1):
        """Add ``val`` to the reviewed counter and re-check the threshold."""
        self.reviewed += val
        self._drop_if_threshold_met("reviewed")

    def get_total(self):
        """Return the sum of the authored, merged and reviewed counters."""
        return self.authored + self.merged + self.reviewed

    def meets_threshold(self):
        """Return True when the total activity is at least ``THRESHOLD``."""
        return self.get_total() >= self.THRESHOLD

    def __repr__(self):
        return f"{self.name} : a: {self.authored} m: {self.merged} r: {self.reviewed}"
| |
| |
def check_manual_requests(
    gh: github.Github, start_date: datetime.datetime
) -> list[str]:
    """
    Return a list of users who have been asked since ``start_date`` if they
    want to keep their commit access or if they have applied for commit
    access since ``start_date``.

    The list contains every ``@mention`` found in the body of the matching
    issues plus the login of each issue author.  It may contain duplicates.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            nodes {
              ... on Issue {
                author {
                  login
                }
                body
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
        """
    formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    variables = {
        "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infra:commit-access,infra:commit-access-request"
    }

    # Match only characters that can be part of a login.  Splitting on
    # space/comma/newline alone keeps trailing punctuation and the "\r" of
    # CRLF line endings ("@user.", "@user\r"), which then never compares
    # equal to a real login.
    mention_re = re.compile(r"@([A-Za-z0-9_-]+)")

    has_next_page = True
    users = []
    while has_next_page:
        _, res_data = gh._Github__requester.graphql_query(
            query=query, variables=variables
        )
        search = res_data["data"]["search"]
        for issue in search["nodes"]:
            # Be tolerant of nodes without a body (empty or non-issue nodes).
            users.extend(mention_re.findall(issue.get("body") or ""))
            # The author is null when the account has been deleted.
            if issue.get("author"):
                users.append(issue["author"]["login"])
        has_next_page = search["pageInfo"]["hasNextPage"]
        if has_next_page:
            # Continue the same search from the end of this page.
            variables["after"] = search["pageInfo"]["endCursor"]
    return users
| |
| |
def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
    """
    Get the number of commits that ``user`` has made since ``start_date``.

    Commits are counted on the ``main`` branch of every repository returned
    for the ``llvm-committers`` team.  Counting stops as soon as
    ``User.THRESHOLD`` is reached, so the result is a lower bound once it is
    at or above the threshold.
    """
    variables = {
        "owner": "llvm",
        "user": user,
        "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"),
    }

    # The history query filters by author id, so resolve the login first.
    user_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """

    _, res_data = gh._Github__requester.graphql_query(
        query=user_query, variables=variables
    )
    data = res_data["data"]
    variables["user_id"] = data["user"]["id"]

    query = """
        query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){
          organization(login: $owner) {
            teams(query: "llvm-committers" first:1) {
              nodes {
                repositories {
                  nodes {
                    ref(qualifiedName: "main") {
                      target {
                        ... on Commit {
                          history(since: $start_date, author: {id: $user_id }) {
                            totalCount
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
     """
    count = 0
    _, res_data = gh._Github__requester.graphql_query(
        query=query, variables=variables
    )
    data = res_data["data"]
    for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
        # ``ref`` comes back as null when the repository has no ``main``
        # branch; such a repository contributes no commits instead of
        # crashing the whole review.
        ref = repo["ref"] if repo else None
        if not ref:
            continue
        count += int(ref["target"]["history"]["totalCount"])
        if count >= User.THRESHOLD:
            break
    return count
| |
| |
def is_new_committer_query_repo(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Decide whether ``user`` counts as a new committer.  New committers may
    keep their commit access even if they don't meet the criteria.

    The decision is based on the last of the (at most five) commits returned
    by the history query on ``main`` of llvm-project: no commits at all, or a
    last commit that is not older than ``start_date``, means "new".
    """
    requester = gh._Github__requester

    # First resolve the login to a node id, which the history filter needs.
    id_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """
    variables = {"user": user}
    _, id_response = requester.graphql_query(query=id_query, variables=variables)
    variables.update(
        owner="llvm",
        user_id=id_response["data"]["user"]["id"],
        start_date=start_date.strftime("%Y-%m-%dT%H:%M:%S"),
    )

    history_query = """
        query ($owner: String!, $user_id: ID!){
          organization(login: $owner) {
            repository(name: "llvm-project") {
              ref(qualifiedName: "main") {
                target {
                  ... on Commit {
                    history(author: {id: $user_id }, first: 5) {
                      nodes {
                        committedDate
                      }
                    }
                  }
                }
              }
            }
          }
        }
    """
    _, history_response = requester.graphql_query(
        query=history_query, variables=variables
    )
    repository = history_response["data"]["organization"]["repository"]
    history = repository["ref"]["target"]["history"]["nodes"]
    if len(history) == 0:
        return True

    last_listed = datetime.datetime.strptime(
        history[-1]["committedDate"], "%Y-%m-%dT%H:%M:%SZ"
    )
    # A last listed commit from before the review window means the user has
    # been committing for longer than that window.
    return not last_listed < start_date
| |
| |
def is_new_committer(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Wrapper around is_new_committer_query_repo to handle exceptions.

    If the query fails for any reason the failure is reported on stderr and
    the user is treated as a new committer (``True`` is returned).
    """
    try:
        return is_new_committer_query_repo(gh, user, start_date)
    except Exception as error:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit and do not hide why the lookup failed.
        print(
            "Unable to determine whether",
            user,
            "is a new committer:",
            repr(error),
            file=sys.stderr,
        )
    return True
| |
| |
def get_review_count(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> int:
    """
    Count the reviews ``user`` has done since ``start_date``.

    A review is approximated by the search below: pull requests in the llvm
    organization merged after ``start_date`` that ``user`` commented on but
    did not author.
    """
    since = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    search_terms = (
        f"type:pr commenter:{user} -author:{user} merged:>{since} org:llvm"
    )

    # Only the total number of hits is needed, not the hits themselves.
    count_query = """
        query ($query: String!) {
          search(query: $query, type: ISSUE, first: 5) {
            issueCount
          }
        }
        """
    _, response = gh._Github__requester.graphql_query(
        query=count_query,
        variables={
            "owner": "llvm",
            "repo": "llvm-project",
            "user": user,
            "query": search_terms,
        },
    )
    return int(response["data"]["search"]["issueCount"])
| |
| |
def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
    """
    Walk over all PRs merged in the llvm organization since ``start_date``
    and credit the users in ``triage_list``: the author gets an authored PR,
    and whoever merged somebody else's PR gets a merged PR.

    The search is issued in consecutive 7-day windows, each of them paginated
    100 results at a time.
    """

    search_query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            issueCount,
            nodes {
              ... on PullRequest {
                 author {
                   login
                 }
                 mergedBy {
                   login
                 }
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    window = datetime.timedelta(days=7)

    window_start = start_date
    while window_start < datetime.datetime.now():
        window_end = window_start + window
        merged_from = window_start.strftime(timestamp_format)
        merged_until = window_end.strftime(timestamp_format)
        variables = {
            "query": f"type:pr is:merged merged:{merged_from}..{merged_until} org:llvm",
        }

        # Fetch page after page of this window until GitHub reports the end.
        while True:
            print(variables)
            _, response = gh._Github__requester.graphql_query(
                query=search_query, variables=variables
            )
            search = response["data"]["search"]
            for pr in search["nodes"]:
                # Users can be None if the user has been deleted.
                author_info = pr["author"]
                if not author_info:
                    continue
                author = author_info["login"]
                if author in triage_list:
                    triage_list[author].add_authored()

                merger_info = pr["mergedBy"]
                if not merger_info:
                    continue
                merger = merger_info["login"]
                # Merging your own PR is already credited as authored.
                if merger != author and merger in triage_list:
                    triage_list[merger].add_merged()

            page_info = search["pageInfo"]
            if not page_info["hasNextPage"]:
                break
            variables["after"] = page_info["endCursor"]

        window_start = window_end
| |
| |
def main():
    """
    Review the commit access of all llvm-project collaborators with push
    permission and write the users that remain on the triage list to
    ``triagers.log``.

    The GitHub token is taken from the first command line argument.  Users
    leave the triage list when they requested access within the last year,
    when their activity meets ``User.THRESHOLD``, or when they are new
    committers.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: commit-access-review.py <github-token>")
    token = sys.argv[1]
    gh = github.Github(login_or_token=token)
    org = gh.get_organization("llvm")
    repo = org.get_repo("llvm-project")
    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    triage_list = {}
    for collaborator in repo.get_collaborators(permission="push"):
        triage_list[collaborator.login] = User(collaborator.login, triage_list)

    print("Start:", len(triage_list), "triagers")
    # Step 0 Check if users have requested commit access in the last year.
    for user in check_manual_requests(gh, one_year_ago):
        if user in triage_list:
            print(user, "requested commit access in the last year.")
            del triage_list[user]
    print("After Request Check:", len(triage_list), "triagers")

    # Step 1 count all PRs authored or merged
    count_prs(gh, triage_list, one_year_ago)

    print("After PRs:", len(triage_list), "triagers")

    if len(triage_list) == 0:
        sys.exit(0)

    # Step 2 check for reviews
    # Iterate over a snapshot: the User objects remove themselves from
    # triage_list when they reach the threshold.
    for user in list(triage_list):
        review_count = get_review_count(gh, user, one_year_ago)
        triage_list[user].add_reviewed(review_count)

    print("After Reviews:", len(triage_list), "triagers")

    if len(triage_list) == 0:
        sys.exit(0)

    # Step 3 check for number of commits
    for user in list(triage_list):
        num_commits = get_num_commits(gh, user, one_year_ago)
        # Override the total number of commits to not double count commits and
        # authored PRs.
        triage_list[user].set_authored(num_commits)

    print("After Commits:", len(triage_list), "triagers")

    # Step 4 check for new committers
    for user in list(triage_list):
        print("Checking", user)
        if is_new_committer(gh, user, one_year_ago):
            print("Removing new committer: ", user)
            del triage_list[user]

    print("Complete:", len(triage_list), "triagers")

    with open("triagers.log", "w") as triagers_log:
        for user, activity in triage_list.items():
            print(repr(activity))
            triagers_log.write(user + "\n")
| |
| |
# Run the review only when this file is executed as a script.
if __name__ == "__main__":
    main()