From: Hoa Nguyen Date: Fri, 18 Sep 2020 01:32:19 +0000 (-0700) Subject: util: Add a gerrit bot X-Git-Tag: develop-gem5-snapshot~411 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dbe4ab5c0f4c0699df70a20875520ffc56cea31a;p=gem5.git util: Add a gerrit bot This bot utilizes the Gerrit REST API to query for new changes made to Gerrit within a certain amount of time and performs a set of tests on the changes. Signed-off-by: Hoa Nguyen Change-Id: I9d5af31d952bc0cd791f1569e6aac7c270e687e4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/34737 Reviewed-by: Andreas Sandberg Reviewed-by: Jason Lowe-Power Reviewed-by: Bobby R. Bruce Maintainer: Jason Lowe-Power Tested-by: kokoro --- diff --git a/util/gerrit-bot/.gitignore b/util/gerrit-bot/.gitignore new file mode 100644 index 000000000..ea26ec23a --- /dev/null +++ b/util/gerrit-bot/.gitignore @@ -0,0 +1 @@ +.data diff --git a/util/gerrit-bot/README.md b/util/gerrit-bot/README.md new file mode 100644 index 000000000..df1b62b4c --- /dev/null +++ b/util/gerrit-bot/README.md @@ -0,0 +1,57 @@ +## Gerrit Bot + +### Getting Username and Password + +* Follow this link +[https://gem5-review.googlesource.com/new-password](https://gem5-review.googlesource.com/new-password) +and copy the authenticating script to a file, supposedly `gerrit_auth_script`. + +* After that, run the `extract_gitcookies.py` to extract the username and +password from the obtained script. +For example, the following command extracts the username and password from +`gerrit_auth_script` and writes them to `GEM5_BOT_AUTH_INFO`, +```sh +python3 extract_gitcookies.py gerrit_auth_script GEM5_BOT_AUTH_INFO +``` +The `GEM5_BOT_AUTH_INFO` will have two lines: the first line contains the +username and the second line is the corresponding password. +**Notes:** +* The above link, [https://gem5-review.googlesource.com/new-password](https://gem5-review.googlesource.com/new-password), +generates a new pair of username and password per visit. 
+* The `extract_gitcookies.py` file is also able to read from `.gitcookies` +file. For example, `python3 extract_gitcookies.py ~/.gitcookies output` +will write all pairs of username and password in two lines per pair to +`output`. +* The gerrit-bot only reads the pair of username and password appearing +in the first and the second line in the `GEM5_BOT_AUTH_INFO` file. + +### Gerrit Bot + +**Notes:** this is likely to be changed. + +The idea of implementing the bot is as follows, +* The `Configs` class should contain all constants that are configurable +prior to running. +* Classes such as `LabelInfo` and `ReviewInput` are simplified versions +resembling those data structures of the same name according to the +[Gerrit REST API documentation](https://gerrit-review.googlesource.com/Documentation/rest-api.html#_endpoints). +* In the class `GerritRestAPIRequester`, + * The `__generate_*_request()` functions should be a one-to-one function +to a set of Gerrit REST API endpoints. The functions should generate a +`requests.Request` object. + * The `send_*()` functions are adapted to a more specific use case. + +### Gerrit API +* Query options: [https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#query-options](https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#query-options) + +### Appendix I. `extract_gitcookies.py` +This script extracts all pairs of username and password from the gerrit +authentication script from a file or a .gitcookies file. + +The usage of the script is as follows, +```sh +python3 extract_gitcookies.py input_path output_path +``` + +### Appendix II. `MAINTAINERS.json` +This file should be consistent with the file `MAINTAINERS`. 
diff --git a/util/gerrit-bot/bot.py b/util/gerrit-bot/bot.py new file mode 100644 index 000000000..f63008fd7 --- /dev/null +++ b/util/gerrit-bot/bot.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class GerritBotConfig:
    """Plain attribute container holding the settings read by `GerritBot`.

    Every key of the optional `config` mapping becomes an attribute of the
    instance.
    """

    def __init__(self, config=None):
        # `None` replaces the former shared `{}` default (mutable default
        # argument); omitting `config` still yields an empty object.
        if config is not None:
            self.__dict__.update(config)

    @staticmethod
    def DefaultConfig():
        """Return a `GerritBotConfig` filled with the bot's default values."""
        default_config = GerritBotConfig()
        default_config.auth_file_path = ".data/auth"
        default_config.time_tracker_file_path = ".data/prev_query_time"
        # query changes made within 2 days if not specified
        default_config.default_query_age = "2d"
        # the maintainers library will figure the path out
        default_config.maintainers_file_path = None
        default_config.api_entry_point = "https://gem5-review.googlesource.com"
        default_config.projects_prefix = "public/gem5"
        default_config.query_limit = 1000  # at most 1000 new changes per query
        default_config.request_timeout = 10  # seconds
        return default_config


class GerritBot:
    """Query Gerrit for recently updated changes and add their maintainers.

    A run consists of three steps: `_pre_run` works out how far back to
    query, `_run` queries the changes and adds maintainers as reviewers, and
    `_post_run` records the time of this run for the next one.
    """

    def __init__(self, config):
        """Read the credentials and set up the API and maintainers objects.

        `config` must provide the attributes set by
        `GerritBotConfig.DefaultConfig()`.
        """
        self.config = config

        self.auth = self.__read_auth_file(self.config.auth_file_path)

        # Initialize the Gerrit API object
        self.gerrit_api = GerritRestAPI(self.auth,
                                        self.config.api_entry_point,
                                        self.config.request_timeout)

        self.account_id = self.__get_bot_account_id()
        self.maintainers = maint.lib.maintainers.Maintainers.from_file(
            self.config.maintainers_file_path)

    def __read_auth_file(self, auth_file_path):
        """Return `(username, password)` from the first two lines of a file.

        Raises IndexError when the file has fewer than two lines.
        """
        with open(auth_file_path, "r") as f:
            lines = f.readlines()
        username = lines[0].strip()
        password = lines[1].strip()
        return (username, password)

    def __read_time_tracker_file(self, file_path):
        """Return the previous query time stored in `file_path`, or 0.

        The first line of the file is expected to hold the value written by
        `__update_time_tracker_file`. A missing, empty or unparsable file
        only produces a warning and yields 0.
        """
        prev_query_time = 0

        try:
            with open(file_path, "r") as f:
                lines = f.readlines()
            prev_query_time = int(float(lines[0].strip()))
        except FileNotFoundError:
            print(f"warning: cannot find the time tracker file at "
                  f"`{file_path}`. Previous query time is set to 0.")
        except IndexError:
            print(f"warning: cannot find the content of the time tracker file "
                  f"at `{file_path}`. Previous query time is set 0.")
        except ValueError:
            # A corrupted first line should not abort the bot; fall back to
            # the default query age just like a missing file does.
            print(f"warning: cannot parse the content of the time tracker "
                  f"file at `{file_path}`. Previous query time is set to 0.")

        return prev_query_time

    def __update_time_tracker_file(self, file_path):
        """Overwrite `file_path` with the current `time.time()` value."""
        prev_query_time = time.time()
        with open(file_path, "w") as f:
            f.write(f"{prev_query_time}\n")
            f.write(f"# The above time is the result of calling time.time() "
                    f"in Python.")

    def __get_bot_account_id(self):
        """Return the `_account_id` of the account the bot is logged in as.

        Raises RuntimeError when the account details cannot be retrieved.
        """
        account_info = Parser.parse(self.gerrit_api.get_account("self"))
        if account_info is None:
            # `Parser.parse` yields None for a failed request; fail with a
            # clear message instead of an AttributeError on None.
            raise RuntimeError("Couldn't get the account info of the bot")
        return account_info._account_id

    def __query_new_changes(self, query_age):
        """Return the open, non-WIP changes updated within `query_age`.

        `query_age` is a Gerrit age expression such as "2d". Raises
        RuntimeError when Gerrit answers with a status code of 300 or above.
        """
        query = (f"projects:{self.config.projects_prefix} "
                 f"status:open -is:wip -age:{query_age}")
        response = self.gerrit_api.query_changes(query,
                                                 self.config.query_limit,
                                                 "CURRENT_REVISION")

        if response.status_code >= 300:
            print("Error: Couldn't query new Gerrit changes")
            # Dump the failing response (the original referenced an
            # undefined name here).
            print(vars(response))
            raise RuntimeError(
                f"Couldn't query new Gerrit changes "
                f"(status code {response.status_code})")

        return Parser.get_json_content(response)

    def _pre_run(self):
        """Work out the age window to query from the last recorded run."""
        self.prev_query_time = \
            self.__read_time_tracker_file(self.config.time_tracker_file_path)
        if self.prev_query_time > 0:
            self.query_age = \
                convert_time_in_seconds(int(time.time() - self.prev_query_time))
        else:
            # No usable record of a previous run: use the configured default.
            self.query_age = self.config.default_query_age

    def _run(self):
        """Add the maintainers as reviewers to every newly queried change."""
        new_changes = self.__query_new_changes(self.query_age)
        for new_change in new_changes:
            add_maintainers_to_change(new_change, self.maintainers,
                                      self.gerrit_api)

    def _post_run(self):
        """Record the time of this run for the next invocation."""
        self.__update_time_tracker_file(self.config.time_tracker_file_path)

    def run(self):
        """Execute one full pass of the bot."""
        self._pre_run()
        self._run()
        self._post_run()


if __name__ == "__main__":
    default_config = GerritBotConfig.DefaultConfig()
    gerrit_bot = GerritBot(default_config)
    gerrit_bot.run()
b/util/gerrit-bot/extract_gitcookies.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def parse_gitcookies_line(raw):
    """Extract `(username, password)` from one line of credentials.

    `raw` is either a line of a .gitcookies file (7 tab-separated fields) or
    a line of the authentication script (7 comma-separated fields). The last
    field has the form `<4-char prefix><username>=<password>`.

    Returns `(None, None)` when the line does not have exactly 7 fields or
    when the last field holds no `=`.
    """
    stripped = raw.strip()
    # if this is a line from .gitcookies, the delimiter is `\t`
    fields = stripped.split('\t')
    if len(fields) < 7:
        # if this is a line from auth script, the delimiter is `,`
        fields = stripped.split(',')
    if len(fields) != 7:
        return None, None
    # Drop the first 4 characters of the last field
    # (presumably the `git-` prefix of the cookie name -- TODO confirm).
    credential = fields[-1][4:]
    # Split on the first `=` only, so a password that itself contains `=`
    # is kept whole instead of being truncated.
    username, separator, password = credential.partition("=")
    if not separator:
        return None, None
    return username, password


def parse_gitcookies(input_path):
    """Return a dict mapping each username in `input_path` to its password.

    Lines that do not yield a username are skipped; a username appearing
    more than once keeps the password of its last occurrence.
    """
    username_password_dict = {}
    with open(input_path, "r") as input_stream:
        for line in input_stream:
            username, password = parse_gitcookies_line(line)
            if not username:
                continue
            username_password_dict[username] = password
    return username_password_dict


if __name__ == "__main__":
    # Adjacent string literals are concatenated into one string; the
    # original passed tuples here and left the first call unclosed.
    parser = argparse.ArgumentParser(
        description=("Extract username and password from .gitcookies "
                     "or from the script used to write .gitcookies file"))
    parser.add_argument("input",
                        help=("Path to a .gitcookies file or a file with "
                              "a similar format"))
    parser.add_argument("output", help="Path to the output file")
    args = parser.parse_args()
    username_password_dict = parse_gitcookies(args.input)
    # Two lines per pair: the username, then the password.
    with open(args.output, "w") as output_stream:
        for username, password in username_password_dict.items():
            output_stream.write(f"{username}\n{password}\n")
class GerritResponseParser:
    """Helpers that turn `requests.Response` objects from Gerrit into data."""

    # Gerrit starts every JSON body with this magic prefix line to prevent
    # cross-site script inclusion; it is not part of the JSON content.
    _MAGIC_PREFIX = b")]}'"

    @staticmethod
    def _strip_magic_prefix(content):
        """Return `content` without the leading magic prefix, if present."""
        prefix = GerritResponseParser._MAGIC_PREFIX
        if content.startswith(prefix):
            return content[len(prefix):]
        # No prefix: keep every byte rather than blindly dropping four.
        return content

    @staticmethod
    def get_json_content(response):
        """Return the decoded JSON body of `response`.

        Returns None when the status code is 300 or above.
        """
        assert isinstance(response, requests.Response)

        # If the status code is not in the 200s range, it doesn't have content.
        if response.status_code >= 300:
            return None

        # response.content is a byte array encoded in utf-8. Whitespace left
        # after the prefix (the line break) is ignored by the JSON parser.
        content = GerritResponseParser._strip_magic_prefix(response.content)
        return json.loads(content.decode("utf-8"))

    # TODO: parsing method for each Gerrit data structure
    @staticmethod
    def parse(response):
        """Return the JSON object of `response` as a `SimpleNamespace`.

        Returns None when the response has no (or empty) JSON content.
        """
        json_content = GerritResponseParser.get_json_content(response)
        if not json_content:
            return None
        return SimpleNamespace(**json_content)


class GerritRestAPI:
    """Thin wrapper around the Gerrit REST endpoints used by the bot."""

    def __init__(self, auth, api_entry_point, timeout):
        """Store the credentials, base URL and request timeout.

        `auth` is a `(username, password)` pair; `timeout` is passed to
        every request.
        """
        self.username = auth[0]
        self.password = auth[1]
        self.api_entry_point = api_entry_point
        self.timeout = timeout

    # helper methods for sending GET and POST requests
    def _get(self, endpoint, params=None):
        """Send an authenticated GET request to `endpoint`."""
        request_url = urljoin(self.api_entry_point, endpoint)
        return requests.get(request_url,
                            params=params,
                            timeout=self.timeout,
                            auth=(self.username, self.password))

    def _post(self, endpoint, json_content):
        """Send an authenticated POST request with a JSON body."""
        request_url = urljoin(self.api_entry_point, endpoint)
        return requests.post(request_url,
                             json=json_content,
                             timeout=self.timeout,
                             auth=(self.username, self.password))

    # --------------- Account Endpoints ---------------
    # https://gerrit-review.googlesource.com/Documentation/
    # rest-api-accounts.html#get-account
    def get_account(self, account_id="self"):
        """ get an account detail from an account_id """
        return self._get(f"/accounts/{account_id}")

    # --------------- Changes Endpoints ---------------
    # https://gerrit-review.googlesource.com/Documentation/
    # rest-api-changes.html#list-changes
    def query_changes(self, query, limit=None, optional_field=None):
        """ query changes with maximum limit returned queries """
        params = {"q": query}
        # `n` and `o` are only sent when the caller supplied them.
        if limit is not None:
            params["n"] = str(limit)
        if optional_field is not None:
            params["o"] = optional_field
        return self._get("/changes/", params)

    # --------------- Reviewer Endpoints ---------------
    # https://gerrit-review.googlesource.com/Documentation/
    # rest-api-changes.html#list-reviewers
    def list_reviewers(self, change_id):
        """ list reviewers of a change """
        return self._get(f"/changes/{change_id}/reviewers")

    def add_reviewer(self, change_id, reviewer_email):
        """ add a reviewer using an email address """
        data = {"reviewer": reviewer_email}
        return self._post(f"/changes/{change_id}/reviewers/", data)
# Utility functions
def parse_commit_subject(subject):
    """Split a commit subject of the form `tag1,tag2: message`.

    Returns `(tags, message)`, where `tags` is the list of stripped tags
    found before the first colon and `message` is everything after it.
    Returns `(None, None)` when the subject holds no colon.
    """
    parsed_subject = subject.split(":", maxsplit=1)

    # If the subject does not have a colon, it either does not have tags
    # or does not have a message
    if len(parsed_subject) <= 1:
        return None, None

    tags = [tag.strip() for tag in parsed_subject[0].split(",")]
    message = parsed_subject[1]

    return tags, message


# Convert time in seconds to a plausible unit
def convert_time_in_seconds(delta):
    """Express `delta` seconds as `<number><unit>` with unit s, m, h or d.

    The value is promoted to the next unit while it exceeds that unit's
    limit (60 s, 60 m, 24 h). Each promotion uses floor division plus one,
    so the result never falls below the given span (61 seconds gives "2m").
    """
    time = int(delta)
    time_unit = "s"

    for curr_unit_limit, next_unit in zip([60, 60, 24], ["m", "h", "d"]):
        if time <= curr_unit_limit:
            break
        time = time // curr_unit_limit + 1
        time_unit = next_unit

    return f"{time}{time_unit}"

# End of Utility functions


def add_maintainers_to_change(change, maintainers, gerrit_api):
    """Add the maintainers of every tag of `change` as its reviewers.

    `change` is a mapping with the keys "subject" and "id".
    `maintainers[tag].maintainers` must be an iterable of `(name, email)`
    pairs, and `gerrit_api` must offer `add_reviewer(change_id, email)`.
    Unknown tags and subjects without tags only produce a warning.
    """
    tags, _message = parse_commit_subject(change["subject"])
    change_id = change["id"]
    if tags is None:
        # A subject without a colon carries no tags; iterating None would
        # raise TypeError and abort the processing of all later changes.
        print(f"warning: `change-{change_id}` has no tags in its subject")
        return

    # A dict is used as an insertion-ordered set: every address is added
    # once, in a reproducible order.
    maintainer_emails = {}
    for tag in tags:
        try:
            for _name, email in maintainers[tag].maintainers:
                maintainer_emails[email] = None
        except KeyError:
            print((f"warning: `change-{change_id}` has an unknown tag: "
                   f"`{tag}`"))
    for email in maintainer_emails:
        gerrit_api.add_reviewer(change_id, email)