From 04741e4dccc25340417d40d1dc73043023dfc2e1 Mon Sep 17 00:00:00 2001 From: Katrin Ihler Date: Fri, 7 Feb 2025 18:55:59 +0100 Subject: [PATCH] WIP --- release-management/create-changelog/README.md | 72 +++++++++++++++ .../create-changelog/changelog.py | 89 ------------------- .../create-changelog/changelog_major.py | 36 ++++++++ .../create-changelog/changelog_minor.py | 36 ++++++++ .../create-changelog/lib/get_from_github.py | 44 +++++++++ .../lib/print_pull_requests.py | 43 +++++++++ release-management/create-changelog/readme.md | 75 ---------------- 7 files changed, 231 insertions(+), 164 deletions(-) create mode 100644 release-management/create-changelog/README.md delete mode 100644 release-management/create-changelog/changelog.py create mode 100644 release-management/create-changelog/changelog_major.py create mode 100644 release-management/create-changelog/changelog_minor.py create mode 100644 release-management/create-changelog/lib/get_from_github.py create mode 100644 release-management/create-changelog/lib/print_pull_requests.py delete mode 100644 release-management/create-changelog/readme.md diff --git a/release-management/create-changelog/README.md b/release-management/create-changelog/README.md new file mode 100644 index 0000000..60544d1 --- /dev/null +++ b/release-management/create-changelog/README.md @@ -0,0 +1,72 @@ +Generate Changelog for Opencast +=============================== + +These scripts generate a changelog based on the merged pull requests. The entries are sorted by merge date (descending). +For stable releases, changes merged into the legacy version are also included, but marked with an *. 
+ +**Example Result:** + +``` +- [[#4945](https://github.com/opencast/opencast/pull/4945)] - + Drop orphan statistics database index +``` + +Changelog for N.0 version +------------------------- + +Generate a list of changes since the `N-1.0` release with + + python changelog_major.py N <last-branch-cut> <this-branch-cut> <last-release> + [<this-release>] [<github-token>] + +This includes changes to `develop` between the branch cuts for `N-1` and `N` as well as the changes to `r/N.x` since +the branch cut for `N`. Changes to the legacy version since the release of `N-1.0` are included with an *. + +The date of the last release can easily be checked on GitHub. The dates of the branch cuts _should_ be listed in the +relevant release schedule in the documentation as part of the release notes. If they're not (or they're incorrect), you +can find out the branch cut date by finding the earliest commit belonging to both branches like this: + + git log --pretty=%as -n1 $(diff -u <(git rev-list --first-parent r/N.x) <(git rev-list --first-parent develop) | sed -ne 's/^ //p' | head -1) + +Please check afterwards whether pull requests merged right on the edge of one of the passed dates should be included +in the changelog. + +Example for 17.0: + + python changelog_major.py 17 2024-05-06 2024-11-06 2024-06-12 + +Changelog for N.x version +------------------------- + +Generate a list of the changes since the last N.x-1 release with + + python changelog_minor.py N <include-legacy> <last-release> [<this-release>] [<github-token>] + +Set `include-legacy` to `True` for releases of the stable version; this will then automatically include changes to the +legacy version during the same time-frame. + +The date of the last release can easily be checked on GitHub. Please note that PRs merged directly on the day of the +last release might already be contained in that changelog, so check for duplicates. 
+ +Example for 17.2: + + python changelog_minor.py 17 True 2025-01-23 + +A note about GitHub's rate limiting +----------------------------------- + +If you run into GitHub's API limit and encounter an error like this: + + [[#3903](https://github.com/opencast/opencast/pull/3903)] - + Common persistence util classes that also implement transaction retries + Traceback (most recent call last): + File "opencast-helper-scripts/release-management/create-changelog/changelog.py", line 62, in <module> + main(branch, start_date, end_date, pat) + File "opencast-helper-scripts/release-management/create-changelog/changelog.py", line 34, in main + merged = pr.get('merged_at') + ^^^^^^ + AttributeError: 'str' object has no attribute 'get' + +you can raise the limit by creating a [Personal Access Token](https://github.com/settings/tokens) with +read-only access to the upstream repo and passing that as the *last* parameter (which means in that case you _need_ to +pass all prior arguments as well). \ No newline at end of file diff --git a/release-management/create-changelog/changelog.py b/release-management/create-changelog/changelog.py deleted file mode 100644 index 1abdcc9..0000000 --- a/release-management/create-changelog/changelog.py +++ /dev/null @@ -1,89 +0,0 @@ -import re -import requests -import sys -from datetime import datetime -from dateutil.parser import parse - -URL = 'https://api.github.com/repos/opencast/opencast/pulls' \ - '?state=closed&base=' -JIRA_TICKET_URL = 'https://opencast.jira.com/browse/' - - -def main(branch, start_date, end_date, pat): - begin = parse(start_date).replace(tzinfo=None) - end = parse(end_date).replace(tzinfo=None) if end_date else datetime.now() - next_url = URL + branch - pullrequests = [] - - # Auth? 
- headers = {'Authorization': f'Bearer {pat}'} if pat else {} - - # get all closed pull request for a specific branch - while next_url: - result = requests.get(next_url, headers=headers) - link_header = result.headers.get('Link') - next_url = None - if link_header: - match = re.search('<([^>]*)>; rel="next"', link_header) - if match: - next_url = match.group(1) - pullrequests += result.json() - - # filter by merge date - bot_pullrequests = [] - for pr in pullrequests: - merged = pr.get('merged_at') - if not merged: - continue # pull request was canceled - merged = parse(merged).replace(tzinfo=None) - if begin <= merged <= end: - # Print (dependa-)bot pull requests seperately - user_type = pr.get('user').get('type') - if user_type == 'Bot': - bot_pullrequests.append(pr) - continue - - link = pr.get('html_url') - title = pr.get('title').strip() - nr = pr.get('number') - pretty_print(title, nr, link) - - if len(bot_pullrequests) > 0: - print('
Dependency updates\n') - print('
    ') - for bot_pr in bot_pullrequests: - link = bot_pr.get('html_url') - title = bot_pr.get('title').strip() - nr = bot_pr.get('number') - bot_pretty_print(title, nr, link) - print('
') - print('
') - - -def pretty_print(title, pr_number, pr_link): - title = pretty_print_title(title) - print('- [[#%s](%s)] -\n %s' % (pr_number, pr_link, title)) - -def bot_pretty_print(title, pr_number, pr_link): - title = pretty_print_title(title) - print('
  • [%s] - \n %s
  • ' % (pr_link, pr_number, title)) - -def pretty_print_title(title): - return re.sub(r'^\S*[mM][hH]-\d{3,5}[,: ]*', '', title) - -if __name__ == '__main__': - argc = len(sys.argv) - if 3 <= argc <= 5: - branch = sys.argv[1] - start_date = sys.argv[2] - end_date = None - pat = None - if argc >= 4: - end_date = sys.argv[3] - if argc == 5: - pat = sys.argv[4] - - main(branch, start_date, end_date, pat) - else: - binary = sys.argv[0] - print(f'Usage: {binary} branch start-date [end-date] [github pat]') diff --git a/release-management/create-changelog/changelog_major.py b/release-management/create-changelog/changelog_major.py new file mode 100644 index 0000000..2340f0e --- /dev/null +++ b/release-management/create-changelog/changelog_major.py @@ -0,0 +1,36 @@ +import sys +from dateutil.parser import parse + +from lib.get_from_github import get_prs_from_github +from lib.print_pull_requests import print_pull_requests, filter_and_sort_prs + + +def create_changelog(version, last_branch_cut, this_branch_cut, last_release, this_release=None, gh_token=None): + # parse args + version = int(version) + date_last_branch_cut = parse(last_branch_cut).replace(tzinfo=None) + date_this_branch_cut = parse(this_branch_cut).replace(tzinfo=None) + date_last_release = parse(last_release).replace(tzinfo=None) + date_this_release_opt = parse(this_release).replace(tzinfo=None) if this_release else None + + # get PRs from GitHub + pull_requests = get_prs_from_github(date_last_branch_cut, date_this_branch_cut, gh_token, 'develop') + pull_requests += get_prs_from_github(date_this_branch_cut, date_this_release_opt, gh_token, version) + + legacy_pull_requests = get_prs_from_github(date_last_release, date_this_release_opt, gh_token, version - 1) + for pr in legacy_pull_requests: + pr['legacy'] = True + pull_requests += legacy_pull_requests + + # print + print_pull_requests(*filter_and_sort_prs(pull_requests)) + + +if __name__ == '__main__': + argc = len(sys.argv) + if 5 <= argc <= 7: + 
create_changelog(*sys.argv[1:])
+    else:
+        binary = sys.argv[0]
+        print(f'Usage: {binary} version(int) last-branch-cut(date) this-branch-cut(date) last-release(date) '
+              f'[this-release(date)] [github-token]')
diff --git a/release-management/create-changelog/changelog_minor.py b/release-management/create-changelog/changelog_minor.py
new file mode 100644
index 0000000..3c6aca7
--- /dev/null
+++ b/release-management/create-changelog/changelog_minor.py
@@ -0,0 +1,36 @@
+import sys
+from dateutil.parser import parse
+
+from lib.get_from_github import get_prs_from_github
+from lib.print_pull_requests import print_pull_requests, filter_and_sort_prs
+
+
+def create_changelog(version, include_legacy, start_date, end_date=None, gh_token=None):
+    """Print the changelog for an N.x release; all arguments may be passed as command-line strings."""
+    version = int(version)
+    # bool('False') is True, so compare the text instead; real booleans still work since str(True) == 'True'
+    include_legacy = str(include_legacy).strip().lower() in ('true', '1', 'yes', 'y')
+    date_last_release = parse(start_date).replace(tzinfo=None)
+    date_this_release_opt = parse(end_date).replace(tzinfo=None) if end_date else None
+
+    # get PRs from GitHub
+    pull_requests = get_prs_from_github(date_last_release, date_this_release_opt, gh_token, version)
+
+    # check prior version as well; the 'legacy' flag lets the printer mark these entries
+    if include_legacy:
+        legacy_pull_requests = get_prs_from_github(date_last_release, date_this_release_opt, gh_token, version - 1)
+        for pr in legacy_pull_requests:
+            pr['legacy'] = True
+        pull_requests = pull_requests + legacy_pull_requests
+
+    print_pull_requests(*filter_and_sort_prs(pull_requests))
+
+
+if __name__ == '__main__':
+    argc = len(sys.argv)
+    if 4 <= argc <= 6:
+        create_changelog(*sys.argv[1:])
+    else:
+        binary = sys.argv[0]
+        print(f'Usage: {binary} version(int) include-legacy(bool) last-release(date) [this-release(date)] '
+              f'[github-token]')
diff --git a/release-management/create-changelog/lib/get_from_github.py b/release-management/create-changelog/lib/get_from_github.py
new file mode 100644
index 0000000..e95be48
--- /dev/null
+++ b/release-management/create-changelog/lib/get_from_github.py
@@ -0,0 +1,44 @@
+import re
+import
requests
+from dateutil.parser import parse
+
+
+def get_branch(version):
+    return 'r/' + str(version) + '.x'
+
+
+def get_prs_from_github(start_date, end_date, pattern, branch):
+    """Return the closed PRs against ``branch`` that were merged between ``start_date`` and ``end_date``.
+
+    ``end_date`` may be None (no upper bound), ``pattern`` is an optional token sent as Bearer authorization and
+    ``branch`` is a branch name or an int N meaning ``r/N.x``; raises ``requests.HTTPError`` on an error response.
+    """
+    if isinstance(branch, int):
+        branch = get_branch(branch)
+    next_url = 'https://api.github.com/repos/opencast/opencast/pulls' \
+               '?state=closed&sort=updated&direction=desc&per_page=100&base=' + branch
+    headers = {'Authorization': f'Bearer {pattern}'} if pattern else {}
+
+    pull_requests = []
+    while next_url:
+        result = requests.get(next_url, headers=headers)
+        result.raise_for_status()  # e.g. rate limit hit: the body is then an error object, not a list of PRs
+        parsed_results = result.json()
+        for pr in parsed_results:  # keep only PRs merged inside the requested range
+            if pr.get('merged_at'):
+                merged = parse(pr.get('merged_at')).replace(tzinfo=None)
+                if start_date <= merged and (not end_date or merged <= end_date):
+                    pr['merged_date'] = merged  # use the parsed merge date for sorting later
+                    pull_requests.append(pr)
+
+        # follow the pagination link, if there is one
+        link_header = result.headers.get('Link')
+        match = re.search('<([^>]*)>; rel="next"', link_header) if link_header else None
+        next_url = match.group(1) if match else None
+
+        # results are sorted by update time (newest first) and every merge is an update, so once the last PR
+        # of a page was updated before start_date nothing older can match; an empty page ends paging as well
+        if not parsed_results or parse(parsed_results[-1].get('updated_at')).replace(tzinfo=None) < start_date:
+            next_url = None
+
+    return pull_requests
diff --git a/release-management/create-changelog/lib/print_pull_requests.py b/release-management/create-changelog/lib/print_pull_requests.py
new file mode 100644
index 0000000..fb2d768
--- /dev/null
+++ b/release-management/create-changelog/lib/print_pull_requests.py
@@ -0,0 +1,43 @@
+import re
+
+
+def __normal_pretty_print(title, pr_number, pr_link, legacy):
+    legacy_str = '\\*' if legacy else ''
+    title = __pretty_print_title(title, legacy_str)
+    print('- [[#%s](%s)] -\n %s' % (pr_number, pr_link, title))
+
+
+def __bot_pretty_print(title, pr_number, pr_link, legacy):
+    
legacy_str = '*' if legacy else '' + title = __pretty_print_title(title, legacy_str) + print('
  • [%s] - \n %s
  • ' % (pr_link, pr_number, title)) + + +def __pretty_print_title(title, legacy_str): + return re.sub(r'^\S*[mM][hH]-\d{3,5}[,: ]*', '', title) + legacy_str + + +def filter_and_sort_prs(pull_requests): + # filter by type + normal_pull_requests = [pr for pr in pull_requests if pr.get('user').get('type') != 'Bot'] + bot_pull_requests = [pr for pr in pull_requests if pr.get('user').get('type') == 'Bot'] + + # sort by merged date + normal_pull_requests.sort(key=lambda p: p.get('merged_date'), reverse=True) + bot_pull_requests.sort(key=lambda p: p.get('merged_date'), reverse=True) + return normal_pull_requests, bot_pull_requests + + +def print_pull_requests(normal_pull_requests, bot_pull_requests): + # print results + for pr in normal_pull_requests: + __normal_pretty_print(pr.get('title').strip(), pr.get('number'), pr.get('html_url'), pr.get('legacy')) + + if len(bot_pull_requests) > 0: + print('
    Dependency updates\n') + print('
      ') + for bot_pr in bot_pull_requests: + __bot_pretty_print(bot_pr.get('title').strip(), bot_pr.get('number'), bot_pr.get('html_url'), + bot_pr.get('legacy')) + print('
    ') + print('
    ') diff --git a/release-management/create-changelog/readme.md b/release-management/create-changelog/readme.md deleted file mode 100644 index 6425074..0000000 --- a/release-management/create-changelog/readme.md +++ /dev/null @@ -1,75 +0,0 @@ -Generate Changelog for Opencast -=============================== - -This script generated a changelog based on merged pull requests. To generate a -changelog for a given version, run the script with the git branch name, start -date and optionally end date as arguments. - -Example for 14.x ---------------- - -``` -- [[#4945](https://github.com/opencast/opencast/pull/4945)] - - Drop orphan statistics database index -``` - -Changelog for N.x version -------------------------- - - python changelog.py r/N.x - -Changelog for N.0 version -------------------------- - -Since these versions are developed on both `develop` and their specific release -branched, two requests need to be made and merged: - - python changelog.py develop - python changelog.py r/N.x - -Note that the Github API may generate duplicate entries between the two lists depending on dates and timezones. - -Dates ------ -Computing the dates can be annoying. You need to find the earliest commit belonging to various combinations of branches. 
- - git log --pretty=%as -n1 $(diff -u <(git rev-list --first-parent r/N.x) <(git rev-list --first-parent develop) | sed -ne 's/^ //p' | head -1) - -As an example, to generate the full list for Opencast 14 you need to know - - - The changelog for `develop` between the `r/13.x` branching off, and `r/14.x` being started - - The changelog for `r/14.x` up to `14.0` - -To find the first begin-of-development date - - git log --pretty=%as -n1 $(diff -u <(git rev-list --first-parent r/13.x) <(git rev-list --first-parent develop) | sed -ne 's/^ //p' | head -1) - -To find the 14.x branch date - - git log --pretty=%as -n1 $(diff -u <(git rev-list --first-parent r/14.x) <(git rev-list --first-parent develop) | sed -ne 's/^ //p' | head -1) - -So the final changelog calls would be - - % python changelog.py develop 2022-11-16 2023-05-15 - % python changelog.py r/14.x 2023-05-15 - -API Limits ----------- - -Github enforces API limits, which this script can easily hit - especially if you run it multiple times when debugging! -In such cases, it is possible to encounter an error when running the script, leading to an incorrect output - - [[#3903](https://github.com/opencast/opencast/pull/3903)] - - Common persistence util classes that also implement transaction retries - Traceback (most recent call last): - File "opencast-helper-scripts/release-management/create-changelog/changelog.py", line 62, in - main(branch, start_date, end_date, pat) - File "opencast-helper-scripts/release-management/create-changelog/changelog.py", line 34, in main - merged = pr.get('merged_at') - ^^^^^^ - AttributeError: 'str' object has no attribute 'get' - -To raise this limit, you may need to create a [Personal Access Token](https://github.com/settings/tokens) with appropriate permissions (read only to the -upstream repo), and pass that as the *fifth* parameter. So the call with the token would be - - % python changelog.py develop 2022-11-16 2023-05-15 YOUR_TOKEN \ No newline at end of file