#!/bin/sh # How to run: # * Install the gh cli and run `gh login`: https://github.com/cli/cli/ # * install black isort usort pyupgrade and whatever other tools you want to # play with in your active virtualenv # * also requires "sponge" from moreutils # * move to a new folder for the script to work in: `mkdir pr_mergability && cd pr_mergability` # * ../scripts/check_mergability.sh # # It'll clone the qutebrowser repo, fetch refs for all the open PRs, checkout # a branch, run auto formatters, try to merge each PR, report back via CSV # how badly each merge filed (via "number of conflicting lines"). # # For details of what auto formatters are ran see the `tools` variable down # near the bottom of the script. # # If you've checked out a branch and ran auto-formatters or whatever on it # manually and just want the script to try to merge all PRs you can call it # with the branch name and it'll do so. Remember to go back up to the work dir # before calling the script. # # If it's been a few days and PRs have been opened or merged delete `prs.json` # from the working dir to have them re-fetched on next run. # If PRs have had updates pushed you'll have to update the refs yourself or # nuke the whole clone in the work dir and let the script re-fetch them all. # requires the github binary, authorized, to list open PRs. command -v gh > /dev/null || { echo "Error: Install the github CLI, gh, make sure it is in PATH and authenticated." exit 1 } # requires some formatting tools available. The are all installable via pip. all_formatters="black isort usort pyupgrade" for cmd in $all_formatters; do command -v $cmd >/dev/null || { echo "Error: Requires all these tools to be in PATH (install them with pip): $all_formatters" exit 1 } done [ -e qutebrowser/app.py ] && { echo "don't run this from your qutebrowser checkout. Run it from a tmp dir, it'll checkout out a new copy to work on" exit 1 } TTY="$(tty)" DO_PAUSE="no" maybepause () { msg="$1" force="$2" if [ -n "$force" ] ;then DO_PAUSE="yes" elif [ "$DO_PAUSE" = "yes" ] ;then true else echo "$1" return fi echo "$1, investigate in another terminal, continue? [Step|Continue|Quit]" read response < $TTY case "$response" in [Cc]*) DO_PAUSE="no";; [Qq]*) exit 0;; *) return;; esac } [ -d qutebrowser ] || { git clone git@github.com:qutebrowser/qutebrowser.git cd qutebrowser git config --local merge.conflictstyle merge git config --local rerere.enabled false cd - } [ -e prs.json ] || { # (re-)fetch list of open PRs. Pull refs for any new ones. # Resets master and qt6-v2 in case they have changed. Does not handle # fetching new changes for updated PRs. echo "fetching open PRs" gh -R qutebrowser/qutebrowser pr list -s open --json number,title,mergeable,updatedAt -L 100 > prs.json cd qutebrowser git fetch git checkout master && git pull git checkout qt6-v2 && git pull # this is slow for a fresh clone, idk how to fetch all pull/*/head refs at once jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read number updated title; do git describe pr/$number >/dev/null 2>&1 || git fetch origin refs/pull/$number/head:pr/$number done cd - } python3 <<"EOF" import json from collections import Counter import rich with open("prs.json") as f: prs=json.load(f) rich.print(Counter([p['mergeable'] for p in prs])) # Counter({'MERGEABLE': 29, 'CONFLICTING': 45}) EOF summary () { # Summarize the accumulated report CSVs # Should be the last thing we do since it goes back up to the report dir cd - >/dev/null python3 <<"EOF" import csv, glob def read_csv(path): with open(path) as f: return list(csv.DictReader(f)) for report in sorted(glob.glob("report-*.csv")): rows = read_csv(report) succeeded = len([row for row in rows if row["state"] == "succeeded"]) failed = len([row for row in rows if row["state"] == "failed"]) print(f"{report} {succeeded=} {failed=}") EOF } prompt_or_summary () { printf "$1 [Yn]: " read ans case "$ans" in [nN]*) summary exit 0 ;; *) true;; esac } # format tool "aliases", where needed usort () { env usort format "$@"; } isort () { env isort -q "$@"; } black () { env black -q "$@"; } pyupgrade () { git ls-files | grep -F .py | xargs pyupgrade --py37-plus; } generate_report () { # checkout a branch, try to merge each of the open PRs, write the results to # a CSV file base="${1:-master}" quiet="$2" rewrite_strategy="$3" cmds="$4" pr="$5" report_file=../report-$base.csv # prefix for working branch when we are going to re-write stuff so we don't # mess up the pr/* branches and have to re-fetch them. [ -n "$rewrite_strategy" ] && { prefix="tmp-rewrite-" report_file=../report-$base-$rewrite_strategy.csv } git checkout -q $base [ -e $report_file ] && [ -z "$quiet" ] && { prompt_or_summary "$report_file exists, overwrite?" } echo "number,updated,title,state,clean,conflicting" > $report_file report () { echo "$1,$2,\"$3\",$4,$5,$6" >> $report_file } head_sha=$(git rev-parse HEAD) jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read number updated title; do [ -n "$pr" ] && [ "$pr" != "$number" ] && continue [ -n "$quiet" ] || echo "trying ${prefix}pr/$number $updated $title" git reset -q --hard $head_sha applies_cleanly_to_master () { number="$1" grep "^$number" ../report-master.csv | grep failed [ $? -eq 1 ] return $? } case "$rewrite_strategy" in merge) applies_cleanly_to_master $number || { echo "pr/$number succeeded already in ../report-master.csv, skipping" continue } merge_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || { report $number $updated "$title" failed 999 999 continue } ;; rebase) # Only attempt branches that actually merge cleanly with master. # Theoretically it wouldn't hurt to do all of them but a) running # black via the filter driver is slow b) rebase_with_formatting needs # some work to handle more errors in that case (the "git commit -qam # 'fix lint" bit at least needs to look for conflict markers) # I'm hardcoding master because of a lack of imagination. applies_cleanly_to_master $number || { echo "pr/$number succeeded already in ../report-master.csv, skipping" continue } rebase_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || { report $number $updated "$title" failed 999 999 continue } ;; '') true ;; *) echo "Unknown rewrite strategy '$rewrite_strategy'" exit 1 ;; esac git merge -q --no-ff --no-edit ${prefix}pr/$number 2>&1 1>/dev/null | grep -v preimage if [ -e .git/MERGE_HEAD ] ;then # merge failed, clean lines staged and conflicting lines in working # tree merged_lines=$(git diff --cached --numstat | awk -F' ' '{sum+=$1;} END{print sum;}') conflicting_lines=$(git diff | sed -n -e '/<<<<<<< HEAD/,/=======$/p' -e '/=======$/,/>>>>>>> pr/p' | wc -l) conflicting_lines=$(($conflicting_lines-4)) # account for markers included in both sed expressions [ -n "$quiet" ] || echo "#$number failed merging merged_lines=$merged_lines conflicting_lines=$conflicting_lines" maybepause "merge of ${prefix}pr/$number into $base failed" git merge --abort report $number $updated "$title" failed $merged_lines $conflicting_lines else [ -n "$quiet" ] || echo "#$number merged fine" #git show HEAD --oneline --stat report $number $updated "$title" succeeded 0 0 fi done } add_smudge_filter () { cmds="$1" # Setup the filters. A "smudge" filter is configured for each tool then we # add the required tools to a gitattributes file. And make sure to clean # it up later. # Running the formatters as filters is slower than running them directly # because they seem to be run on the files serially. TODO: can we # parallelize them? # Maybe just adding a wrapper around the formatters that caches the output # would be simpler. At least then you just have to sit through them once. git config --local filter.rewrite.smudge "filter-cache" printf "*.py" > .git/info/attributes printf " filter=rewrite" >> .git/info/attributes echo >> .git/info/attributes mkdir filter-tools 2>/dev/null cat > filter-tools/filter-cache < "\$inputf" # TODO: de-dup these with the parent script? # Can use aliases here? # Call with the file directly instead of using stdin? usort () { env usort format -; } black () { env black -q -; } isort () { env isort -q -; } pyupgrade () { env pyupgrade --exit-zero-even-if-changed --py37-plus -; } run_with_cache () { inputf="\$1" cmd="\$2" input_hash="\$(sha1sum "\$inputf" | cut -d' ' -f1)" mkdir -p "/tmp/filter-caches/\$cmds/\$cmd" 2>/dev/null outputf="/tmp/filter-caches/\$cmds/\$cmd/\$input_hash" if [ -e "\$outputf" ] ;then lines="\$(wc -l "\$outputf" | cut -d' ' -f1)" # where are these empty output files coming from??? # echo "removing bad cached file '\$outputf'" >&2 [ \$lines -eq 0 ] && rm "\$outputf" fi if ! [ -e "\$outputf" ] ;then \$cmd < "\$inputf" > "\$outputf" [ \$? -eq 0 ] || { echo "\$cmd failed" >&2 cat "\$inputf" return } lines="\$(wc -l "\$outputf" | cut -d' ' -f1)" [ \$lines -eq 0 ] && { echo "tool '\$cmd' produced 0 line output file from '\$inputf'" >&2 } fi cat "\$outputf" } echo "\$cmds" | tr ' ' '\n' | while read cmd; do run_with_cache \$inputf "\$cmd" | sponge \$inputf done cat "\$inputf" rm "\$inputf" EOF chmod +x filter-tools/filter-cache export PATH="$PWD/filter-tools:$PATH" } remove_smudge_filter () { # no need to remove the config or script, it's only active when the # attribute is set rm .git/info/attributes } merge_with_formatting () { number="$1" base="$2" cmds="$3" prefix="${4:-tmp-rewrite-}" strategy="$5" # Use a temp base branch for now but adding "dropme" commits probably isn't the right # strategy for the end goal of letting PR authors adapt to autoformatter # changes. At that point we'll already have a re-formatted master branch. # Unless we can do the merge then rebase-keep-merges-but-drop-dropme or # something. # TODO: swap out this block to be based off of real master or qt-v2 or $base git checkout -b tmp-master-rewrite-pr/$number `git merge-base origin/master pr/$number` echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "dropme! $cmd" # mark commits for dropping when we rebase onto the more recent master done echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "dropme! $cmd 2" done git checkout -b ${prefix}pr/$number pr/$number add_smudge_filter "$cmds" git merge -X renormalize tmp-master-rewrite-pr/$number exit_code="$?" remove_smudge_filter if [ $exit_code -eq 0 ] ;then git commit -qam "fix lint" else maybepause "merge of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed" git merge --abort fi git branch -D tmp-master-rewrite-pr/$number [ $exit_code -eq 0 ] || return $exit_code git checkout -q $base } rebase_with_formatting () { number="$1" base="$2" cmds="$3" prefix="${4:-tmp-rewrite-}" strategy="$5" # We need to apply formatting to PRs and base them on a reformatted base # branch. # I haven't looked into doing that via a merge but here is an attempt # doing a rebase. # Rebasing directly on to a formatted branch will fail very easily when it # runs into a formatting change. So I'm using git's "filter" attribute to # apply the same formatter to the trees corresponding to the # commits being rebased. Hopefully if we apply the same formatter to the # base branch and to the individual commits from the PRs we can minimize # conflicts. # An alternative to using the filter attribute might be to use something # like the "darker" tool to re-write the commits. I suspect that won't # help with conflicts in the context around changes though. # Checkout the parent commit of the branch then apply formatting tools to # it. This will provide a target for rebasing which doesn't have any # additional drift from changes to master. After that then we can rebase # the re-written PR branch to the more current, autoformatted, master. # TODO: It might be possible to skip the intermediate base branch. git checkout -b tmp-master-rewrite-pr/$number `git merge-base origin/master pr/$number` echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "dropme! $cmd" # mark commits for dropping when we rebase onto the more recent master done # Occasionally we get situations where black and pyupgrade build on each # other to enable further changes. So the order you run them matters. But we # have situations where each one enables the other, in both orders. So we # run them all yet again to pick up any lingering changes from the first # run. # If we don't do this the leftover changes can be picked up by the smudge # filter during the first rebase below and added to "fix lint" commits. Then # since they don't have "dropme!" in the messages they stick in the branch # and end up conflicting with the base branch. echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "dropme! $cmd 2" done git checkout -b ${prefix}pr/$number pr/$number add_smudge_filter "$cmds" git rebase -q -X renormalize tmp-master-rewrite-pr/$number exit_code="$?" remove_smudge_filter [ $exit_code -eq 0 ] || { maybepause "rebase -X renormalize of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed" git rebase --abort } git branch -D tmp-master-rewrite-pr/$number [ $exit_code -eq 0 ] || return $exit_code if [ "$strategy" = "rebase" ] ;then # now transplant onto the actual upstream branch -- might have to drop this # if it causes problems. EDITOR='sed -i /dropme/d' git rebase -qi "$base" || { maybepause "rebase of ${prefix}pr/$number onto $base failed" git rebase --abort return 1 } fi git checkout -q $base } cd qutebrowser # run as `$0 some-branch` to report on merging all open PRs to a branch you # made yourself. Otherwise run without args to try with a bunch of builtin # configurations. strategy="" pull_request="" while [ -n "$1" ] ;do case "$1" in -s|--rewrite-strategy) shift [ -n "$1" ] || { echo "What strategy?" exit 1 } strategy="$1" ;; -p|--pull-request) shift [ -n "$1" ] || { echo "Which PR?" exit 1 } pull_request="$1" ;; -*) echo "Unknown argument '$1'" exit 1 ;; *) break ;; esac shift done if [ -n "$1" ] ;then generate_report "$1" else clean_branches () { # only clean up tmp- branches in case I run it on my main qutebrowser # checkout by mistake :) git checkout master git reset --hard origin/master git branch -l | grep tmp- | grep -v detached | while read l; do git branch -qD $l ;done } # pre-defined auto-formatter configurations. Branches will be created as # needed. # format: branch tool1 tool2 ... tools_all="master true tmp-black black tmp-black_isort black isort tmp-black_usort black usort tmp-black_pyupgrade black pyupgrade tmp-black_isort_pyupgrade black isort pyupgrade tmp-black_usort_pyupgrade black usort pyupgrade qt6-v2 true" tools="tmp-black_isort_pyupgrade black isort pyupgrade" if [ "$(echo "$tools" | wc -l | cut -d' ' -f1)" -gt 1 ] ;then # TODO: turn this "run it with all tool configurations and see which one # is the worst" thing into a CLI option. This script is a cross between # "gather stats" and "refine merge strategies" now and is in need of a bit # of a refactor. prompt_or_summary "Generate report for all tool configurations?" fi clean_branches echo "$tools" | while read branch cmds; do echo "$branch" git checkout -q "$branch" 2>/dev/null || git checkout -q -b "$branch" origin/master echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "$cmd" done # Occasionally we get situations where black and pyupgrade build on each # other to enable further changes. So the order you run them matters. But we # have situations where each one enables the other, in both orders. So we # run them all yet again to pick up any lingering changes from the first # run. echo "$cmds" | tr ' ' '\n' | while read cmd; do $cmd qutebrowser tests git commit -am "$cmd second run" done generate_report "$branch" y "$strategy" "$cmds" "$pull_request" done fi summary # todo: # * see if we can run formatters on PR branches before/while merging # * do most stuff based off of qt6-v2 instead of master, not like most PRs # will be merged to pre-3.0 master anyway # * for strategies where we skip PRs that failed in master include them in the # report to for reference. With a marker to that affect and a total diffstat # so we can see how big they are # * *try the more simplistic "Run the formatter on all PR branches then merge" # instead of trying to do it via a rebase* # * try rebasing them to an autoformatted qt6-v2 branch # notes: # after merging qt6-v2 would merging old PRs to old master then somehow merging # the PR merge commit up to the new master easier than rebasing the PR? # there is a filter attribute you can use to re-write files before committing. # For this use case probably the same as rebase -i --exec then merge? # >See "Merging branches with differing checkin/checkout attributes" in gitattributes(5) # if we go with the strategy of rebasing PRs on formatted commits how to deal # with stopping isort making import loops on every damn PR. Still need to try # rebasing directly on the latest formatted master instead of doing the # intermediated one.