This commit is contained in:
toofar 2026-01-07 16:40:23 -08:00 committed by GitHub
commit f2e7f27ee3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 549 additions and 0 deletions

549
scripts/check_mergability.sh Executable file
View File

@ -0,0 +1,549 @@
#!/bin/sh
# How to run:
# * Install the gh cli and run `gh login`: https://github.com/cli/cli/
# * install black isort usort pyupgrade and whatever other tools you want to
# play with in your active virtualenv
# * also requires "sponge" from moreutils
# * move to a new folder for the script to work in: `mkdir pr_mergability && cd pr_mergability`
# * ../scripts/check_mergability.sh
#
# It'll clone the qutebrowser repo, fetch refs for all the open PRs, checkout
# a branch, run auto formatters, try to merge each PR, report back via CSV
# how badly each merge filed (via "number of conflicting lines").
#
# For details of what auto formatters are ran see the `tools` variable down
# near the bottom of the script.
#
# If you've checked out a branch and ran auto-formatters or whatever on it
# manually and just want the script to try to merge all PRs you can call it
# with the branch name and it'll do so. Remember to go back up to the work dir
# before calling the script.
#
# If it's been a few days and PRs have been opened or merged delete `prs.json`
# from the working dir to have them re-fetched on next run.
# If PRs have had updates pushed you'll have to update the refs yourself or
# nuke the whole clone in the work dir and let the script re-fetch them all.
# requires the github binary, authorized, to list open PRs.
command -v gh > /dev/null || {
echo "Error: Install the github CLI, gh, make sure it is in PATH and authenticated."
exit 1
}
# requires some formatting tools available. The are all installable via pip.
all_formatters="black isort usort pyupgrade"
for cmd in $all_formatters; do
command -v $cmd >/dev/null || {
echo "Error: Requires all these tools to be in PATH (install them with pip): $all_formatters"
exit 1
}
done
[ -e qutebrowser/app.py ] && {
echo "don't run this from your qutebrowser checkout. Run it from a tmp dir, it'll checkout out a new copy to work on"
exit 1
}
TTY="$(tty)"
DO_PAUSE="no"
maybepause () {
msg="$1"
force="$2"
if [ -n "$force" ] ;then
DO_PAUSE="yes"
elif [ "$DO_PAUSE" = "yes" ] ;then
true
else
echo "$1"
return
fi
echo "$1, investigate in another terminal, continue? [Step|Continue|Quit]"
read response < $TTY
case "$response" in
[Cc]*) DO_PAUSE="no";;
[Qq]*) exit 0;;
*) return;;
esac
}
[ -d qutebrowser ] || {
git clone git@github.com:qutebrowser/qutebrowser.git
cd qutebrowser
git config --local merge.conflictstyle merge
git config --local rerere.enabled false
cd -
}
[ -e prs.json ] || {
# (re-)fetch list of open PRs. Pull refs for any new ones.
# Resets master and qt6-v2 in case they have changed. Does not handle
# fetching new changes for updated PRs.
echo "fetching open PRs"
gh -R qutebrowser/qutebrowser pr list -s open --json number,title,mergeable,updatedAt -L 100 > prs.json
cd qutebrowser
git fetch
git checkout master && git pull
git checkout qt6-v2 && git pull
# this is slow for a fresh clone, idk how to fetch all pull/*/head refs at once
jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read number updated title; do
git describe pr/$number >/dev/null 2>&1 || git fetch origin refs/pull/$number/head:pr/$number
done
cd -
}
python3 <<"EOF"
import json
from collections import Counter
import rich
with open("prs.json") as f: prs=json.load(f)
rich.print(Counter([p['mergeable'] for p in prs]))
# Counter({'MERGEABLE': 29, 'CONFLICTING': 45})
EOF
summary () {
# Summarize the accumulated report CSVs
# Should be the last thing we do since it goes back up to the report dir
cd - >/dev/null
python3 <<"EOF"
import csv, glob
def read_csv(path):
with open(path) as f:
return list(csv.DictReader(f))
for report in sorted(glob.glob("report-*.csv")):
rows = read_csv(report)
succeeded = len([row for row in rows if row["state"] == "succeeded"])
failed = len([row for row in rows if row["state"] == "failed"])
print(f"{report} {succeeded=} {failed=}")
EOF
}
prompt_or_summary () {
printf "$1 [Yn]: "
read ans
case "$ans" in
[nN]*)
summary
exit 0
;;
*) true;;
esac
}
# format tool "aliases", where needed
usort () { env usort format "$@"; }
isort () { env isort -q "$@"; }
black () { env black -q "$@"; }
pyupgrade () { git ls-files | grep -F .py | xargs pyupgrade --py37-plus; }
generate_report () {
# checkout a branch, try to merge each of the open PRs, write the results to
# a CSV file
base="${1:-master}"
quiet="$2"
rewrite_strategy="$3"
cmds="$4"
pr="$5"
report_file=../report-$base.csv
# prefix for working branch when we are going to re-write stuff so we don't
# mess up the pr/* branches and have to re-fetch them.
[ -n "$rewrite_strategy" ] && {
prefix="tmp-rewrite-"
report_file=../report-$base-$rewrite_strategy.csv
}
git checkout -q $base
[ -e $report_file ] && [ -z "$quiet" ] && {
prompt_or_summary "$report_file exists, overwrite?"
}
echo "number,updated,title,state,clean,conflicting" > $report_file
report () {
echo "$1,$2,\"$3\",$4,$5,$6" >> $report_file
}
head_sha=$(git rev-parse HEAD)
jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read number updated title; do
[ -n "$pr" ] && [ "$pr" != "$number" ] && continue
[ -n "$quiet" ] || echo "trying ${prefix}pr/$number $updated $title"
git reset -q --hard $head_sha
applies_cleanly_to_master () {
number="$1"
grep "^$number" ../report-master.csv | grep failed
[ $? -eq 1 ]
return $?
}
case "$rewrite_strategy" in
merge)
applies_cleanly_to_master $number || {
echo "pr/$number succeeded already in ../report-master.csv, skipping"
continue
}
merge_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || {
report $number $updated "$title" failed 999 999
continue
}
;;
rebase)
# Only attempt branches that actually merge cleanly with master.
# Theoretically it wouldn't hurt to do all of them but a) running
# black via the filter driver is slow b) rebase_with_formatting needs
# some work to handle more errors in that case (the "git commit -qam
# 'fix lint" bit at least needs to look for conflict markers)
# I'm hardcoding master because of a lack of imagination.
applies_cleanly_to_master $number || {
echo "pr/$number succeeded already in ../report-master.csv, skipping"
continue
}
rebase_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || {
report $number $updated "$title" failed 999 999
continue
}
;;
'')
true
;;
*)
echo "Unknown rewrite strategy '$rewrite_strategy'"
exit 1
;;
esac
git merge -q --no-ff --no-edit ${prefix}pr/$number 2>&1 1>/dev/null | grep -v preimage
if [ -e .git/MERGE_HEAD ] ;then
# merge failed, clean lines staged and conflicting lines in working
# tree
merged_lines=$(git diff --cached --numstat | awk -F' ' '{sum+=$1;} END{print sum;}')
conflicting_lines=$(git diff | sed -n -e '/<<<<<<< HEAD/,/=======$/p' -e '/=======$/,/>>>>>>> pr/p' | wc -l)
conflicting_lines=$(($conflicting_lines-4)) # account for markers included in both sed expressions
[ -n "$quiet" ] || echo "#$number failed merging merged_lines=$merged_lines conflicting_lines=$conflicting_lines"
maybepause "merge of ${prefix}pr/$number into $base failed"
git merge --abort
report $number $updated "$title" failed $merged_lines $conflicting_lines
else
[ -n "$quiet" ] || echo "#$number merged fine"
#git show HEAD --oneline --stat
report $number $updated "$title" succeeded 0 0
fi
done
}
add_smudge_filter () {
cmds="$1"
# Setup the filters. A "smudge" filter is configured for each tool then we
# add the required tools to a gitattributes file. And make sure to clean
# it up later.
# Running the formatters as filters is slower than running them directly
# because they seem to be run on the files serially. TODO: can we
# parallelize them?
# Maybe just adding a wrapper around the formatters that caches the output
# would be simpler. At least then you just have to sit through them once.
git config --local filter.rewrite.smudge "filter-cache"
printf "*.py" > .git/info/attributes
printf " filter=rewrite" >> .git/info/attributes
echo >> .git/info/attributes
mkdir filter-tools 2>/dev/null
cat > filter-tools/filter-cache <<EOF
#!/bin/sh
# Script to add as filter for git while rebasing.
# Runs the configured tools in sequence, caches the result of each tool in
# case you find yourself running through this proecss lots while working on
# it.
cmds="$cmds"
inputf="\$(mktemp --suffix=rebase)"
cat > "\$inputf"
# TODO: de-dup these with the parent script?
# Can use aliases here?
# Call with the file directly instead of using stdin?
usort () { env usort format -; }
black () { env black -q -; }
isort () { env isort -q -; }
pyupgrade () { env pyupgrade --exit-zero-even-if-changed --py37-plus -; }
run_with_cache () {
inputf="\$1"
cmd="\$2"
input_hash="\$(sha1sum "\$inputf" | cut -d' ' -f1)"
mkdir -p "/tmp/filter-caches/\$cmds/\$cmd" 2>/dev/null
outputf="/tmp/filter-caches/\$cmds/\$cmd/\$input_hash"
if [ -e "\$outputf" ] ;then
lines="\$(wc -l "\$outputf" | cut -d' ' -f1)"
# where are these empty output files coming from???
# echo "removing bad cached file '\$outputf'" >&2
[ \$lines -eq 0 ] && rm "\$outputf"
fi
if ! [ -e "\$outputf" ] ;then
\$cmd < "\$inputf" > "\$outputf"
[ \$? -eq 0 ] || {
echo "\$cmd failed" >&2
cat "\$inputf"
return
}
lines="\$(wc -l "\$outputf" | cut -d' ' -f1)"
[ \$lines -eq 0 ] && {
echo "tool '\$cmd' produced 0 line output file from '\$inputf'" >&2
}
fi
cat "\$outputf"
}
echo "\$cmds" | tr ' ' '\n' | while read cmd; do
run_with_cache \$inputf "\$cmd" | sponge \$inputf
done
cat "\$inputf"
rm "\$inputf"
EOF
chmod +x filter-tools/filter-cache
export PATH="$PWD/filter-tools:$PATH"
}
remove_smudge_filter () {
# no need to remove the config or script, it's only active when the
# attribute is set
rm .git/info/attributes
}
merge_with_formatting () {
number="$1"
base="$2"
cmds="$3"
prefix="${4:-tmp-rewrite-}"
strategy="$5"
# Use a temp base branch for now but adding "dropme" commits probably isn't the right
# strategy for the end goal of letting PR authors adapt to autoformatter
# changes. At that point we'll already have a re-formatted master branch.
# Unless we can do the merge then rebase-keep-merges-but-drop-dropme or
# something.
# TODO: swap out this block to be based off of real master or qt-v2 or $base
git checkout -b tmp-master-rewrite-pr/$number `git merge-base origin/master pr/$number`
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "dropme! $cmd" # mark commits for dropping when we rebase onto the more recent master
done
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "dropme! $cmd 2"
done
git checkout -b ${prefix}pr/$number pr/$number
add_smudge_filter "$cmds"
git merge -X renormalize tmp-master-rewrite-pr/$number
exit_code="$?"
remove_smudge_filter
if [ $exit_code -eq 0 ] ;then
git commit -qam "fix lint"
else
maybepause "merge of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
git merge --abort
fi
git branch -D tmp-master-rewrite-pr/$number
[ $exit_code -eq 0 ] || return $exit_code
git checkout -q $base
}
rebase_with_formatting () {
number="$1"
base="$2"
cmds="$3"
prefix="${4:-tmp-rewrite-}"
strategy="$5"
# We need to apply formatting to PRs and base them on a reformatted base
# branch.
# I haven't looked into doing that via a merge but here is an attempt
# doing a rebase.
# Rebasing directly on to a formatted branch will fail very easily when it
# runs into a formatting change. So I'm using git's "filter" attribute to
# apply the same formatter to the trees corresponding to the
# commits being rebased. Hopefully if we apply the same formatter to the
# base branch and to the individual commits from the PRs we can minimize
# conflicts.
# An alternative to using the filter attribute might be to use something
# like the "darker" tool to re-write the commits. I suspect that won't
# help with conflicts in the context around changes though.
# Checkout the parent commit of the branch then apply formatting tools to
# it. This will provide a target for rebasing which doesn't have any
# additional drift from changes to master. After that then we can rebase
# the re-written PR branch to the more current, autoformatted, master.
# TODO: It might be possible to skip the intermediate base branch.
git checkout -b tmp-master-rewrite-pr/$number `git merge-base origin/master pr/$number`
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "dropme! $cmd" # mark commits for dropping when we rebase onto the more recent master
done
# Occasionally we get situations where black and pyupgrade build on each
# other to enable further changes. So the order you run them matters. But we
# have situations where each one enables the other, in both orders. So we
# run them all yet again to pick up any lingering changes from the first
# run.
# If we don't do this the leftover changes can be picked up by the smudge
# filter during the first rebase below and added to "fix lint" commits. Then
# since they don't have "dropme!" in the messages they stick in the branch
# and end up conflicting with the base branch.
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "dropme! $cmd 2"
done
git checkout -b ${prefix}pr/$number pr/$number
add_smudge_filter "$cmds"
git rebase -q -X renormalize tmp-master-rewrite-pr/$number
exit_code="$?"
remove_smudge_filter
[ $exit_code -eq 0 ] || {
maybepause "rebase -X renormalize of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
git rebase --abort
}
git branch -D tmp-master-rewrite-pr/$number
[ $exit_code -eq 0 ] || return $exit_code
if [ "$strategy" = "rebase" ] ;then
# now transplant onto the actual upstream branch -- might have to drop this
# if it causes problems.
EDITOR='sed -i /dropme/d' git rebase -qi "$base" || {
maybepause "rebase of ${prefix}pr/$number onto $base failed"
git rebase --abort
return 1
}
fi
git checkout -q $base
}
cd qutebrowser
# run as `$0 some-branch` to report on merging all open PRs to a branch you
# made yourself. Otherwise run without args to try with a bunch of builtin
# configurations.
strategy=""
pull_request=""
while [ -n "$1" ] ;do
case "$1" in
-s|--rewrite-strategy)
shift
[ -n "$1" ] || {
echo "What strategy?"
exit 1
}
strategy="$1"
;;
-p|--pull-request)
shift
[ -n "$1" ] || {
echo "Which PR?"
exit 1
}
pull_request="$1"
;;
-*)
echo "Unknown argument '$1'"
exit 1
;;
*)
break
;;
esac
shift
done
if [ -n "$1" ] ;then
generate_report "$1"
else
clean_branches () {
# only clean up tmp- branches in case I run it on my main qutebrowser
# checkout by mistake :)
git checkout master
git reset --hard origin/master
git branch -l | grep tmp- | grep -v detached | while read l; do git branch -qD $l ;done
}
# pre-defined auto-formatter configurations. Branches will be created as
# needed.
# format: branch tool1 tool2 ...
tools_all="master true
tmp-black black
tmp-black_isort black isort
tmp-black_usort black usort
tmp-black_pyupgrade black pyupgrade
tmp-black_isort_pyupgrade black isort pyupgrade
tmp-black_usort_pyupgrade black usort pyupgrade
qt6-v2 true"
tools="tmp-black_isort_pyupgrade black isort pyupgrade"
if [ "$(echo "$tools" | wc -l | cut -d' ' -f1)" -gt 1 ] ;then
# TODO: turn this "run it with all tool configurations and see which one
# is the worst" thing into a CLI option. This script is a cross between
# "gather stats" and "refine merge strategies" now and is in need of a bit
# of a refactor.
prompt_or_summary "Generate report for all tool configurations?"
fi
clean_branches
echo "$tools" | while read branch cmds; do
echo "$branch"
git checkout -q "$branch" 2>/dev/null || git checkout -q -b "$branch" origin/master
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "$cmd"
done
# Occasionally we get situations where black and pyupgrade build on each
# other to enable further changes. So the order you run them matters. But we
# have situations where each one enables the other, in both orders. So we
# run them all yet again to pick up any lingering changes from the first
# run.
echo "$cmds" | tr ' ' '\n' | while read cmd; do
$cmd qutebrowser tests
git commit -am "$cmd second run"
done
generate_report "$branch" y "$strategy" "$cmds" "$pull_request"
done
fi
summary
# todo:
# * see if we can run formatters on PR branches before/while merging
# * do most stuff based off of qt6-v2 instead of master, not like most PRs
# will be merged to pre-3.0 master anyway
# * for strategies where we skip PRs that failed in master include them in the
# report to for reference. With a marker to that affect and a total diffstat
# so we can see how big they are
# * *try the more simplistic "Run the formatter on all PR branches then merge"
# instead of trying to do it via a rebase*
# * try rebasing them to an autoformatted qt6-v2 branch
# notes:
# after merging qt6-v2 would merging old PRs to old master then somehow merging
# the PR merge commit up to the new master easier than rebasing the PR?
# there is a filter attribute you can use to re-write files before committing.
# For this use case probably the same as rebase -i --exec then merge?
# >See "Merging branches with differing checkin/checkout attributes" in gitattributes(5)
# if we go with the strategy of rebasing PRs on formatted commits how to deal
# with stopping isort making import loops on every damn PR. Still need to try
# rebasing directly on the latest formatted master instead of doing the
# intermediated one.