550 lines
18 KiB
Bash
Executable File
550 lines
18 KiB
Bash
Executable File
#!/bin/sh
|
|
|
|
# How to run:
# * Install the gh cli and run `gh auth login`: https://github.com/cli/cli/
# * install black isort usort pyupgrade and whatever other tools you want to
#   play with in your active virtualenv
# * also requires "sponge" from moreutils, "jq", and python3 with the "rich"
#   package
# * move to a new folder for the script to work in: `mkdir pr_mergability && cd pr_mergability`
# * ../scripts/check_mergability.sh
#
# It'll clone the qutebrowser repo, fetch refs for all the open PRs, checkout
# a branch, run auto formatters, try to merge each PR, report back via CSV
# how badly each merge failed (via "number of conflicting lines").
#
# For details of what auto formatters are run see the `tools` variable down
# near the bottom of the script.
#
# If you've checked out a branch and run auto-formatters or whatever on it
# manually and just want the script to try to merge all PRs you can call it
# with the branch name and it'll do so. Remember to go back up to the work dir
# before calling the script.
#
# If it's been a few days and PRs have been opened or merged delete `prs.json`
# from the working dir to have them re-fetched on next run.
# If PRs have had updates pushed you'll have to update the refs yourself or
# nuke the whole clone in the work dir and let the script re-fetch them all.
|
|
|
|
# The GitHub CLI must be installed (and authorized) so the open PRs can be
# listed later on; bail out early when it is not on PATH.
if ! command -v gh > /dev/null; then
    echo "Error: Install the github CLI, gh, make sure it is in PATH and authenticated."
    exit 1
fi
|
|
# Every formatter named here has to be on PATH. They are all installable via
# pip. The list is echoed in the error message so the user knows what to get.
all_formatters="black isort usort pyupgrade"
for cmd in $all_formatters; do
    if ! command -v $cmd >/dev/null; then
        echo "Error: Requires all these tools to be in PATH (install them with pip): $all_formatters"
        exit 1
    fi
done
|
|
|
|
# Refuse to run from inside an existing qutebrowser checkout: seeing the
# package's app.py relative to the current directory means we are in one.
if [ -e qutebrowser/app.py ]; then
    echo "don't run this from your qutebrowser checkout. Run it from a tmp dir, it'll checkout out a new copy to work on"
    exit 1
fi
|
|
|
|
# Terminal device to read answers from. It is captured here, at top level,
# because maybepause is also called from inside `... | while read` loops where
# stdin is a pipe rather than the terminal.
TTY="$(tty)"
# "yes" while stepping is active, i.e. while every maybepause call prompts.
DO_PAUSE="no"

# maybepause MSG [FORCE]
#
# Report MSG. When stepping is off (the default) MSG is just echoed. When
# stepping is on, or FORCE is non-empty (which also turns stepping on for all
# later calls), the user is asked how to proceed:
#   C...          turn stepping off and carry on
#   Q...          exit the script with status 0
#   anything else keep stepping and carry on
maybepause () {
    msg="$1"
    force="$2"

    [ -n "$force" ] && DO_PAUSE="yes"

    if [ "$DO_PAUSE" != "yes" ]; then
        echo "$msg"
        return
    fi

    echo "$msg, investigate in another terminal, continue? [Step|Continue|Quit]"
    read response < $TTY
    case "$response" in
        [Qq]*) exit 0 ;;
        [Cc]*) DO_PAUSE="no" ;;
        *) return ;;
    esac
}
|
|
|
|
# First run only: clone qutebrowser into the work dir and pin two settings in
# the clone:
#   merge.conflictstyle=merge  two-section conflict hunks (presumably what the
#                              conflict line counting in generate_report expects)
#   rerere.enabled=false       no recorded conflict resolutions are reused
if ! [ -d qutebrowser ]; then
    git clone git@github.com:qutebrowser/qutebrowser.git || {
        echo "Error: cloning qutebrowser failed" >&2
        exit 1
    }
    # Configure from a subshell: if the clone directory can't be entered the
    # `git config --local` calls must not run against some other repository,
    # and the caller's working directory stays untouched either way.
    (
        cd qutebrowser || exit 1
        git config --local merge.conflictstyle merge
        git config --local rerere.enabled false
    ) || exit 1
fi
|
|
|
|
# (re-)fetch list of open PRs. Pull refs for any new ones.
# Updates master and qt6-v2 in case they have changed. Does not handle
# fetching new changes for updated PRs.
if ! [ -e prs.json ]; then
    echo "fetching open PRs"
    # Go through a scratch file: the existence of prs.json is what stops the
    # next run from fetching again, so a failed or interrupted `gh` call must
    # not leave an empty/partial prs.json behind.
    gh -R qutebrowser/qutebrowser pr list -s open --json number,title,mergeable,updatedAt -L 100 > prs.json.tmp || {
        rm -f prs.json.tmp
        echo "Error: could not list open PRs with gh" >&2
        exit 1
    }
    mv prs.json.tmp prs.json

    cd qutebrowser || exit 1
    git fetch
    git checkout master && git pull
    git checkout qt6-v2 && git pull
    # this is slow for a fresh clone, idk how to fetch all pull/*/head refs at once
    jq -r '.[].number' < ../prs.json | while read -r number; do
        # only fetch PRs we don't have a local pr/<number> branch for yet
        git describe "pr/$number" >/dev/null 2>&1 || git fetch origin "refs/pull/$number/head:pr/$number"
    done
    cd .. || exit 1
fi
|
|
|
|
# Print how many open PRs fall under each GitHub "mergeable" state recorded in
# prs.json.
python3 <<"EOF"
import collections
import json

import rich

with open("prs.json") as handle:
    open_prs = json.load(handle)

# e.g. Counter({'MERGEABLE': 29, 'CONFLICTING': 45})
rich.print(collections.Counter(entry["mergeable"] for entry in open_prs))
EOF
|
|
|
|
summary () {
    # Print one line per report-*.csv with the number of PRs whose "state"
    # column is "succeeded" and the number whose state is "failed".
    # Must be the last thing the script does: it changes back to the previous
    # directory (the work dir holding the reports) and stays there.
    cd - >/dev/null
    python3 <<"EOF"
import csv
import glob


def count_state(rows, state):
    return sum(1 for row in rows if row["state"] == state)


for report in sorted(glob.glob("report-*.csv")):
    with open(report) as f:
        rows = list(csv.DictReader(f))
    succeeded = count_state(rows, "succeeded")
    failed = count_state(rows, "failed")
    print(f"{report} {succeeded=} {failed=}")
EOF
}
|
|
|
|
# prompt_or_summary QUESTION
#
# Ask QUESTION as a yes/no prompt on stdout and read the answer from stdin.
# An answer starting with "n" or "N" prints the summary of the existing
# reports and exits the script with status 0; any other answer (including an
# empty one) simply returns so the caller carries on.
prompt_or_summary () {
    # QUESTION is data, not a format string: callers pass file names and
    # free text which may contain '%' or backslashes.
    printf '%s [Yn]: ' "$1"
    read -r ans
    case "$ans" in
        [nN]*)
            summary
            exit 0
            ;;
    esac
}
|
|
|
|
# format tool "aliases", where needed
# Each wrapper shadows the tool of the same name so callers can run every
# formatter the same way (`$cmd qutebrowser tests`). `env` (and `xargs` below)
# execute the real binary from PATH rather than recursing into the wrapper.
usort () { env usort format "$@"; }
isort () { env isort -q "$@"; }
black () { env black -q "$@"; }
# Unlike the others this ignores its arguments and runs over every tracked
# file whose name ends in ".py". The match is anchored so that names which
# merely contain ".py" (.pylintrc, *.pyi, requirements-py*.txt, ...) are not
# handed to the rewriter.
pyupgrade () { git ls-files | grep '\.py$' | xargs pyupgrade --py37-plus; }
|
|
|
|
# Print how many lines of a `git diff` (read from stdin) sit inside conflict
# hunks. The <<<<<<< / ======= / >>>>>>> marker lines themselves are not
# counted, however many hunks there are, and a "=======" line outside of a
# hunk (e.g. a heading underline) does not start one.
count_conflicting_lines () {
    awk '
        /<<<<<<< / { in_hunk = 1; next }
        />>>>>>> / { in_hunk = 0; next }
        in_hunk && /=======$/ { next }
        in_hunk { count++ }
        END { print count + 0 }
    '
}

# applies_cleanly_to_master NUMBER
# Succeed unless ../report-master.csv has a row for PR NUMBER whose state is
# "failed". A missing report file counts as "applies cleanly".
applies_cleanly_to_master () {
    # Match the whole number column and the trailing state,clean,conflicting
    # columns so that neither another PR sharing the same leading digits nor
    # the word "failed" inside a title can produce a false hit.
    if grep -q "^$1,.*,failed,[^,]*,[^,]*\$" ../report-master.csv 2>/dev/null; then
        return 1
    fi
    return 0
}

# report NUMBER UPDATED TITLE STATE CLEAN CONFLICTING
# Append one row to $report_file (set by generate_report). TITLE is quoted
# and any double quotes in it are doubled so the row stays valid CSV.
report () {
    printf '%s,%s,"%s",%s,%s,%s\n' \
        "$1" "$2" "$(printf '%s' "$3" | sed 's/"/""/g')" "$4" "$5" "$6" >> "$report_file"
}

# generate_report [BASE [QUIET [REWRITE_STRATEGY [CMDS [PR]]]]]
#
# checkout a branch, try to merge each of the open PRs, write the results to
# a CSV file
#   BASE              branch to merge into (default: master)
#   QUIET             non-empty: no progress output and no overwrite prompt
#   REWRITE_STRATEGY  "", "merge" or "rebase"; when set each PR is first
#                     re-written by <strategy>_with_formatting and the
#                     resulting tmp-rewrite-pr/<n> branch is merged instead
#   CMDS              space separated formatters for the re-write
#   PR                only process this PR number
# The report goes to ../report-BASE[-REWRITE_STRATEGY].csv. Returns non-zero
# without touching anything when the strategy is unknown or BASE cannot be
# checked out.
generate_report () {
    base="${1:-master}"
    quiet="$2"
    rewrite_strategy="$3"
    cmds="$4"
    pr="$5"

    case "$rewrite_strategy" in
        ''|merge|rebase) ;;
        *)
            echo "Unknown rewrite strategy '$rewrite_strategy'"
            return 1
            ;;
    esac

    # prefix for working branch when we are going to re-write stuff so we don't
    # mess up the pr/* branches and have to re-fetch them. Reset explicitly so
    # a value from an earlier call can't leak into a plain run.
    prefix=""
    report_file="../report-$base.csv"
    if [ -n "$rewrite_strategy" ]; then
        prefix="tmp-rewrite-"
        report_file="../report-$base-$rewrite_strategy.csv"
    fi

    git checkout -q "$base" || {
        echo "Error: could not check out '$base'" >&2
        return 1
    }

    if [ -e "$report_file" ] && [ -z "$quiet" ]; then
        prompt_or_summary "$report_file exists, overwrite?"
    fi

    echo "number,updated,title,state,clean,conflicting" > "$report_file"

    head_sha=$(git rev-parse HEAD)
    jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read -r number updated title; do
        [ -n "$pr" ] && [ "$pr" != "$number" ] && continue
        [ -n "$quiet" ] || echo "trying ${prefix}pr/$number $updated $title"
        # drop whatever the previous iteration merged
        git reset -q --hard "$head_sha"

        if [ -n "$rewrite_strategy" ]; then
            # Only attempt branches that actually merge cleanly with master.
            # Theoretically it wouldn't hurt to do all of them but a) running
            # black via the filter driver is slow b) rebase_with_formatting needs
            # some work to handle more errors in that case (the "git commit -qam
            # 'fix lint" bit at least needs to look for conflict markers)
            # I'm hardcoding master because of a lack of imagination.
            applies_cleanly_to_master "$number" || {
                echo "pr/$number failed already in ../report-master.csv, skipping"
                continue
            }
            "${rewrite_strategy}_with_formatting" "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || {
                # 999/999 marks "the re-write itself failed"
                report "$number" "$updated" "$title" failed 999 999
                continue
            }
        fi

        git merge -q --no-ff --no-edit "${prefix}pr/$number" 2>&1 1>/dev/null | grep -v preimage
        if [ -e .git/MERGE_HEAD ]; then
            # merge failed, clean lines staged and conflicting lines in working
            # tree
            merged_lines=$(git diff --cached --numstat | awk '{ sum += $1 } END { print sum + 0 }')
            conflicting_lines=$(git diff | count_conflicting_lines)
            [ -n "$quiet" ] || echo "#$number failed merging merged_lines=$merged_lines conflicting_lines=$conflicting_lines"
            maybepause "merge of ${prefix}pr/$number into $base failed"
            git merge --abort
            report "$number" "$updated" "$title" failed "$merged_lines" "$conflicting_lines"
        else
            [ -n "$quiet" ] || echo "#$number merged fine"
            #git show HEAD --oneline --stat
            report "$number" "$updated" "$title" succeeded 0 0
        fi
    done
}
|
|
|
|
# add_smudge_filter CMDS
#
# Make git run the space separated formatters in CMDS over every *.py file it
# writes to the working tree, by way of a generated "filter-cache" script.
# Must be called from the top of the repository. Pair every call with
# remove_smudge_filter.
add_smudge_filter () {
    cmds="$1"
    # Setup the filters. A "smudge" filter is configured for each tool then we
    # add the required tools to a gitattributes file. And make sure to clean
    # it up later.
    # Running the formatters as filters is slower than running them directly
    # because they seem to be run on the files serially. TODO: can we
    # parallelize them?
    # Maybe just adding a wrapper around the formatters that caches the output
    # would be simpler. At least then you just have to sit through them once.
    git config --local filter.rewrite.smudge "filter-cache"
    printf '%s\n' '*.py filter=rewrite' > .git/info/attributes

    mkdir filter-tools 2>/dev/null
    {
        printf '%s\n' '#!/bin/sh'
        # the tool list is baked into the generated script
        printf 'cmds="%s"\n' "$cmds"
        cat <<'EOF'
# Script to add as filter for git while rebasing.
# Runs the configured tools in sequence, caches the result of each tool in
# case you find yourself running through this process lots while working on
# it.

# Cache location; set FILTER_CACHE_DIR to move it.
cache_root="${FILTER_CACHE_DIR:-/tmp/filter-caches}"
inputf="$(mktemp --suffix=rebase)"
cat > "$inputf"

# TODO: de-dup these with the parent script?
# Can use aliases here?
# Call with the file directly instead of using stdin?
usort () { env usort format -; }
black () { env black -q -; }
isort () { env isort -q -; }
pyupgrade () { env pyupgrade --exit-zero-even-if-changed --py37-plus -; }

# run_with_cache FILE CMD
# Print the result of piping FILE through CMD, re-using an earlier result for
# identical input when there is one. If CMD fails FILE is printed unchanged.
run_with_cache () {
    inputf="$1"
    cmd="$2"
    input_hash="$(sha1sum "$inputf" | cut -d' ' -f1)"

    mkdir -p "$cache_root/$cmds/$cmd" 2>/dev/null
    outputf="$cache_root/$cmds/$cmd/$input_hash"

    if [ -e "$outputf" ] ;then
        lines="$(wc -l < "$outputf")"
        # Distrust empty cache entries (older versions of this script cached
        # the output of failed runs) and recompute them.
        [ "$lines" -eq 0 ] && rm "$outputf"
    fi

    if ! [ -e "$outputf" ] ;then
        # Write next to the cache entry and only move it into place once the
        # tool succeeded, so a failed run can never be served from the cache.
        partial="$outputf.partial.$$"
        if ! $cmd < "$inputf" > "$partial" ;then
            rm -f "$partial"
            echo "$cmd failed" >&2
            cat "$inputf"
            return
        fi
        mv "$partial" "$outputf"
        lines="$(wc -l < "$outputf")"
        [ "$lines" -eq 0 ] && {
            echo "tool '$cmd' produced 0 line output file from '$inputf'" >&2
        }
    fi

    cat "$outputf"
}

echo "$cmds" | tr ' ' '\n' | while read -r cmd; do
    run_with_cache "$inputf" "$cmd" | sponge "$inputf"
done

cat "$inputf"
rm "$inputf"
EOF
    } > filter-tools/filter-cache
    chmod +x filter-tools/filter-cache

    # git looks the filter command up on PATH; add our directory only once
    # no matter how many PRs are processed.
    case ":$PATH:" in
        *":$PWD/filter-tools:"*) ;;
        *) export PATH="$PWD/filter-tools:$PATH" ;;
    esac
}
|
|
|
|
# Undo add_smudge_filter. Deleting the attributes file is all it takes: the
# filter config entry and the generated script are inert unless the attribute
# is set, so they are left in place.
remove_smudge_filter () {
    rm .git/info/attributes
}
|
|
|
|
# merge_with_formatting NUMBER BASE CMDS [PREFIX [STRATEGY]]
#
# Build a re-formatted copy of PR NUMBER as branch ${PREFIX}pr/NUMBER by
# merging a formatted copy of the PR's merge base into it, with the
# formatters in CMDS (space separated) applied to the PR side through the
# smudge filter.
#   BASE      branch that is checked out again before returning
#   PREFIX    prefix of the work branch (default: tmp-rewrite-)
#   STRATEGY  accepted for symmetry with rebase_with_formatting, unused
# Returns the exit status of the merge. Whether it succeeds or fails, BASE is
# checked out afterwards.
merge_with_formatting () {
    number="$1"
    base="$2"
    cmds="$3"
    prefix="${4:-tmp-rewrite-}"

    # Use a temp base branch for now but adding "dropme" commits probably isn't the right
    # strategy for the end goal of letting PR authors adapt to autoformatter
    # changes. At that point we'll already have a re-formatted master branch.
    # Unless we can do the merge then rebase-keep-merges-but-drop-dropme or
    # something.
    # TODO: swap out this block to be based off of real master or qt-v2 or $base
    # -B rather than -b: a branch left over from an interrupted run is reset
    # instead of making the checkout fail.
    git checkout -B "tmp-master-rewrite-pr/$number" "$(git merge-base origin/master "pr/$number")"
    # Two passes: the formatters can enable further changes in each other, the
    # second pass picks up whatever the first one left behind.
    for pass in "" " 2"; do
        echo "$cmds" | tr ' ' '\n' | while read -r cmd; do
            $cmd qutebrowser tests
            git commit -am "dropme! $cmd$pass" # mark commits for dropping when we rebase onto the more recent master
        done
    done

    # -B again: the work branch outlives this function, and silently staying
    # on the formatted base branch would turn the merge below into a no-op
    # that reports stale results.
    git checkout -B "${prefix}pr/$number" "pr/$number"

    add_smudge_filter "$cmds"

    git merge -X renormalize "tmp-master-rewrite-pr/$number"
    exit_code="$?"
    remove_smudge_filter
    if [ "$exit_code" -eq 0 ] ;then
        git commit -qam "fix lint"
    else
        maybepause "merge of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
        git merge --abort
    fi
    git branch -D "tmp-master-rewrite-pr/$number"

    # Go back to the base branch on failure too, so the caller's next
    # `git reset --hard` doesn't clobber the PR work branch.
    git checkout -q "$base" || return
    return "$exit_code"
}
|
|
|
|
# rebase_with_formatting NUMBER BASE CMDS [PREFIX [STRATEGY]]
#
# Build a re-formatted copy of PR NUMBER as branch ${PREFIX}pr/NUMBER by
# rebasing it onto a formatted copy of its merge base, with the formatters in
# CMDS (space separated) applied through the smudge filter. When STRATEGY is
# "rebase" the result is then transplanted onto BASE with the "dropme!"
# formatting commits removed.
#   PREFIX    prefix of the work branch (default: tmp-rewrite-)
# Returns non-zero when either rebase fails. Whether it succeeds or fails,
# BASE is checked out afterwards.
rebase_with_formatting () {
    number="$1"
    base="$2"
    cmds="$3"
    prefix="${4:-tmp-rewrite-}"
    strategy="$5"

    # We need to apply formatting to PRs and base them on a reformatted base
    # branch.
    # I haven't looked into doing that via a merge but here is an attempt
    # doing a rebase.
    # Rebasing directly on to a formatted branch will fail very easily when it
    # runs into a formatting change. So I'm using git's "filter" attribute to
    # apply the same formatter to the trees corresponding to the
    # commits being rebased. Hopefully if we apply the same formatter to the
    # base branch and to the individual commits from the PRs we can minimize
    # conflicts.
    # An alternative to using the filter attribute might be to use something
    # like the "darker" tool to re-write the commits. I suspect that won't
    # help with conflicts in the context around changes though.

    # Checkout the parent commit of the branch then apply formatting tools to
    # it. This will provide a target for rebasing which doesn't have any
    # additional drift from changes to master. After that then we can rebase
    # the re-written PR branch to the more current, autoformatted, master.
    # TODO: It might be possible to skip the intermediate base branch.
    # -B rather than -b: a branch left over from an interrupted run is reset
    # instead of making the checkout fail.
    git checkout -B "tmp-master-rewrite-pr/$number" "$(git merge-base origin/master "pr/$number")"
    # Occasionally we get situations where black and pyupgrade build on each
    # other to enable further changes. So the order you run them matters. But we
    # have situations where each one enables the other, in both orders. So we
    # run them all yet again to pick up any lingering changes from the first
    # run.
    # If we don't do this the leftover changes can be picked up by the smudge
    # filter during the first rebase below and added to "fix lint" commits. Then
    # since they don't have "dropme!" in the messages they stick in the branch
    # and end up conflicting with the base branch.
    for pass in "" " 2"; do
        echo "$cmds" | tr ' ' '\n' | while read -r cmd; do
            $cmd qutebrowser tests
            git commit -am "dropme! $cmd$pass" # mark commits for dropping when we rebase onto the more recent master
        done
    done

    # -B again: the work branch outlives this function, and silently staying
    # on the formatted base branch would make the rebase below a no-op.
    git checkout -B "${prefix}pr/$number" "pr/$number"

    add_smudge_filter "$cmds"

    git rebase -q -X renormalize "tmp-master-rewrite-pr/$number"
    exit_code="$?"
    remove_smudge_filter
    [ "$exit_code" -eq 0 ] || {
        maybepause "rebase -X renormalize of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
        git rebase --abort
    }
    git branch -D "tmp-master-rewrite-pr/$number"

    [ "$exit_code" -eq 0 ] || {
        # Leave the repo on the base branch so the caller's next
        # `git reset --hard` doesn't clobber the PR work branch.
        git checkout -q "$base"
        return "$exit_code"
    }

    if [ "$strategy" = "rebase" ] ;then
        # now transplant onto the actual upstream branch -- might have to drop this
        # if it causes problems.
        # GIT_SEQUENCE_EDITOR is what edits the rebase todo list; plain EDITOR
        # loses against a configured GIT_EDITOR or core.editor and would pop up
        # an interactive editor instead of dropping the "dropme!" commits.
        GIT_SEQUENCE_EDITOR='sed -i /dropme/d' git rebase -qi "$base" || {
            maybepause "rebase of ${prefix}pr/$number onto $base failed"
            git rebase --abort
            git checkout -q "$base"
            return 1
        }
    fi

    git checkout -q "$base"
}
|
|
|
|
# Everything below runs destructive git commands (reset --hard, branch -D),
# so stop right here if the clone can't be entered rather than running them
# in whatever directory we happen to be in.
cd qutebrowser || {
    echo "Error: could not enter the qutebrowser clone" >&2
    exit 1
}
|
|
|
|
# run as `$0 some-branch` to report on merging all open PRs to a branch you
# made yourself. Otherwise run without args to try with a bunch of builtin
# configurations.
#
# Options (they have to come before the branch name):
#   -s, --rewrite-strategy STRATEGY   stored in $strategy
#   -p, --pull-request NUMBER         stored in $pull_request
# Parsing stops at the first argument that does not start with "-", which is
# left in $1 for the code below.

strategy=""
pull_request=""
while [ -n "$1" ] ;do
    case "$1" in
        -s|--rewrite-strategy)
            shift
            if [ -z "$1" ] ;then
                echo "What strategy?"
                exit 1
            fi
            strategy="$1"
            ;;
        -p|--pull-request)
            shift
            if [ -z "$1" ] ;then
                echo "Which PR?"
                exit 1
            fi
            pull_request="$1"
            ;;
        -*)
            echo "Unknown argument '$1'"
            exit 1
            ;;
        *)
            break
            ;;
    esac
    shift
done
|
|
|
|
# Main: either report on the branch named on the command line as it is, or
# build a formatted branch for each configuration in $tools and report on
# those. The summary of all reports is printed last.
if [ -n "$1" ] ;then
    generate_report "$1"
else
    clean_branches () {
        # only clean up tmp- branches in case I run it on my main qutebrowser
        # checkout by mistake :)
        git checkout master
        git reset --hard origin/master
        git branch -l | grep tmp- | grep -v detached | while read l; do git branch -qD $l ;done
    }

    # pre-defined auto-formatter configurations. Branches will be created as
    # needed.
    # format: branch tool1 tool2 ...
    # tools_all is the full menu, kept for reference; only $tools is run.
    tools_all="master true
tmp-black black
tmp-black_isort black isort
tmp-black_usort black usort
tmp-black_pyupgrade black pyupgrade
tmp-black_isort_pyupgrade black isort pyupgrade
tmp-black_usort_pyupgrade black usort pyupgrade
qt6-v2 true"
    tools="tmp-black_isort_pyupgrade black isort pyupgrade"

    if [ "$(echo "$tools" | wc -l | cut -d' ' -f1)" -gt 1 ] ;then
        # TODO: turn this "run it with all tool configurations and see which one
        # is the worst" thing into a CLI option. This script is a cross between
        # "gather stats" and "refine merge strategies" now and is in need of a bit
        # of a refactor.
        prompt_or_summary "Generate report for all tool configurations?"
    fi
    clean_branches

    echo "$tools" | while read branch cmds; do
        echo "$branch"
        git checkout -q "$branch" 2>/dev/null || git checkout -q -b "$branch" origin/master
        # Occasionally we get situations where black and pyupgrade build on each
        # other to enable further changes. So the order you run them matters. But we
        # have situations where each one enables the other, in both orders. So we
        # run them all a second time to pick up any lingering changes from the
        # first run.
        for run_label in "" " second run"; do
            echo "$cmds" | tr ' ' '\n' | while read cmd; do
                $cmd qutebrowser tests
                git commit -am "$cmd$run_label"
            done
        done
        generate_report "$branch" y "$strategy" "$cmds" "$pull_request"
    done
fi

summary
|
|
|
|
# todo:
# * see if we can run formatters on PR branches before/while merging
# * do most stuff based off of qt6-v2 instead of master, not like most PRs
#   will be merged to pre-3.0 master anyway
# * for strategies where we skip PRs that failed in master include them in the
#   report too, for reference. With a marker to that effect and a total diffstat
#   so we can see how big they are
# * *try the more simplistic "Run the formatter on all PR branches then merge"
#   instead of trying to do it via a rebase*
# * try rebasing them to an autoformatted qt6-v2 branch
# notes:
# after merging qt6-v2 would merging old PRs to old master then somehow merging
# the PR merge commit up to the new master be easier than rebasing the PR?
# there is a filter attribute you can use to re-write files before committing.
# For this use case probably the same as rebase -i --exec then merge?
# >See "Merging branches with differing checkin/checkout attributes" in gitattributes(5)
# if we go with the strategy of rebasing PRs on formatted commits how to deal
# with stopping isort making import loops on every damn PR. Still need to try
# rebasing directly on the latest formatted master instead of doing the
# intermediate one.
|