Rewrite file collection in misc_checks.py

Now collects all (non-ignored) files via `git ls-files` and only skips some binary formats. Also adds some type annotations and fixes some newly uncovered UK spellings.
2020-09-06 17:23:03 +02:00 · 2020-09-06 17:23:03 +02:00 · a78442bb67
parent 3abfd78c25
commit a78442bb67
5 changed files with 61 additions and 45 deletions
--- a/misc/nsis/qutebrowser.nsi
+++ b/misc/nsis/qutebrowser.nsi
@ -1,4 +1,5 @@
 # Copyright 2018 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
+# encoding: iso-8859-1
 #
 # This file is part of qutebrowser.
 #
--- a/misc/userscripts/cast
+++ b/misc/userscripts/cast
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #
-# Behaviour
+# Behavior
 #   Userscript for qutebrowser which casts the url passed in $1 to the default
 #   ChromeCast device in the network using the program `castnow`
 #
--- a/misc/userscripts/qute-bitwarden
+++ b/misc/userscripts/qute-bitwarden
@ -281,7 +281,7 @@ def main(arguments):
        qute_command('enter-mode insert')

    # If it finds a TOTP code, it copies it to the clipboard,
-    # which is the same behaviour as the Firefox add-on.
+    # which is the same behavior as the Firefox add-on.
    if not arguments.totp_only and totp and arguments.totp:
        # The import is done here, to make pyperclip an optional dependency
        import pyperclip
--- a/scripts/dev/misc_checks.py
+++ b/scripts/dev/misc_checks.py
@ -30,33 +30,45 @@ import tokenize
 import traceback
 import collections
 import pathlib
+from typing import List, Iterator, Optional

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir,
                                os.pardir))

 from scripts import utils

-
-def _get_files(only_py=False):
-    """Iterate over all python files and yield filenames."""
-    for (dirpath, _dirnames, filenames) in os.walk('.'):
-        parts = dirpath.split(os.sep)
-        if len(parts) >= 2:
-            rootdir = parts[1]
-            if rootdir.startswith('.') or rootdir == 'htmlcov':
-                # ignore hidden dirs and htmlcov
-                continue
-
-        if only_py:
-            endings = {'.py'}
-        else:
-            endings = {'.py', '.asciidoc', '.js', '.feature'}
-        files = (e for e in filenames if os.path.splitext(e)[1] in endings)
-        for name in files:
-            yield os.path.join(dirpath, name)
+BINARY_EXTS = {'.png', '.icns', '.ico', '.bmp', '.gz', '.bin', '.pdf',
+               '.sqlite', '.woff2'}


-def check_git():
+def _get_files(
+        *,
+        verbose: bool,
+        ignored: List[pathlib.Path] = None
+) -> Iterator[pathlib.Path]:
+    """Iterate over all files and yield filenames."""
+    filenames = subprocess.run(
+        ['git', 'ls-files', '--cached', '--others', '--exclude-standard', '-z'],
+        stdout=subprocess.PIPE,
+        universal_newlines=True
+    )
+    all_ignored = ignored or []
+    all_ignored.append(
+        pathlib.Path('tests', 'unit', 'scripts', 'importer_sample', 'chrome'))
+
+    for filename in filenames.stdout.split('\0'):
+        path = pathlib.Path(filename)
+        is_ignored = any(path == p or p in path.parents for p in all_ignored)
+        if not filename or path.suffix in BINARY_EXTS or is_ignored:
+            continue
+
+        if verbose:
+            print(path)
+
+        yield path
+
+
+def check_git(_args: argparse.Namespace) -> bool:
    """Check for uncommitted git files.."""
    if not os.path.isdir(".git"):
        print("No .git dir, ignoring")
@ -79,7 +91,7 @@ def check_git():
    return status


-def check_spelling():
+def check_spelling(args: argparse.Namespace) -> Optional[bool]:
    """Check commonly misspelled words."""
    # Words which I often misspell
    words = {'behaviour', 'quitted', 'likelyhood', 'sucessfully',
@ -95,32 +107,31 @@ def check_spelling():
    # Words which look better when splitted, but might need some fine tuning.
    words |= {'webelements', 'mouseevent', 'keysequence', 'normalmode',
              'eventloops', 'sizehint', 'statemachine', 'metaobject',
-              'logrecord', 'filetype'}
+              'logrecord'}

    # Files which should be ignored, e.g. because they come from another
    # package
+    hint_data = pathlib.Path('tests', 'end2end', 'data', 'hints')
    ignored = [
-        os.path.join('.', 'scripts', 'dev', 'misc_checks.py'),
-        os.path.join('.', 'qutebrowser', '3rdparty', 'pdfjs'),
-        os.path.join('.', 'tests', 'end2end', 'data', 'hints', 'ace',
-                     'ace.js'),
+        pathlib.Path('scripts', 'dev', 'misc_checks.py'),
+        pathlib.Path('qutebrowser', '3rdparty', 'pdfjs'),
+        hint_data / 'ace' / 'ace.js',
+        hint_data / 'bootstrap' / 'bootstrap.css',
    ]

    seen = collections.defaultdict(list)
    try:
        ok = True
-        for fn in _get_files():
-            with tokenize.open(fn) as f:
-                if any(fn.startswith(i) for i in ignored):
-                    continue
+        for path in _get_files(verbose=args.verbose, ignored=ignored):
+            with tokenize.open(str(path)) as f:
                for line in f:
                    for w in words:
                        pattern = '[{}{}]{}'.format(w[0], w[0].upper(), w[1:])
                        if (re.search(pattern, line) and
-                                fn not in seen[w] and
+                                path not in seen[w] and
                                '# pragma: no spellcheck' not in line):
-                            print('Found "{}" in {}!'.format(w, fn))
-                            seen[w].append(fn)
+                            print('Found "{}" in {}!'.format(w, path))
+                            seen[w].append(path)
                            ok = False
        print()
        return ok
@ -129,15 +140,18 @@ def check_spelling():
        return None


-def check_vcs_conflict():
+def check_vcs_conflict(args: argparse.Namespace) -> Optional[bool]:
    """Check VCS conflict markers."""
    try:
        ok = True
-        for fn in _get_files(only_py=True):
-            with tokenize.open(fn) as f:
+        for path in _get_files(verbose=args.verbose):
+            if path.suffix in {'.rst', '.asciidoc'}:
+                # False positives
+                continue
+            with tokenize.open(str(path)) as f:
                for line in f:
                    if any(line.startswith(c * 7) for c in '<>=|'):
-                        print("Found conflict marker in {}".format(fn))
+                        print("Found conflict marker in {}".format(path))
                        ok = False
        print()
        return ok
@ -146,7 +160,7 @@ def check_vcs_conflict():
        return None


-def check_userscripts_descriptions():
+def check_userscripts_descriptions(_args: argparse.Namespace) -> bool:
    """Make sure all userscripts are described properly."""
    folder = pathlib.Path('misc/userscripts')
    readme = folder / 'README.md'
@ -178,20 +192,21 @@ def check_userscripts_descriptions():
    return ok


-def main():
+def main() -> int:
    parser = argparse.ArgumentParser()
+    parser.add_argument('--verbose', action='store_true', help='Show checked filenames')
    parser.add_argument('checker',
                        choices=('git', 'vcs', 'spelling', 'userscripts'),
                        help="Which checker to run.")
    args = parser.parse_args()
    if args.checker == 'git':
-        ok = check_git()
+        ok = check_git(args)
    elif args.checker == 'vcs':
-        ok = check_vcs_conflict()
+        ok = check_vcs_conflict(args)
    elif args.checker == 'spelling':
-        ok = check_spelling()
+        ok = check_spelling(args)
    elif args.checker == 'userscripts':
-        ok = check_userscripts_descriptions()
+        ok = check_userscripts_descriptions(args)
    return 0 if ok else 1


--- a/tests/manual/hints/hide_unmatched_rapid_hints.html
+++ b/tests/manual/hints/hide_unmatched_rapid_hints.html
@ -7,7 +7,7 @@
    <body>
        <p>When <code>hints.hide_unmatched_rapid_hints</code> is set to true (default), rapid hints behave like normal hints, i.e. unmatched hints will be hidden as you type. Setting the option to false will disable hiding in rapid mode, which is sometimes useful (see <a href="https://github.com/qutebrowser/qutebrowser/issues/1799">#1799</a>).</p>
        <p>Note that when hinting in number mode, the <code>hints.hide_unmatched_rapid_hints</code> option affects typing the hint string (number), but not the filter (letters).</p>
-        <p>Here is couple of invalid links to test the behaviour:</p>
+        <p>Here is couple of invalid links to test the behavior:</p>
        <p><a href="#foo">one</a></p>
        <p><a href="#foo">two</a></p>
        <p><a href="#foo">three</a></p>