qutebrowser/scripts/dev/misc_checks.py

#!/usr/bin/env python3
# Copyright 2014-2021 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser.  If not, see <https://www.gnu.org/licenses/>.

"""Various small code checkers."""

import os
import os.path
import re
import sys
import argparse
import subprocess
import tokenize
import traceback
import pathlib
from typing import List, Iterator, Optional, Tuple

REPO_ROOT = pathlib.Path(__file__).resolve().parents[2]
sys.path.insert(0, str(REPO_ROOT))

from scripts import utils
from scripts.dev import recompile_requirements

BINARY_EXTS = {'.png', '.icns', '.ico', '.bmp', '.gz', '.bin', '.pdf',
               '.sqlite', '.woff2', '.whl', '.egg'}


def _get_files(
        *,
        verbose: bool,
        ignored: List[pathlib.Path] = None
) -> Iterator[pathlib.Path]:
    """Iterate over all files and yield filenames."""
    filenames = subprocess.run(
        ['git', 'ls-files', '--cached', '--others', '--exclude-standard', '-z'],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    all_ignored = ignored or []
    all_ignored.append(
        pathlib.Path('tests', 'unit', 'scripts', 'importer_sample', 'chrome'))

    for filename in filenames.stdout.split('\0'):
        path = pathlib.Path(filename)
        is_ignored = any(path == p or p in path.parents for p in all_ignored)
        if not filename or path.suffix in BINARY_EXTS or is_ignored:
            continue

        try:
            with tokenize.open(path):
                pass
        except SyntaxError as e:
            # Could not find encoding
            utils.print_col("{} - maybe {} should be added to BINARY_EXTS?".format(
                str(e).capitalize(), path.suffix), 'yellow')
            continue

        if verbose:
            print(path)

        yield path


def check_changelog_urls(_args: argparse.Namespace = None) -> bool:
    """Ensure we have changelog URLs for all requirements."""
    ok = True
    all_requirements = set()

    for name in recompile_requirements.get_all_names():
        outfile = recompile_requirements.get_outfile(name)
        missing = set()
        with open(outfile, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#') or not line:
                    continue
                req, _version = recompile_requirements.parse_versioned_line(line)
                if req.startswith('./'):
                    continue
                if " @ " in req:  # vcs URL
                    req = req.split(" @ ")[0]

                all_requirements.add(req)
                if req not in recompile_requirements.CHANGELOG_URLS:
                    missing.add(req)

        if missing:
            ok = False
            req_str = ', '.join(sorted(missing))
            utils.print_col(
                f"Missing changelog URLs in {name} requirements: {req_str}", 'red')

    extra = set(recompile_requirements.CHANGELOG_URLS) - all_requirements
    if extra:
        ok = False
        req_str = ', '.join(sorted(extra))
        utils.print_col(f"Extra changelog URLs: {req_str}", 'red')

    if not ok:
        print("Hint: Changelog URLs are in scripts/dev/changelog_urls.json")

    return ok


def check_git(_args: argparse.Namespace = None) -> bool:
    """Check for uncommitted git files."""
    if not os.path.isdir(".git"):
        print("No .git dir, ignoring")
        print()
        return False
    untracked = []
    gitst = subprocess.run(['git', 'status', '--porcelain'], check=True,
                           stdout=subprocess.PIPE).stdout
    gitst = gitst.decode('UTF-8').strip()
    for line in gitst.splitlines():
        s, name = line.split(maxsplit=1)
        if s == '??' and name != '.venv/':
            untracked.append(name)
    status = True
    if untracked:
        status = False
        utils.print_col("Untracked files:", 'red')
        print('\n'.join(untracked))
    print()
    return status


def _check_spelling_file(path, fobj, patterns):
    ok = True
    for num, line in enumerate(fobj, start=1):
        for pattern, explanation in patterns:
            match = pattern.search(line)
            if match:
                ok = False
                print(f'{path}:{num}: ', end='')
                utils.print_col(f'Found "{match.group(0)}" - {explanation}', 'blue')
    return ok


def _check_spelling_all(
    args: argparse.Namespace,
    ignored: List[pathlib.Path],
    patterns: List[Tuple[re.Pattern, str]],
) -> Optional[bool]:
    try:
        ok = True
        for path in _get_files(verbose=args.verbose, ignored=ignored):
            with tokenize.open(str(path)) as f:
                if not _check_spelling_file(path, f, patterns):
                    ok = False
        print()
        return ok
    except Exception:
        traceback.print_exc()
        return None


def check_spelling(args: argparse.Namespace) -> Optional[bool]:
    """Check commonly misspelled words."""
    # Words which I often misspell
    words = {'behaviour', 'quitted', 'likelyhood', 'sucessfully',
             'occur[^rs .!,]', 'seperator', 'explicitely', 'auxillary',
             'accidentaly', 'ambigious', 'loosly', 'initialis', 'convienence',
             'similiar', 'uncommited', 'reproducable', 'an user',
             'convienience', 'wether', 'programatically', 'splitted',
             'exitted', 'mininum', 'resett?ed', 'recieved', 'regularily',
             'underlaying', 'inexistant', 'elipsis', 'commiting', 'existant',
             'resetted', 'similarily', 'informations', 'an url', 'treshold',
             'artefact', 'an unix', 'an utf', 'an unicode', 'unparseable',
             'dependancies', 'convertable', 'chosing', 'authentification'}

    # Words which look better when split, but might need some fine tuning.
    words |= {'webelements', 'mouseevent', 'keysequence', 'normalmode',
              'eventloops', 'sizehint', 'statemachine', 'metaobject',
              'logrecord'}

    patterns = [
        (
            re.compile(r'[{}{}]{}'.format(w[0], w[0].upper(), w[1:])),
            "Common misspelling or non-US spelling"
        ) for w in words
    ]

    qtbot_methods = {
        'keyPress',
        'keyRelease',
        'keyClick',
        'keyClicks',
        'keyEvent',
        'mousePress',
        'mouseRelease',
        'mouseClick',
        'mouseMove',
        'mouseDClick',
        'keySequence',
    }

    qtbot_excludes = '|'.join(qtbot_methods)

    patterns += [
        (
            re.compile(r'(?i)# noqa(?!: )'),
            "Don't use a blanket 'noqa', use something like 'noqa: X123' instead.",
        ),
        (
            re.compile(r'# type: (?!ignore(\[|$))'),
            "Don't use type comments, use type annotations instead.",
        ),
        (
            re.compile(r': typing\.'),
            "Don't use typing.SomeType, do 'from typing import SomeType' instead.",
        ),
        (
            re.compile(r"""monkeypatch\.setattr\(['"]"""),
            "Don't use monkeypatch.setattr('obj.attr', value), use "
            "setattr(obj, 'attr', value) instead.",
        ),
        (
            re.compile(r'(exec|print)_\('),
            ".exec_()/.print_() are removed in PyQt 6, use .exec()/.print() instead.",
        ),
        (
            re.compile(r'qApp'),
            "qApp is removed in PyQt 6, use QApplication.instance() instead.",
        ),
        (
            re.compile(r'PYQT_CONFIGURATION'),
            "PYQT_CONFIGURATION is removed in PyQt 6",
        ),
        (
            re.compile(r'Q_(ENUM|FLAG)'),
            "Q_ENUM and Q_FLAG are removed in PyQt 6",
        ),
        (
            re.compile(r'attr\.(s|ib)($|\()'),
            "attrs have been replaced by dataclasses in qutebrowser.",
        ),
        (
            re.compile(r'http://www\.gnu\.org/licenses/'),
            "use https:// URL.",
        ),
        (
            re.compile(r'IOError'),
            "use OSError",
        ),
        (
            re.compile(fr'qtbot\.(?!{qtbot_excludes})[a-z]+[A-Z].*'),
            "use snake-case instead",
        ),
        (
            re.compile(r'\.joinpath\((?!\*)'),
            "use the / operator for joining paths",
        ),
        (
            re.compile(r"""pathlib\.Path\(["']~["']\)\.expanduser\(\)"""),
            "use pathlib.Path.home() instead",
        ),
        (
            re.compile(r'pathlib\.Path\(tmp_path\)'),
            "tmp_path already is a pathlib.Path",
        ),
        (
            re.compile(r'pathlib\.Path\(tmpdir\)'),
            "use tmp_path instead",
        ),
    ]

    # Files which should be ignored, e.g. because they come from another
    # package
    hint_data = pathlib.Path('tests', 'end2end', 'data', 'hints')
    ignored = [
        pathlib.Path('scripts', 'dev', 'misc_checks.py'),
        pathlib.Path('scripts', 'dev', 'enums.txt'),
        pathlib.Path('qutebrowser', '3rdparty', 'pdfjs'),
        pathlib.Path('qutebrowser', 'qt', '_core_pyqtproperty.py'),
        hint_data / 'ace' / 'ace.js',
        hint_data / 'bootstrap' / 'bootstrap.css',
    ]
    return _check_spelling_all(args=args, ignored=ignored, patterns=patterns)


def check_pyqt_imports(args: argparse.Namespace) -> Optional[bool]:
    """Check for direct PyQt imports."""
    ignored = [
        pathlib.Path("qutebrowser", "qt"),
        pathlib.Path("misc", "userscripts"),
        pathlib.Path("scripts"),
    ]
    patterns = [
        (
            re.compile(r"from PyQt.* import"),
            "Use 'from qutebrowser.qt.MODULE import ...' instead",
        ),
        (
            re.compile(r"import PyQt.*"),
            "Use 'import qutebrowser.qt.MODULE' instead",
        )
    ]
    return _check_spelling_all(args=args, ignored=ignored, patterns=patterns)


def check_vcs_conflict(args: argparse.Namespace) -> Optional[bool]:
    """Check VCS conflict markers."""
    try:
        ok = True
        for path in _get_files(verbose=args.verbose):
            if path.suffix in {'.rst', '.asciidoc'}:
                # False positives
                continue
            with tokenize.open(path) as f:
                for line in f:
                    if any(line.startswith(c * 7) for c in '<>=|'):
                        print("Found conflict marker in {}".format(path))
                        ok = False
        print()
        return ok
    except Exception:
        traceback.print_exc()
        return None


def check_userscripts_descriptions(_args: argparse.Namespace = None) -> bool:
    """Make sure all userscripts are described properly."""
    folder = pathlib.Path('misc/userscripts')
    readme = folder / 'README.md'

    described = set()
    for line in readme.open('r'):
        line = line.strip()
        if line == '## Others':
            break

        match = re.fullmatch(r'- \[([^]]*)\].*', line)
        if match:
            described.add(match.group(1))

    present = {path.name for path in folder.iterdir()}
    present -= {'README.md', '.mypy_cache', '__pycache__'}

    missing = present - described
    additional = described - present
    ok = True

    if missing:
        print("Missing userscript descriptions: {}".format(missing))
        ok = False
    if additional:
        print("Additional userscript descriptions: {}".format(additional))
        ok = False

    return ok


def check_userscript_shebangs(_args: argparse.Namespace) -> bool:
    """Check that we're using /usr/bin/env in shebangs and scripts are executable."""
    ok = True
    folder = pathlib.Path('misc/userscripts')

    for sub in folder.iterdir():
        if sub.is_dir() or sub.name == 'README.md':
            continue

        if not os.access(sub, os.X_OK):
            print(f"{sub} is not marked as executable")
            ok = False

        with sub.open('r', encoding='utf-8') as f:
            shebang = f.readline().rstrip('\n')
        assert shebang.startswith('#!'), shebang
        shebang = shebang[2:]

        binary = shebang.split()[0]
        if binary not in ['/bin/sh', '/usr/bin/env']:
            bin_name = pathlib.Path(binary).name
            print(f"In {sub}, use #!/usr/bin/env {bin_name} instead of #!{binary}")
            ok = False
        elif shebang in ['/usr/bin/env python', '/usr/bin/env python2']:
            print(f"In {sub}, use #!/usr/bin/env python3 instead of #!{shebang}")
            ok = False

    return ok


def check_vim_modelines(args: argparse.Namespace) -> bool:
    """Check that we're not using vim modelines."""
    ok = True
    try:
        for path in _get_files(verbose=args.verbose):
            with tokenize.open(str(path)) as f:
                for num, line in enumerate(f, start=1):
                    if not line.startswith("# vim:"):
                        continue
                    print(f"{path}:{num}: Remove vim modeline "
                          "(deprecated in favor of .editorconfig)")
                    ok = False
    except Exception:
        traceback.print_exc()
        ok = False

    return ok


def main() -> int:
    checkers = {
        'git': check_git,
        'vcs': check_vcs_conflict,
        'spelling': check_spelling,
        'pyqt-imports': check_pyqt_imports,
        'userscript-descriptions': check_userscripts_descriptions,
        'userscript-shebangs': check_userscript_shebangs,
        'changelog-urls': check_changelog_urls,
        'vim-modelines': check_vim_modelines,
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true', help='Show checked filenames')
    parser.add_argument('checker',
                        choices=list(checkers) + ['all'],
                        help="Which checker to run.")
    args = parser.parse_args()

    if args.checker == 'all':
        retvals = []
        for name, checker in checkers.items():
            utils.print_title(name)
            retvals.append(checker(args))
        return 0 if all(retvals) else 1

    checker = checkers[args.checker]
    ok = checker(args)
    return 0 if ok else 1


if __name__ == '__main__':
    sys.exit(main())