#!/usr/bin/env python3

# SPDX-FileCopyrightText: Florian Bruhin (The Compiler)
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Various small code checkers."""

import os
import os.path
import re
import sys
import argparse
import subprocess
import tokenize
import traceback
import pathlib
from typing import Iterable, List, Iterator, Optional, Tuple

# The project-local imports below are resolved relative to the repository
# root, so it has to be on sys.path before they run.
REPO_ROOT = pathlib.Path(__file__).resolve().parents[2]
sys.path.insert(0, str(REPO_ROOT))

from scripts import utils
from scripts.dev import recompile_requirements

# File suffixes which are skipped by _get_files() without trying to open them.
BINARY_EXTS = {'.png', '.icns', '.ico', '.bmp', '.gz', '.bin', '.pdf',
               '.sqlite', '.woff2', '.whl', '.egg'}


def _get_files(
        *,
        verbose: bool,
        ignored: Optional[List[pathlib.Path]] = None
) -> Iterator[pathlib.Path]:
    """Iterate over all files and yield filenames.

    The file list comes from 'git ls-files' (tracked files plus untracked
    files which are not excluded by the standard ignore rules).

    Args:
        verbose: Print every path before yielding it.
        ignored: Files or directories to skip. A path is skipped if it is
                 equal to an entry or located below it. The list is not
                 modified.

    Yields:
        Paths (as output by git) of all files which are not ignored, do not
        have a suffix in BINARY_EXTS and whose encoding can be detected.
    """
    filenames = subprocess.run(
        ['git', 'ls-files', '--cached', '--others', '--exclude-standard', '-z'],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )

    # Copy the list so that appending below never changes the caller's list.
    all_ignored = list(ignored or [])
    all_ignored.append(
        pathlib.Path('tests', 'unit', 'scripts', 'importer_sample', 'chrome'))

    for filename in filenames.stdout.split('\0'):
        path = pathlib.Path(filename)
        is_ignored = any(path == p or p in path.parents for p in all_ignored)
        # With -z, the output ends with a NUL byte, so the last entry is empty.
        if not filename or path.suffix in BINARY_EXTS or is_ignored:
            continue

        try:
            with tokenize.open(path):
                pass
        except SyntaxError as e:
            # Could not find encoding
            utils.print_col("{} - maybe {} should be added to BINARY_EXTS?".format(
                str(e).capitalize(), path.suffix), 'yellow')
            continue

        if verbose:
            print(path)

        yield path


def check_changelog_urls(_args: Optional[argparse.Namespace] = None) -> bool:
    """Ensure we have changelog URLs for all requirements.

    Args:
        _args: Unused, only there so all checkers can be called the same way.

    Return:
        True if every requirement has a changelog URL and there are no
        changelog URLs for unknown requirements, False otherwise.
    """
    ok = True
    all_requirements = set()

    for name in recompile_requirements.get_all_names():
        outfile = recompile_requirements.get_outfile(name)
        missing = set()
        with open(outfile, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#') or not line:
                    continue
                req, _version = recompile_requirements.parse_versioned_line(line)
                if req.startswith('./'):
                    continue
                if " @ " in req:  # vcs URL
                    req = req.split(" @ ")[0]
                all_requirements.add(req)
                if req not in recompile_requirements.CHANGELOG_URLS:
                    missing.add(req)

        if missing:
            ok = False
            req_str = ', '.join(sorted(missing))
            utils.print_col(
                f"Missing changelog URLs in {name} requirements: {req_str}", 'red')

    extra = set(recompile_requirements.CHANGELOG_URLS) - all_requirements
    if extra:
        ok = False
        req_str = ', '.join(sorted(extra))
        utils.print_col(f"Extra changelog URLs: {req_str}", 'red')

    if not ok:
        print("Hint: Changelog URLs are in scripts/dev/changelog_urls.json")

    return ok


def check_git(_args: Optional[argparse.Namespace] = None) -> bool:
    """Check for uncommitted git files.

    Args:
        _args: Unused, only there so all checkers can be called the same way.

    Return:
        False if there is no .git directory in the current working directory
        or if git reports untracked files (other than '.venv/'), True
        otherwise.
    """
    if not os.path.isdir(".git"):
        print("No .git dir, ignoring")
        print()
        return False
    untracked = []
    gitst = subprocess.run(['git', 'status', '--porcelain'], check=True,
                           stdout=subprocess.PIPE).stdout
    gitst = gitst.decode('UTF-8').strip()
    for line in gitst.splitlines():
        # Each line is "<status> <name>"; '??' marks an untracked entry.
        s, name = line.split(maxsplit=1)
        if s == '??' and name != '.venv/':
            untracked.append(name)
    status = True
    if untracked:
        status = False
        utils.print_col("Untracked files:", 'red')
        print('\n'.join(untracked))
    print()
    return status


def _check_spelling_file(
        path: pathlib.Path,
        fobj: Iterable[str],
        patterns: List[Tuple[re.Pattern, str]],
) -> bool:
    """Search every line of a file for the given patterns.

    Args:
        path: The path of the file, only used for the printed location.
        fobj: An iterable of lines (e.g. an opened text file).
        patterns: (compiled regex, explanation) tuples to look for.

    Return:
        True if no pattern matched any line, False otherwise.
    """
    ok = True
    for num, line in enumerate(fobj, start=1):
        for pattern, explanation in patterns:
            match = pattern.search(line)
            if match:
                ok = False
                print(f'{path}:{num}: ', end='')
                utils.print_col(f'Found "{match.group(0)}" - {explanation}', 'blue')
    return ok


def _check_spelling_all(
        args: argparse.Namespace,
        ignored: List[pathlib.Path],
        patterns: List[Tuple[re.Pattern, str]],
) -> Optional[bool]:
    """Run _check_spelling_file over all files yielded by _get_files.

    Args:
        args: The parsed commandline arguments (only 'verbose' is used).
        ignored: Files or directories which should not be checked.
        patterns: (compiled regex, explanation) tuples to look for.

    Return:
        True if no pattern matched, False if at least one did, None if an
        exception occurred (its traceback is printed).
    """
    try:
        ok = True
        for path in _get_files(verbose=args.verbose, ignored=ignored):
            with tokenize.open(str(path)) as f:
                if not _check_spelling_file(path, f, patterns):
                    ok = False
        print()
        return ok
    except Exception:
        traceback.print_exc()
        return None


def check_spelling(args: argparse.Namespace) -> Optional[bool]:
    """Check commonly misspelled words.

    Args:
        args: The parsed commandline arguments (only 'verbose' is used).

    Return:
        See _check_spelling_all.
    """
    # Words which I often misspell
    words = {'behaviour', 'quitted', 'likelyhood', 'sucessfully',
             'occur[^rs .!,]', 'seperator', 'explicitely', 'auxillary',
             'accidentaly', 'ambigious', 'loosly', 'initialis', 'convienence',
             'similiar', 'uncommited', 'reproducable', 'an user',
             'convienience', 'wether', 'programatically', 'splitted',
             'exitted', 'mininum', 'resett?ed', 'recieved', 'regularily',
             'underlaying', 'inexistant', 'elipsis', 'commiting', 'existant',
             'resetted', 'similarily', 'informations', 'an url', 'treshold',
             'artefact', 'an unix', 'an utf', 'an unicode', 'unparseable',
             'dependancies', 'convertable', 'chosing', 'authentification'}

    # Words which look better when split, but might need some fine tuning.
    words |= {'webelements', 'mouseevent', 'keysequence', 'normalmode',
              'eventloops', 'sizehint', 'statemachine', 'metaobject',
              'logrecord'}

    # Match each word with either a lower- or an uppercase first letter.
    # Sorted so that the order of reported matches is the same on every run.
    patterns = [
        (
            re.compile(r'[{}{}]{}'.format(w[0], w[0].upper(), w[1:])),
            "Common misspelling or non-US spelling"
        ) for w in sorted(words)
    ]

    # qtbot methods which are allowed despite being camelCase.
    qtbot_methods = {
        'keyPress',
        'keyRelease',
        'keyClick',
        'keyClicks',
        'keyEvent',
        'mousePress',
        'mouseRelease',
        'mouseClick',
        'mouseMove',
        'mouseDClick',
        'keySequence',
    }
    qtbot_excludes = '|'.join(sorted(qtbot_methods))

    patterns += [
        (
            re.compile(r'(?i)# noqa(?!: )'),
            "Don't use a blanket 'noqa', use something like 'noqa: X123' instead.",
        ),
        (
            re.compile(r'# type: (?!ignore(\[|$))'),
            "Don't use type comments, use type annotations instead.",
        ),
        (
            re.compile(r': typing\.'),
            "Don't use typing.SomeType, do 'from typing import SomeType' instead.",
        ),
        (
            re.compile(r"""monkeypatch\.setattr\(['"]"""),
            "Don't use monkeypatch.setattr('obj.attr', value), use "
            "setattr(obj, 'attr', value) instead.",
        ),
        (
            re.compile(r'(exec|print)_\('),
            ".exec_()/.print_() are removed in PyQt 6, use .exec()/.print() instead.",
        ),
        (
            re.compile(r'qApp'),
            "qApp is removed in PyQt 6, use QApplication.instance() instead.",
        ),
        (
            re.compile(r'PYQT_CONFIGURATION'),
            "PYQT_CONFIGURATION is removed in PyQt 6",
        ),
        (
            re.compile(r'Q_(ENUM|FLAG)'),
            "Q_ENUM and Q_FLAG are removed in PyQt 6",
        ),
        (
            re.compile(r'attr\.(s|ib)($|\()'),
            "attrs have been replaced by dataclasses in qutebrowser.",
        ),
        (
            re.compile(r'http://www\.gnu\.org/licenses/'),
            "use https:// URL.",
        ),
        (
            re.compile(r'IOError'),
            "use OSError",
        ),
        (
            re.compile(fr'qtbot\.(?!{qtbot_excludes})[a-z]+[A-Z].*'),
            "use snake-case instead",
        ),
        (
            re.compile(r'\.joinpath\((?!\*)'),
            "use the / operator for joining paths",
        ),
        (
            re.compile(r"""pathlib\.Path\(["']~["']\)\.expanduser\(\)"""),
            "use pathlib.Path.home() instead",
        ),
        (
            re.compile(r'pathlib\.Path\(tmp_path\)'),
            "tmp_path already is a pathlib.Path",
        ),
        (
            re.compile(r'pathlib\.Path\(tmpdir\)'),
            "use tmp_path instead",
        ),
        (
            re.compile(r' Copyright 2'),
            "use 'SPDX-FileCopyrightText: ...' without year instead",
        ),
        (
            re.compile(r'qutebrowser is free software: you can redistribute'),
            "use 'SPDX-License-Identifier: GPL-3.0-or-later' instead",
        ),
    ]

    # Files which should be ignored, e.g. because they come from another
    # package
    hint_data = pathlib.Path('tests', 'end2end', 'data', 'hints')
    ignored = [
        pathlib.Path('scripts', 'dev', 'misc_checks.py'),
        pathlib.Path('scripts', 'dev', 'enums.txt'),
        pathlib.Path('qutebrowser', '3rdparty', 'pdfjs'),
        pathlib.Path('qutebrowser', 'qt', '_core_pyqtproperty.py'),
        pathlib.Path('qutebrowser', 'javascript', 'caret.js'),
        hint_data / 'ace' / 'ace.js',
        hint_data / 'bootstrap' / 'bootstrap.css',
    ]
    return _check_spelling_all(args=args, ignored=ignored, patterns=patterns)


def check_pyqt_imports(args: argparse.Namespace) -> Optional[bool]:
    """Check for direct PyQt imports.

    Args:
        args: The parsed commandline arguments (only 'verbose' is used).

    Return:
        See _check_spelling_all.
    """
    ignored = [
        pathlib.Path("qutebrowser", "qt"),
        pathlib.Path("misc", "userscripts"),
        pathlib.Path("scripts"),
    ]
    patterns = [
        (
            re.compile(r"from PyQt.* import"),
            "Use 'from qutebrowser.qt.MODULE import ...' instead",
        ),
        (
            re.compile(r"import PyQt.*"),
            "Use 'import qutebrowser.qt.MODULE' instead",
        )
    ]
    return _check_spelling_all(args=args, ignored=ignored, patterns=patterns)


def check_vcs_conflict(args: argparse.Namespace) -> Optional[bool]:
    """Check VCS conflict markers.

    Args:
        args: The parsed commandline arguments (only 'verbose' is used).

    Return:
        True if no conflict marker was found, False if at least one was,
        None if an exception occurred (its traceback is printed).
    """
    try:
        ok = True
        for path in _get_files(verbose=args.verbose):
            if path.suffix in {'.rst', '.asciidoc'}:
                # False positives
                continue
            with tokenize.open(path) as f:
                for line in f:
                    # A marker is a line starting with seven identical
                    # characters out of '<', '>', '=' and '|'.
                    if any(line.startswith(c * 7) for c in '<>=|'):
                        print("Found conflict marker in {}".format(path))
                        ok = False
        print()
        return ok
    except Exception:
        traceback.print_exc()
        return None


def check_userscripts_descriptions(
        _args: Optional[argparse.Namespace] = None
) -> bool:
    """Make sure all userscripts are described properly.

    Compares the entries in misc/userscripts with the "- [name]..." list
    items in its README.md, up to the "## Others" heading.

    Args:
        _args: Unused, only there so all checkers can be called the same way.

    Return:
        True if the README describes exactly the entries which are present,
        False otherwise.
    """
    folder = pathlib.Path('misc/userscripts')
    readme = folder / 'README.md'

    described = set()
    # Use a context manager so the file gets closed, and an explicit encoding
    # so the result does not depend on the locale.
    with readme.open('r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line == '## Others':
                break

            match = re.fullmatch(r'- \[([^]]*)\].*', line)
            if match:
                described.add(match.group(1))

    present = {path.name for path in folder.iterdir()}
    present -= {'README.md', '.mypy_cache', '__pycache__'}

    missing = present - described
    additional = described - present
    ok = True

    if missing:
        print("Missing userscript descriptions: {}".format(missing))
        ok = False
    if additional:
        print("Additional userscript descriptions: {}".format(additional))
        ok = False

    return ok


def check_userscript_shebangs(_args: argparse.Namespace) -> bool:
    """Check that we're using /usr/bin/env in shebangs and scripts are executable.

    Args:
        _args: Unused, only there so all checkers can be called the same way.

    Return:
        True if all files in misc/userscripts (except README.md) are
        executable and start with an accepted shebang, False otherwise.
    """
    ok = True
    folder = pathlib.Path('misc/userscripts')

    for sub in folder.iterdir():
        if sub.is_dir() or sub.name == 'README.md':
            continue

        if not os.access(sub, os.X_OK):
            print(f"{sub} is not marked as executable")
            ok = False

        with sub.open('r', encoding='utf-8') as f:
            shebang = f.readline().rstrip('\n')

        # Report a missing shebang as a normal failure instead of asserting,
        # so the remaining scripts still get checked (and the check is not
        # skipped when running with -O).
        if not shebang.startswith('#!'):
            print(f"{sub} does not start with a shebang line: {shebang!r}")
            ok = False
            continue

        shebang = shebang[2:]
        parts = shebang.split()
        if not parts:
            print(f"{sub} has a shebang line without an interpreter")
            ok = False
            continue

        binary = parts[0]
        if binary not in ['/bin/sh', '/usr/bin/env']:
            bin_name = pathlib.Path(binary).name
            print(f"In {sub}, use #!/usr/bin/env {bin_name} instead of #!{binary}")
            ok = False
        elif shebang in ['/usr/bin/env python', '/usr/bin/env python2']:
            print(f"In {sub}, use #!/usr/bin/env python3 instead of #!{shebang}")
            ok = False

    return ok


def check_vim_modelines(args: argparse.Namespace) -> bool:
    """Check that we're not using vim modelines.

    Args:
        args: The parsed commandline arguments (only 'verbose' is used).

    Return:
        True if no modeline was found, False if at least one was or if an
        exception occurred (its traceback is printed).
    """
    ok = True
    try:
        for path in _get_files(verbose=args.verbose):
            with tokenize.open(str(path)) as f:
                for num, line in enumerate(f, start=1):
                    if not line.startswith("# vim:"):
                        continue
                    print(f"{path}:{num}: Remove vim modeline "
                          "(deprecated in favor of .editorconfig)")
                    ok = False
    except Exception:
        traceback.print_exc()
        ok = False
    return ok


def main() -> int:
    """Parse the commandline and run the selected checker(s).

    Return:
        The exit status: 0 if every checker which was run returned a truthy
        value, 1 otherwise.
    """
    checkers = {
        'git': check_git,
        'vcs': check_vcs_conflict,
        'spelling': check_spelling,
        'pyqt-imports': check_pyqt_imports,
        'userscript-descriptions': check_userscripts_descriptions,
        'userscript-shebangs': check_userscript_shebangs,
        'changelog-urls': check_changelog_urls,
        'vim-modelines': check_vim_modelines,
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true', help='Show checked filenames')
    parser.add_argument('checker',
                        choices=list(checkers) + ['all'],
                        help="Which checker to run.")
    args = parser.parse_args()

    if args.checker == 'all':
        retvals = []
        for name, checker in checkers.items():
            utils.print_title(name)
            retvals.append(checker(args))
        # A checker returning None (error) counts as a failure as well.
        return 0 if all(retvals) else 1

    checker = checkers[args.checker]
    ok = checker(args)
    return 0 if ok else 1


if __name__ == '__main__':
    sys.exit(main())