wip: Add initial pakjoy.py

This commit is contained in:
Florian Bruhin 2023-10-21 00:32:13 +02:00
parent 193b5a50a7
commit 238a0fa2d1
2 changed files with 202 additions and 0 deletions

View File

@ -0,0 +1,44 @@
# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Utilities for parsing binary files.
Used by elf.py as well as pakjoy.py.
"""
import struct
from typing import Any, IO, Tuple
class ParseError(Exception):

    """Error signalling that a binary file could not be read or parsed."""
def unpack(fmt: str, fobj: IO[bytes]) -> Tuple[Any, ...]:
    """Read and unpack one struct of the given format from the given file.

    Args:
        fmt: A struct format string; its calculated size determines how many
             bytes get read.
        fobj: The binary file object to read from, at its current position.

    Return:
        The unpacked values as a tuple.

    Raises:
        ParseError: If reading fails, or if the data which was read doesn't
                    match the format (e.g. because the file ended early).
    """
    size = struct.calcsize(fmt)
    data = safe_read(fobj, size)

    try:
        return struct.unpack(fmt, data)
    except struct.error as e:
        # Chain explicitly so the underlying struct error stays attached as
        # the cause of the ParseError.
        raise ParseError(e) from e
def safe_read(fobj: IO[bytes], size: int) -> bytes:
    """Read from a file, converting low-level errors to ParseError.

    Args:
        fobj: The binary file object to read from.
        size: The number of bytes to request.

    Return:
        The bytes returned by the file object's read(); this can be fewer
        than requested if the file ends early.

    Raises:
        ParseError: If the read fails with an OSError or OverflowError.
    """
    try:
        return fobj.read(size)
    except (OSError, OverflowError) as e:
        # Chain explicitly so the original error stays attached as the cause.
        raise ParseError(e) from e
def safe_seek(fobj: IO[bytes], pos: int) -> None:
    """Seek in a file, converting low-level errors to ParseError.

    Args:
        fobj: The binary file object to seek in.
        pos: The position passed on to the file object's seek().

    Raises:
        ParseError: If seeking fails with an OSError or OverflowError.
    """
    try:
        fobj.seek(pos)
    except (OSError, OverflowError) as e:
        # Chain explicitly so the original error stays attached as the cause.
        raise ParseError(e) from e

158
qutebrowser/misc/pakjoy.py Normal file
View File

@ -0,0 +1,158 @@
# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Chromium .pak repacking.
This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
and the fact we can't just simply disable the hangouts extension:
https://bugreports.qt.io/browse/QTBUG-118452
It's yet another big hack. If you think this is bad, look at elf.py instead.
The name of this file might or might not be inspired by a certain vegetable,
as well as the "joy" this bug has caused me.
Useful references:
- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
- https://github.com/myfreeer/chrome-pak-customizer
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py
This is a "best effort" parser. If it errors out, we skip the workaround
rather than crashing.
"""
import dataclasses
from typing import ClassVar, IO, Optional, Dict, Tuple
from qutebrowser.misc import binparsing
# Byte string searched for in candidate resources to recognize the hangouts
# extension manifest.
HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
# Resource ID which is tried first when looking for the manifest, before
# falling back to scanning all resources.
HANGOUTS_ID = 36197 # as found by toofar
# URL pattern looked up inside the manifest, and the value written over it.
TARGET_URL = b"https://*.google.com/*"
REPLACEMENT_URL = b"https://*.qb.invalid/*"
# The replacement gets written over the target in place (seek + write), so both
# need to have the same length to avoid touching any surrounding bytes.
assert len(TARGET_URL) == len(REPLACEMENT_URL)
@dataclasses.dataclass
class Pak5Header:

    """Header fields of a version 5 Chromium .pak file."""

    encoding: int  # uint32
    resource_count: int  # uint16
    alias_count: int  # uint16

    # Little-endian struct layout matching the fields above.
    _FORMAT: ClassVar[str] = '<IHH'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'Pak5Header':
        """Read a PAK version 5 header from the file's current position."""
        encoding, resource_count, alias_count = binparsing.unpack(
            cls._FORMAT, fobj)
        return cls(
            encoding=encoding,
            resource_count=resource_count,
            alias_count=alias_count,
        )
@dataclasses.dataclass
class PakEntry:

    """Description of a single resource entry in a .pak file."""

    resource_id: int  # uint16
    file_offset: int  # uint32
    size: int = 0  # not in file

    # Little-endian struct layout of the two fields stored in the file.
    _FORMAT: ClassVar[str] = '<HI'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'PakEntry':
        """Read a PAK entry from the file's current position."""
        resource_id, file_offset = binparsing.unpack(cls._FORMAT, fobj)
        return cls(resource_id=resource_id, file_offset=file_offset)
class PakParser:

    """Locate the hangouts extension manifest inside a Chromium .pak file.

    Attributes:
        fobj: The file object which is being parsed.
        manifest_entry: The index entry of the resource holding the manifest.
        manifest: The raw bytes of the hangouts manifest.
    """

    def __init__(self, fobj: IO[bytes]) -> None:
        """Parse the .pak file from the given file object.

        Raises:
            binparsing.ParseError: If the file is not a version 5 .pak file,
                can't be read, or doesn't contain the hangouts manifest.
        """
        version = binparsing.unpack("<I", fobj)[0]
        if version != 5:
            raise binparsing.ParseError(f"Unsupported .pak version {version}")

        self.fobj = fobj
        entries = self._read_header()
        self.manifest_entry, self.manifest = self._find_manifest(entries)

    def find_patch_offset(self) -> int:
        """Return the absolute file offset of TARGET_URL inside the manifest.

        Raises:
            binparsing.ParseError: If the manifest doesn't contain TARGET_URL.
        """
        try:
            return self.manifest_entry.file_offset + self.manifest.index(TARGET_URL)
        except ValueError as e:
            raise binparsing.ParseError("Couldn't find URL in manifest") from e

    def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
        """Return the data of the given entry if it is the hangouts manifest.

        Returns None if the data doesn't look like JSON or doesn't contain
        HANGOUTS_MARKER.
        """
        # Use the safe_* helpers so that I/O errors surface as ParseError,
        # like all other failures of this best-effort parser.
        binparsing.safe_seek(self.fobj, entry.file_offset)
        data = binparsing.safe_read(self.fobj, entry.size)

        if not data.startswith(b"{") or not data.rstrip(b"\n").endswith(b"}"):
            # not JSON
            return None

        if HANGOUTS_MARKER not in data:
            return None

        return data

    def _read_header(self) -> Dict[int, PakEntry]:
        """Read the header and entry index from the .pak file.

        Only the resource entries are read; alias entries are not.

        Return:
            A dict mapping resource IDs to their entries, with the sentinel
            entry removed and every entry's size filled in.
        """
        header = Pak5Header.parse(self.fobj)
        entries = [
            PakEntry.parse(self.fobj)
            for _ in range(header.resource_count + 1)  # + 1 due to sentinel at end
        ]

        # Sizes aren't stored in the file: each resource ends where the next
        # one (or the sentinel) starts.
        for entry, next_entry in zip(entries, entries[1:]):
            if entry.resource_id == 0:
                raise binparsing.ParseError("Unexpected sentinel entry")
            entry.size = next_entry.file_offset - entry.file_offset

        if entries[-1].resource_id != 0:
            raise binparsing.ParseError("Missing sentinel entry")
        del entries[-1]

        return {entry.resource_id: entry for entry in entries}

    def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, bytes]:
        """Find the hangouts manifest among the given entries.

        Return:
            A (entry, manifest data) tuple.

        Raises:
            binparsing.ParseError: If no entry contains the manifest.
        """
        suspected_entry = entries.get(HANGOUTS_ID)
        if suspected_entry is not None:
            manifest = self._maybe_get_hangouts_manifest(suspected_entry)
            if manifest is not None:
                return suspected_entry, manifest

        # didn't find it via the previously known ID, let's search them all...
        # (iterate over the entries themselves, not the resource ID keys)
        for entry in entries.values():
            manifest = self._maybe_get_hangouts_manifest(entry)
            if manifest is not None:
                return entry, manifest

        raise binparsing.ParseError("Couldn't find hangouts manifest")
if __name__ == "__main__":
    # Manual smoke test: patch a scratch copy of the system's resources file,
    # then parse the patched copy again and print what was found.
    import shutil

    source_pak = "/usr/share/qt6/resources/qtwebengine_resources.pak"
    scratch_pak = "/tmp/test.pak"
    shutil.copy(source_pak, scratch_pak)

    with open(scratch_pak, "r+b") as pak_file:
        pak = PakParser(pak_file)
        print(pak.manifest_entry)
        print(pak.manifest)
        # Overwrite the target URL in place with the replacement.
        patch_offset = pak.find_patch_offset()
        pak_file.seek(patch_offset)
        pak_file.write(REPLACEMENT_URL)

    with open(scratch_pak, "rb") as pak_file:
        pak = PakParser(pak_file)
        print(pak.manifest_entry)
        print(pak.manifest)