Add overrides for mimetype -> extension conversion

Python 3.6 gets some of those "wrong":
https://bugs.python.org/issue1043134

This has been fixed in Python 3.7:
https://github.com/python/cpython/pull/14375

The override dict has been generated by copying the types_map from the
current git master:
https://github.com/python/cpython/blob/v3.10.0a3/Lib/mimetypes.py#L414-L547

And then running the following with Python 3.6 (with the copied `types_map` dict defined first):

    import mimetypes

    reverse = {}
    for ext, mimetype in types_map.items():
        if mimetype not in reverse:
            reverse[mimetype] = ext
    assert reverse['text/plain'] == '.txt'

    for mimetype, ext in reverse.items():
        got = mimetypes.guess_extension(mimetype)
        if got != ext:
            print(f'    "{mimetype}": "{ext}",  # not {got}')
This commit is contained in:
Florian Bruhin 2021-01-04 21:52:44 +01:00
parent 40464ebe3d
commit ebb3046822
3 changed files with 47 additions and 1 deletions

View File

@ -412,7 +412,7 @@ def filename_from_url(url: QUrl, fallback: str = None) -> Optional[str]:
if not mimetype:
return fallback
ext = mimetypes.guess_extension(mimetype, strict=False) or ''
ext = utils.mimetype_extension(mimetype) or ''
return 'download' + ext
pathname = posixpath.basename(url.path())

View File

@ -803,3 +803,38 @@ def parse_duration(duration: str) -> int:
hours = float(hours_string.rstrip('h'))
milliseconds = int((seconds + minutes * 60 + hours * 3600) * 1000)
return milliseconds
def mimetype_extension(mimetype: str) -> Optional[str]:
    """Return a file extension (including the leading dot) for a mimetype.

    A small table of backported mappings is consulted first; it covers
    mimetypes for which older Python versions either have no entry or pick a
    less suitable extension. Anything not listed there is passed on to
    mimetypes.guess_extension() in non-strict mode.

    Once the oldest supported Python version ships all of these mappings
    itself, this helper can probably be replaced by a direct call to
    mimetypes.guess_extension().

    Args:
        mimetype: The mimetype to look up, e.g. "text/plain".

    Return:
        The extension as a string, or None if neither the backport table nor
        Python's mimetypes module knows the given mimetype.
    """
    backported = {
        # Only present in newer Python versions (added around 3.8)
        "application/manifest+json": ".webmanifest",
        "application/x-hdf5": ".h5",

        # Present since Python 3.7
        "application/wasm": ".wasm",

        # Python 3.6 picks a different extension for these, see:
        # https://bugs.python.org/issue1043134
        # https://github.com/python/cpython/pull/14375
        "application/octet-stream": ".bin",  # 3.6 guessed .a
        "application/postscript": ".ps",  # 3.6 guessed .ai
        "application/vnd.ms-excel": ".xls",  # 3.6 guessed .xlb
        "application/vnd.ms-powerpoint": ".ppt",  # 3.6 guessed .pot
        "application/xml": ".xsl",  # 3.6 guessed .rdf
        "audio/mpeg": ".mp3",  # 3.6 guessed .mp2
        "image/jpeg": ".jpg",  # 3.6 guessed .jpe
        "image/tiff": ".tiff",  # 3.6 guessed .tif
        "text/html": ".html",  # 3.6 guessed .htm
        "text/plain": ".txt",  # 3.6 guessed .bat
        "video/mpeg": ".mpeg",  # 3.6 guessed .m1v
    }

    extension = backported.get(mimetype)
    if extension is None:
        # Not backported: let the standard library decide. strict=False also
        # takes its non-standard but commonly used types into account.
        extension = mimetypes.guess_extension(mimetype, strict=False)
    return extension

View File

@ -867,3 +867,14 @@ def test_parse_duration_hypothesis(duration):
utils.parse_duration(duration)
except ValueError:
pass
@pytest.mark.parametrize(('mimetype', 'extension'), [
    # Resolved by Python's own mimetypes module
    ('application/pdf', '.pdf'),
    # Wrong in Python 3.6, so taken from the override dict
    ('text/plain', '.txt'),
    # Only known to newer Python versions, so taken from the override dict
    ('application/manifest+json', '.webmanifest'),
    # Only found because the lookup is done with strict=False
    ('text/xul', '.xul'),
    # Unknown mimetype
    ('doesnot/exist', None),
])
def test_mimetype_extension(mimetype, extension):
    """Check utils.mimetype_extension() against the expected extension."""
    actual = utils.mimetype_extension(mimetype)
    assert actual == extension