Improve download filenames for data: URLs

With QtWebEngine, downloading a data: URL seems to give us the raw data:
URL as filename, similar to #1214 / #1321 but for QtWebEngine.

With QtWebKit, the logic is now also improved so that we get a proper
extension rather than a "binary blob" filename.

See #1099
This commit is contained in:
Florian Bruhin 2021-01-04 19:36:42 +01:00
parent 0a6c488b88
commit 9ae08c0f15
5 changed files with 90 additions and 28 deletions

View File

@ -465,6 +465,25 @@ class DownloadManager(downloads.AbstractDownloadManager):
mhtml.start_download_checked, tab=tab))
message.global_bridge.ask(question, blocking=False)
def _get_suggested_filename(self, request):
    """Work out which filename to suggest for the given request.

    For anything but data: URLs, the name is derived from the request's own
    URL. For data: URLs, the URL of the object which triggered the request is
    used instead, if that object exposes one.
    """
    fallback_name = 'qutebrowser-download'
    request_url = request.url()

    if request_url.scheme().lower() != 'data':
        return urlutils.filename_from_url(request_url, fallback=fallback_name)

    # A data: URL can be a binary blob embedded in a page, or even something
    # generated on the fly via javascript. The base64 payload makes for a
    # rather useless name, so prefer the URL of whatever started the request
    # whenever we can find out about it.
    originator = request.originatingObject()
    try:
        name_source = originator.url()
    except AttributeError:
        # Either there is no originating object (None), or it's something
        # without a url() of its own. Base the name on the data: URL itself,
        # which in turn can still end up as the default fallback.
        name_source = request_url

    return urlutils.filename_from_url(name_source, fallback=fallback_name)
def get_request(self, request, *, target=None,
suggested_fn=None, **kwargs):
"""Start a download with a QNetworkRequest.
@ -482,29 +501,8 @@ class DownloadManager(downloads.AbstractDownloadManager):
request.setAttribute(QNetworkRequest.CacheLoadControlAttribute,
QNetworkRequest.AlwaysNetwork)
if suggested_fn is not None:
pass
elif request.url().scheme().lower() != 'data':
suggested_fn = urlutils.filename_from_url(request.url())
else:
# We might be downloading a binary blob embedded on a page or even
# generated dynamically via javascript. We try to figure out a more
# sensible name than the base64 content of the data.
origin = request.originatingObject()
try:
origin_url = origin.url()
except AttributeError:
# Raised either if origin is None or some object that doesn't
# have its own url. We're probably fine with a default fallback
# then.
suggested_fn = 'binary blob'
else:
# Use the originating URL as a base for the filename (works
# e.g. for pdf.js).
suggested_fn = urlutils.filename_from_url(origin_url)
if suggested_fn is None:
suggested_fn = 'qutebrowser-download'
suggested_fn = self._get_suggested_filename(request)
return self._fetch_request(request,
target=target,

View File

@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QObject
from PyQt5.QtWebEngineWidgets import QWebEngineDownloadItem
from qutebrowser.browser import downloads, pdfjs
from qutebrowser.utils import debug, usertypes, message, log, objreg
from qutebrowser.utils import debug, usertypes, message, log, objreg, urlutils
class DownloadItem(downloads.AbstractDownloadItem):
@ -249,7 +249,14 @@ class DownloadManager(downloads.AbstractDownloadManager):
@pyqtSlot(QWebEngineDownloadItem)
def handle_download(self, qt_item):
"""Start a download coming from a QWebEngineProfile."""
suggested_filename = _get_suggested_filename(qt_item.path())
if qt_item.url().scheme().lower() == 'data':
# WORKAROUND for an unknown QtWebEngine bug (?) which gives us base64 data
# as filename.
suggested_filename = urlutils.filename_from_url(
qt_item.url(), fallback='qutebrowser-download')
else:
suggested_filename = _get_suggested_filename(qt_item.path())
use_pdfjs = pdfjs.should_use_pdfjs(qt_item.mimeType(), qt_item.url())
download = DownloadItem(qt_item, manager=self)

View File

@ -5,6 +5,7 @@
<title>data: link</title>
</head>
<body>
<a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">download</a>
<a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">plaintext</a>
<a href="data:application/pdf;base64,cXV0ZWJyb3dzZXI=" id="pdf">PDF download</a>
</body>
</html>

View File

@ -87,8 +87,18 @@ Feature: Downloading things from a website.
When I set downloads.location.suggestion to filename
And I set downloads.location.prompt to true
And I open data/data_link.html
And I hint with args "links download" and follow a
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='binary blob' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
And I hint with args "links download" and follow s
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
And I run :leave-mode
Then no crash should happen
@qtwebkit_skip
Scenario: Downloading a data: link via QtWebEngine (issue 1214)
When I set downloads.location.suggestion to filename
And I set downloads.location.prompt to true
And I open data/data_link.html
And I hint with args "links" and follow s
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
And I run :leave-mode
Then no crash should happen

View File

@ -18,11 +18,13 @@
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
import os.path
import base64
import pytest
pytest.importorskip('PyQt5.QtWebEngineWidgets')
from PyQt5.QtWebEngineWidgets import QWebEngineProfile
from qutebrowser.utils import urlutils
from qutebrowser.browser.webengine import webenginedownloads
@ -38,3 +40,47 @@ from qutebrowser.browser.webengine import webenginedownloads
])
def test_get_suggested_filename(path, expected):
assert webenginedownloads._get_suggested_filename(path) == expected
@pytest.mark.parametrize('with_slash', [True, False])
def test_data_url_workaround_needed(qapp, qtbot, webengineview, with_slash):
    """Make sure QtWebEngine still hands out odd filenames for data: URLs.

    If this starts failing because we get a "download.pdf" (like with
    Chromium) instead of a name derived from the base64 payload or the
    mimetype, the workaround in the download handling is not needed anymore.
    """
    mimetype = 'application/pdf'

    # Minimal PDF document, see https://stackoverflow.com/a/17280876/2085149
    header = '%PDF-1.0'
    objects = [
        '1 0 obj<</Pages 2 0 R>>endobj',
        '2 0 obj<</Kids[3 0 R]/Count 1>>endobj',
        '3 0 obj<</MediaBox[0 0 3 3]>>endobj',
        'trailer<</Root 1 0 R>>',
    ]
    # The additional comment line makes the base64 output contain a slash.
    lines = [header, '% ?', *objects] if with_slash else [header, *objects]

    raw_pdf = '\n'.join(lines).encode('ascii')
    encoded = base64.b64encode(raw_pdf).decode('ascii')

    # Guard against the fixture data not matching the parametrization.
    assert ('/' in encoded) is with_slash
    # Without a slash in the payload, the name comes from the mimetype.
    expected = encoded.split('/')[1] if with_slash else 'pdf'

    def verify_download(download):
        assert download.mimeType() == mimetype
        assert download.url().scheme() == 'data'
        assert os.path.basename(download.path()) == expected
        return True

    default_profile = QWebEngineProfile.defaultProfile()
    default_profile.setParent(qapp)

    data_url = urlutils.data_url(mimetype, raw_pdf)

    with qtbot.waitSignal(default_profile.downloadRequested,
                          check_params_cb=verify_download):
        webengineview.load(data_url)