Improve download filenames for data: URLs
With QtWebEngine, downloading a data: URL seems to give us the raw data: URL as filename, similar to #1214 / #1321 but for QtWebEngine. With QtWebKit, the logic is now also improved so that we get a proper extension rather than a "binary blob" filename. See #1099
This commit is contained in:
parent
0a6c488b88
commit
9ae08c0f15
|
|
@ -465,6 +465,25 @@ class DownloadManager(downloads.AbstractDownloadManager):
|
|||
mhtml.start_download_checked, tab=tab))
|
||||
message.global_bridge.ask(question, blocking=False)
|
||||
|
||||
def _get_suggested_filename(self, request):
|
||||
"""Get the suggested filename for the given request."""
|
||||
filename_url = request.url()
|
||||
if request.url().scheme().lower() == 'data':
|
||||
# We might be downloading a binary blob embedded on a page or even
|
||||
# generated dynamically via javascript. If we happen to know where it's
|
||||
# coming from, we can try to figure out a more sensible name than the base64
|
||||
# content of the data.
|
||||
origin = request.originatingObject()
|
||||
try:
|
||||
filename_url = origin.url()
|
||||
except AttributeError:
|
||||
# Raised either if origin is None or some object that doesn't
|
||||
# have its own url. We're probably fine with a default fallback
|
||||
# based on the data URL then.
|
||||
pass
|
||||
|
||||
return urlutils.filename_from_url(filename_url, fallback='qutebrowser-download')
|
||||
|
||||
def get_request(self, request, *, target=None,
|
||||
suggested_fn=None, **kwargs):
|
||||
"""Start a download with a QNetworkRequest.
|
||||
|
|
@ -482,29 +501,8 @@ class DownloadManager(downloads.AbstractDownloadManager):
|
|||
request.setAttribute(QNetworkRequest.CacheLoadControlAttribute,
|
||||
QNetworkRequest.AlwaysNetwork)
|
||||
|
||||
if suggested_fn is not None:
|
||||
pass
|
||||
elif request.url().scheme().lower() != 'data':
|
||||
suggested_fn = urlutils.filename_from_url(request.url())
|
||||
else:
|
||||
# We might be downloading a binary blob embedded on a page or even
|
||||
# generated dynamically via javascript. We try to figure out a more
|
||||
# sensible name than the base64 content of the data.
|
||||
origin = request.originatingObject()
|
||||
try:
|
||||
origin_url = origin.url()
|
||||
except AttributeError:
|
||||
# Raised either if origin is None or some object that doesn't
|
||||
# have its own url. We're probably fine with a default fallback
|
||||
# then.
|
||||
suggested_fn = 'binary blob'
|
||||
else:
|
||||
# Use the originating URL as a base for the filename (works
|
||||
# e.g. for pdf.js).
|
||||
suggested_fn = urlutils.filename_from_url(origin_url)
|
||||
|
||||
if suggested_fn is None:
|
||||
suggested_fn = 'qutebrowser-download'
|
||||
suggested_fn = self._get_suggested_filename(request)
|
||||
|
||||
return self._fetch_request(request,
|
||||
target=target,
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QObject
|
|||
from PyQt5.QtWebEngineWidgets import QWebEngineDownloadItem
|
||||
|
||||
from qutebrowser.browser import downloads, pdfjs
|
||||
from qutebrowser.utils import debug, usertypes, message, log, objreg
|
||||
from qutebrowser.utils import debug, usertypes, message, log, objreg, urlutils
|
||||
|
||||
|
||||
class DownloadItem(downloads.AbstractDownloadItem):
|
||||
|
|
@ -249,7 +249,14 @@ class DownloadManager(downloads.AbstractDownloadManager):
|
|||
@pyqtSlot(QWebEngineDownloadItem)
|
||||
def handle_download(self, qt_item):
|
||||
"""Start a download coming from a QWebEngineProfile."""
|
||||
suggested_filename = _get_suggested_filename(qt_item.path())
|
||||
if qt_item.url().scheme().lower() == 'data':
|
||||
# WORKAROUND for an unknown QtWebEngine bug (?) which gives us base64 data
|
||||
# as filename.
|
||||
suggested_filename = urlutils.filename_from_url(
|
||||
qt_item.url(), fallback='qutebrowser-download')
|
||||
else:
|
||||
suggested_filename = _get_suggested_filename(qt_item.path())
|
||||
|
||||
use_pdfjs = pdfjs.should_use_pdfjs(qt_item.mimeType(), qt_item.url())
|
||||
|
||||
download = DownloadItem(qt_item, manager=self)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
<title>data: link</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">download</a>
|
||||
<a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">plaintext</a>
|
||||
<a href="data:application/pdf;base64,cXV0ZWJyb3dzZXI=" id="pdf">PDF download</a>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -87,8 +87,18 @@ Feature: Downloading things from a website.
|
|||
When I set downloads.location.suggestion to filename
|
||||
And I set downloads.location.prompt to true
|
||||
And I open data/data_link.html
|
||||
And I hint with args "links download" and follow a
|
||||
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='binary blob' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
|
||||
And I hint with args "links download" and follow s
|
||||
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
|
||||
And I run :leave-mode
|
||||
Then no crash should happen
|
||||
|
||||
@qtwebkit_skip
|
||||
Scenario: Downloading a data: link via QtWebEngine (issue 1214)
|
||||
When I set downloads.location.suggestion to filename
|
||||
And I set downloads.location.prompt to true
|
||||
And I open data/data_link.html
|
||||
And I hint with args "links" and follow s
|
||||
And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
|
||||
And I run :leave-mode
|
||||
Then no crash should happen
|
||||
|
||||
|
|
|
|||
|
|
@ -18,11 +18,13 @@
|
|||
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os.path
|
||||
import base64
|
||||
|
||||
import pytest
|
||||
|
||||
pytest.importorskip('PyQt5.QtWebEngineWidgets')
|
||||
from PyQt5.QtWebEngineWidgets import QWebEngineProfile
|
||||
|
||||
from qutebrowser.utils import urlutils
|
||||
from qutebrowser.browser.webengine import webenginedownloads
|
||||
|
||||
|
||||
|
|
@ -38,3 +40,47 @@ from qutebrowser.browser.webengine import webenginedownloads
|
|||
])
|
||||
def test_get_suggested_filename(path, expected):
|
||||
assert webenginedownloads._get_suggested_filename(path) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_slash', [True, False])
|
||||
def test_data_url_workaround_needed(qapp, qtbot, webengineview, with_slash):
|
||||
"""With data URLs, we get rather weird base64 filenames back from QtWebEngine.
|
||||
|
||||
This test verifies that our workaround for this is still needed, i.e. if we get
|
||||
those base64-filenames rather than a "download.pdf" like with Chromium.
|
||||
"""
|
||||
# https://stackoverflow.com/a/17280876/2085149
|
||||
pdf_source = [
|
||||
'%PDF-1.0',
|
||||
'1 0 obj<</Pages 2 0 R>>endobj',
|
||||
'2 0 obj<</Kids[3 0 R]/Count 1>>endobj',
|
||||
'3 0 obj<</MediaBox[0 0 3 3]>>endobj',
|
||||
'trailer<</Root 1 0 R>>',
|
||||
]
|
||||
|
||||
if with_slash:
|
||||
pdf_source.insert(1, '% ?') # this results in a slash in base64
|
||||
|
||||
pdf_data = '\n'.join(pdf_source).encode('ascii')
|
||||
base64_data = base64.b64encode(pdf_data).decode('ascii')
|
||||
|
||||
if with_slash:
|
||||
assert '/' in base64_data
|
||||
expected = base64_data.split('/')[1]
|
||||
else:
|
||||
assert '/' not in base64_data
|
||||
expected = 'pdf' # from the mimetype
|
||||
|
||||
def check_item(item):
|
||||
assert item.mimeType() == 'application/pdf'
|
||||
assert item.url().scheme() == 'data'
|
||||
assert os.path.basename(item.path()) == expected
|
||||
return True
|
||||
|
||||
profile = QWebEngineProfile.defaultProfile()
|
||||
profile.setParent(qapp)
|
||||
|
||||
url = urlutils.data_url('application/pdf', pdf_data)
|
||||
|
||||
with qtbot.waitSignal(profile.downloadRequested, check_params_cb=check_item):
|
||||
webengineview.load(url)
|
||||
|
|
|
|||
Loading…
Reference in New Issue