Fixes #476: create was broken for FTP URLs.

This commit is contained in:
Todd Gamblin 2016-03-02 01:41:43 -08:00
parent 8421dcb762
commit a0c6519de9

View File

@@ -86,12 +86,12 @@ def _spider(args):
if not "Content-type" in resp.headers: if not "Content-type" in resp.headers:
tty.debug("ignoring page " + url) tty.debug("ignoring page " + url)
return pages return pages, links
if not resp.headers["Content-type"].startswith('text/html'): if not resp.headers["Content-type"].startswith('text/html'):
tty.debug("ignoring page " + url + " with content type " + tty.debug("ignoring page " + url + " with content type " +
resp.headers["Content-type"]) resp.headers["Content-type"])
return pages return pages, links
# Do the real GET request when we know it's just HTML. # Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET" req.get_method = lambda: "GET"
@@ -173,7 +173,7 @@ def spider(root_url, **kwargs):
performance over a sequential fetch. performance over a sequential fetch.
""" """
max_depth = kwargs.setdefault('depth', 1) max_depth = kwargs.setdefault('depth', 1)
pages, links = _spider((root_url, set(), root_url, None, 1, max_depth, False)) pages, links = _spider((root_url, set(), root_url, None, 1, max_depth, False))
return pages, links return pages, links