Remove the calls to 'compat_urllib_request.urlopen' in a few extractors

2024-11-02 17:22:31 +01:00 · 2013-12-08 22:24:55 +01:00 · 2013-12-08 22:24:55 +01:00 · baa7b1978b
commit baa7b1978b
parent ac5118bcb9
5 changed files with 42 additions and 86 deletions
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@ -51,8 +51,7 @@ def _real_extract(self, url):
            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
        urlp = compat_urllib_parse_urlparse(url)
        if urlp.path.startswith('/play/'):
-            request = compat_urllib_request.Request(url)
-            response = compat_urllib_request.urlopen(request)
+            response = self._request_webpage(url, None, False)
            redirecturl = response.geturl()
            rurlp = compat_urllib_parse_urlparse(redirecturl)
            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@ -69,8 +68,8 @@ def _real_extract(self, url):
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(mobj.group(1))
        info = None
-        try:
-            urlh = compat_urllib_request.urlopen(request)
+        urlh = self._request_webpage(request, None, False,
+            u'unable to download video info webpage')
        if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
            basename = url.split('/')[-1]
            title,ext = os.path.splitext(basename)
@ -86,8 +85,6 @@ def _real_extract(self, url):
                'ext': ext,
                'urlhandle': urlh
            }
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
        if info is None: # Regular URL
            try:
                json_code_bytes = urlh.read()
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -1,11 +1,8 @@
 import re
-import socket

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
    compat_parse_qs,
-    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_str,
@ -93,12 +90,8 @@ def report_disclaimer(self):

    def _real_initialize(self):
        # Retrieve disclaimer
-        request = compat_urllib_request.Request(self._DISCLAIMER)
-        try:
        self.report_disclaimer()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
+        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')

        # Confirm age
        disclaimer_form = {
@ -107,11 +100,8 @@ def _real_initialize(self):
            }
        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        try:
        self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        self._download_webpage(request, None, False, u'Unable to confirm age')

    def _real_extract(self, url):
        # Extract id and simplified title from URL
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -1,13 +1,10 @@
 import json
 import re
-import socket

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_urllib_error,
-    compat_urllib_request,
    unified_strdate,
+    ExtractorError,
 )


@ -31,9 +28,11 @@ def check_urls(self, url_list):
        """Returns 1st active url from list"""
        for url in url_list:
            try:
-                compat_urllib_request.urlopen(url)
+                # We only want to know if the request succeeds,
+                # so don't download the whole file
+                self._request_webpage(url, None, False)
                return url
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
+            except ExtractorError:
                url = None

        return None
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@ -1,13 +1,8 @@
 import re
-import socket
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
    compat_str,
-    compat_urllib_error,
-    compat_urllib_request,

    ExtractorError,
    orderedSet,
@ -45,11 +40,7 @@ def _real_extract(self, url):
            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
-            try:
-                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
-            mdoc = xml.etree.ElementTree.fromstring(metaXml)
+            mdoc = self._download_xml(xmlUrl, info['id'])
            try:
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@ -95,12 +86,9 @@ def _real_extract(self, url):
                'upload_date': None,
            }

-            self.report_download_webpage(info['id'])
            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
-            try:
-                rootpage = compat_urllib_request.urlopen(rootURL).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
+            rootpage = self._download_webpage(rootURL, info['id'],
+                errnote=u'Unable to download course info page')

            info['title'] = info['id']

--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,7 +7,6 @@
 import json
 import os.path
 import re
-import socket
 import string
 import struct
 import traceback
@ -17,9 +16,7 @@
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_chr,
-    compat_http_client,
    compat_parse_qs,
-    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
@ -53,9 +50,9 @@ def _set_language(self):
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
+            self._download_webpage(self._LANG_URL, None, False)
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
            return False
        return True

@ -67,12 +64,8 @@ def _login(self):
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

-        request = compat_urllib_request.Request(self._LOGIN_URL)
-        try:
-            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
-            return False
+        login_page = self._download_webpage(self._LOGIN_URL, None, False,
+            u'Unable to fetch login page')

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')
@ -105,12 +98,12 @@ def _login(self):
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            login_results = self._download_webpage(request, None, False)
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
            return False
        return True

@ -120,11 +113,8 @@ def _confirm_age(self):
                'action_confirm':   'Confirm',
                }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
-        try:
        self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        self._download_webpage(request, None, False, u'Unable to confirm age')
        return True

    def _real_initialize(self):
@ -1737,10 +1727,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

-    def report_download_page(self, query, pagenum):
-        """Report attempt to download search page with given number."""
-        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
-
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

@ -1749,13 +1735,9 @@ def _get_n_results(self, query, n):
        limit = n

        while (50 * pagenum) < limit:
-            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
-            request = compat_urllib_request.Request(result_url)
-            try:
-                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
+            data = self._download_webpage(result_url, u'query "%s"' % query,
+                u'Downloading page %s' % pagenum, u'Unable to download API page')
            api_response = json.loads(data)['data']

            if not 'items' in api_response: