[youtube] Use a cookie for setting the language

This way, we don't have to do an additional request
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-11-30 00:03:59 +01:00
parent 37ea8164d3
commit 42939b6129
2 changed files with 11 additions and 18 deletions

View File

@ -13,6 +13,7 @@ import time
import xml.etree.ElementTree import xml.etree.ElementTree
from ..compat import ( from ..compat import (
compat_cookiejar,
compat_http_client, compat_http_client,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
@ -817,6 +818,11 @@ class InfoExtractor(object):
self._downloader.report_warning(msg) self._downloader.report_warning(msg)
return res return res
# Build a cookie from the given domain, name and value and store it in the
# downloader's cookie jar (self._downloader.cookiejar).
# expire_time is passed through as the cookie's expiry and defaults to None;
# the YouTube caller in this commit passes time.time() plus 60 days, i.e. a
# timestamp in seconds since the epoch.
def _set_cookie(self, domain, name, value, expire_time=None):
# NOTE(review): assuming compat_cookiejar.Cookie has the same signature as the
# stdlib http.cookiejar.Cookie, the positional arguments below are:
# version=0, name, value, port=None, port_specified=None, domain,
# domain_specified=None, domain_initial_dot=None, path='/',
# path_specified=True, secure=False, expires=expire_time, discard='',
# comment=None, comment_url=None, rest=None -- confirm against the compat module.
cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
# Register the cookie on the jar held by the downloader.
self._downloader.cookiejar.set_cookie(cookie)
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -7,6 +7,7 @@ import itertools
import json import json
import os.path import os.path
import re import re
import time
import traceback import traceback
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
@ -38,16 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors""" """Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor' _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_NETRC_MACHINE = 'youtube' _NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided # If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
def _set_language(self): def _set_language(self):
return bool(self._download_webpage( self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
self._LANG_URL, None, # YouTube sets the expire time to about two months
note='Setting language', errnote='unable to set language', expire_time=time.time() + 60*24*3600)
fatal=False))
def _login(self): def _login(self):
""" """
@ -178,9 +177,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
if self._downloader is None: if self._downloader is None:
return return
if self._get_login_info()[0] is not None: self._set_language()
if not self._set_language():
return
if not self._login(): if not self._login():
return return
@ -667,16 +664,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Get video webpage # Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
pref_cookies = [
c for c in self._downloader.cookiejar
if c.domain == '.youtube.com' and c.name == 'PREF']
for pc in pref_cookies:
if 'hl=' in pc.value:
pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
else:
if pc.value:
pc.value += '&'
pc.value += 'hl=en'
video_webpage = self._download_webpage(url, video_id) video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL # Attempt to extract SWF player URL