From 412f356e04b0daaa1a862f8fdc155ae63376e7d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 14:47:23 +0200 Subject: [PATCH 1/7] [gameone] Add new extractor gameone Currently only usable for downloading tv episodes residing under http://www.gameone.de/tv/ --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gameone.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/gameone.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..a294f66ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -103,6 +103,7 @@ from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE +from .gameone import GameOneIE from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py new file mode 100644 index 000000000..a8a290477 --- /dev/null +++ b/youtube_dl/extractor/gameone.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import xpath_with_ns + +NAMESPACE_MAP = { + 'media': 'http://search.yahoo.com/mrss/', +} + +RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' + +class GameOneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' + _TESTS = { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + og_video = 
self._og_search_video_url(webpage, secure=False) + mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') + + mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') + title = mrss.find('.//item/title').text + thumbnail = mrss.find('.//item/image').get('url') + content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) + content_url = content.get('url') + + content = self._download_xml(content_url, video_id, 'Downloading media:content') + rendition_items = content.findall('.//rendition') + duration = int(rendition_items[0].get('duration')) + formats = [ + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } + for r in rendition_items + ] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } From 10d5c7aa5fcc4a05b039cc147b3e36732a56b0d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 15:10:19 +0200 Subject: [PATCH 2/7] [gameone] Added explanation for usage of http://cdn.riptide-mtvn.com/ --- youtube_dl/extractor/gameone.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index a8a290477..d5fb19cec 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -10,6 +10,8 @@ 'media': 'http://search.yahoo.com/mrss/', } +# URL prefix to download the mp4 files directly instead of streaming via rtmp +# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): From 9e30092361c3b94d66bf2aaf99087d0df201718c Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 17:07:40 +0200 Subject: [PATCH 3/7] [gameone] Added extraction of description and fixed failing tests --- youtube_dl/extractor/gameone.py | 14 +++++++++++++- 1 file 
changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index d5fb19cec..855df74fb 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..utils import xpath_with_ns @@ -16,7 +17,7 @@ class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' - _TESTS = { + _TEST = { 'url': 'http://www.gameone.de/tv/288', 'md5': '136656b7fb4c9cb4a8e2d500651c499b', 'info_dict': { @@ -25,6 +26,11 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' + 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' + 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' + 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' + 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' 
} } @@ -39,6 +45,7 @@ def _real_extract(self, url): mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -61,4 +68,9 @@ def _real_extract(self, url): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'description': description, } + + def _extract_description(self, mrss): + description = mrss.find('.//item/description') + return u''.join(t for t in description.itertext()) From a84d20fc14eb70310af85da385c879c365fd7897 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:20:29 +0200 Subject: [PATCH 4/7] [gameone] Simplified extraction of description --- youtube_dl/extractor/gameone.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 855df74fb..aa0234346 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -26,11 +26,7 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' - 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' - 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' - 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' - 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' 
+ 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', } } @@ -40,12 +36,12 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) + description = self._html_search_meta('description', webpage) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') - description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -70,7 +66,3 @@ def _real_extract(self, url): 'formats': formats, 'description': description, } - - def _extract_description(self, mrss): - description = mrss.find('.//item/description') - return u''.join(t for t in description.itertext()) From a231ce87b56d85354f66d4a9b26763bc73ca86c1 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:35:11 +0200 Subject: [PATCH 5/7] [gameone] Added extraction of age_limit --- youtube_dl/extractor/gameone.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index aa0234346..3b3870878 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -27,6 +27,7 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16 } } @@ -37,6 +38,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) + age_limit = 
int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -65,4 +67,5 @@ def _real_extract(self, url): 'duration': duration, 'formats': formats, 'description': description, + 'age_limit': age_limit, } From 305d0683628d26c8e9ba04c77c4b3c7283106f80 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 19:04:02 +0200 Subject: [PATCH 6/7] [gameone] Added timestamp extraction --- youtube_dl/extractor/gameone.py | 14 +++++++++++--- youtube_dl/utils.py | 6 +++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 3b3870878..008eb90a5 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,10 +2,12 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree as ET from .common import InfoExtractor -from ..utils import xpath_with_ns +from ..utils import ( + xpath_with_ns, + parse_iso8601 +) NAMESPACE_MAP = { 'media': 'http://search.yahoo.com/mrss/', @@ -15,6 +17,8 @@ # Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' +PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' + class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' _TEST = { @@ -27,7 +31,9 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16 + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, } } @@ -44,6 +50,7 @@ def _real_extract(self, url): mrss = self._download_xml(mrss_url, video_id, 
'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -68,4 +75,5 @@ def _real_extract(self, url): 'formats': formats, 'description': description, 'age_limit': age_limit, + 'timestamp': timestamp, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1036ea9bd..3e7947f5d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -765,7 +765,7 @@ def http_response(self, req, resp): https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'): """ Return a UNIX timestamp from the given date """ if date_str is None: @@ -785,8 +785,8 @@ def parse_iso8601(date_str): timezone = datetime.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) - - dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) From e5c3a4b54995422dcef1d2fbb032446e35358e8d Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Mon, 19 May 2014 22:33:51 +0200 Subject: [PATCH 7/7] [gameone] Fix indentation and removed unused constants --- youtube_dl/extractor/gameone.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 008eb90a5..2544ea521 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -14,10 +14,10 @@ } # URL prefix to download the mp4 files directly instead of streaming via rtmp -# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 +# Credits go to XBox-Maniac +# 
http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' -PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' @@ -44,7 +44,14 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) - age_limit = int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) + age_limit = int( + self._search_regex( + r'age=(\d+)', + self._html_search_meta( + 'age-de-meta-label', + webpage), + 'age_limit', + '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -54,16 +61,19 @@ def _real_extract(self, url): content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') - content = self._download_xml(content_url, video_id, 'Downloading media:content') + content = self._download_xml( + content_url, + video_id, + 'Downloading media:content') rendition_items = content.findall('.//rendition') duration = int(rendition_items[0].get('duration')) formats = [ - { - 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int(r.get('width')), - 'height': int(r.get('height')), - 'tbr': int(r.get('bitrate')), - } + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } for r in rendition_items ]