From 99b380c33bcfb33d090fce66df7be8705a19f316 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Mar 2014 04:00:52 +0100 Subject: [PATCH] [comedycentral] Fix thedailyshow / thecolbertreport (Fixes #2600, #2596) --- youtube_dl/extractor/comedycentral.py | 114 ++++++++++++++------------ 1 file changed, 61 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index d50fcdbdb..1c29ddeb2 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -7,8 +7,8 @@ from ..utils import ( compat_str, compat_urllib_parse, - ExtractorError, + int_or_none, unified_strdate, ) @@ -32,31 +32,32 @@ class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralShowsIE(InfoExtractor): - IE_DESC = 'The Daily Show / Colbert Report' + IE_DESC = 'The Daily Show / The Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 - _VALID_URL = r"""^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport) - |(https?://)?(www\.)? - (?Pthedailyshow|colbertnation)\.com/ + _VALID_URL = r'''(?x)^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport) + |https?://(:www\.)? + (?Pthedailyshow|thecolbertreport)\.(?:cc\.)?com/ (full-episodes/(?P.*)| (?P (the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) |(watch/(?P[^/]*)/(?P.*)))| (?P extended-interviews/(?P[0-9]+)/playlist_tds_extended_(?P.*?)/.*?))) - $""" + $''' _TEST = { - 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', - 'file': '422212.mp4', + 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'info_dict': { - "upload_date": "20121214", - "description": "Kristen Stewart", - "uploader": "thedailyshow", - "title": "thedailyshow-kristen-stewart part 1" + 'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55', + 'ext': 'mp4', + 'upload_date': '20121213', + 'description': 'Kristen Stewart learns to let loose in "On the Road."', + 'uploader': 'thedailyshow', + 'title': 'thedailyshow-kristen-stewart part 1', } } @@ -79,11 +80,6 @@ class ComedyCentralShowsIE(InfoExtractor): '400': (384, 216), } - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - @staticmethod def _transform_rtmp_url(rtmp_video_url): m = re.match(r'^rtmpe?://.*?/(?Pgsp\.comedystor/.*)$', rtmp_video_url) @@ -99,9 +95,9 @@ def _real_extract(self, url): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = 'http://thedailyshow.cc.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = 'http://thecolbertreport.cc.com/full-episodes/' mobj = re.match(self._VALID_URL, url, re.VERBOSE) assert mobj is not None @@ -120,9 +116,9 @@ def _real_extract(self, url): epTitle = mobj.group('showname') else: epTitle = mobj.group('episode') + show_name = mobj.group('showname') - self.report_extraction(epTitle) - webpage,htmlHandle = self._download_webpage_handle(url, epTitle) + webpage, htmlHandle = self._download_webpage_handle(url, epTitle) if dlNewest: url = htmlHandle.geturl() mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -130,10 +126,9 @@ def _real_extract(self, url): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is still not specific: ' + url) - epTitle = mobj.group('episode') + epTitle = mobj.group('episode').rpartition('/')[-1] mMovieParams = re.findall('(?: