[tagesschau] Relax _VALID_URL

This commit is contained in:
Sergey M․ 2016-05-01 06:57:19 +06:00
parent 6a0f9a24d0
commit 651ad35ce0
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -125,7 +125,7 @@ def _real_extract(self, url):
class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)?)(?:~_?[^/#?]+?)?\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
@@ -197,6 +197,9 @@ class TagesschauIE(InfoExtractor):
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
'only_matching': True,
}, {
'url': 'http://www.tagesschau.de/100sekunden/index.html',
'only_matching': True,
}]
@classmethod
@@ -256,7 +259,8 @@ def _extract_formats(self, download_text, media_kind):
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') or mobj.group('path')
display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id)