From a9bad429b309e614b4b8905c085ef425350ceeb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 29 Oct 2014 11:04:48 +0100
Subject: [PATCH 1/4] [niconico] Add extractor for playlists (closes #4043)

---
 youtube_dl/extractor/__init__.py |  2 +-
 youtube_dl/extractor/niconico.py | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 615018c09..32236f0fa 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -252,7 +252,7 @@
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
-from .niconico import NiconicoIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 7b85589b7..62d5707fe 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -146,3 +147,36 @@ def _real_extract(self, url):
             'duration': duration,
             'webpage_url': webpage_url,
         }
+
+
+class NiconicoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.nicovideo.jp/mylist/27411728',
+        'info_dict': {
+            'id': '27411728',
+            'title': 'AKB48のオールナイトニッポン',
+        },
+        'playlist_mincount': 225,
+    }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(url, list_id)
+
+        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
+            webpage, 'entries')
+        entries = json.loads(entries_json)
+        entries = [{
+            '_type': 'url',
+            'ie_key': NiconicoIE.ie_key(),
+            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+        } for entry in entries]
+
+        return {
+            '_type': 'playlist',
+            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
+            'id': list_id,
+            'entries': entries,
+        }

From 8abec2c8bb2ff1051af63420711706497faf6de4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 29 Oct 2014 11:13:34 +0100
Subject: [PATCH 2/4] [test_utils] Fix compat_getenv and compat_expanduser tests on python 3.x

---
 test/test_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 19f9fce20..febba411e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -360,12 +360,14 @@ def test_js_to_json_edgecases(self):
 
     def test_compat_getenv(self):
         test_str = 'тест'
-        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding())
+        os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 
     def test_compat_expanduser(self):
         test_str = 'C:\Documents and Settings\тест\Application Data'
-        os.environ['HOME'] = test_str.encode(get_filesystem_encoding())
+        os.environ['HOME'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_expanduser('~'), test_str)
 
 if __name__ == '__main__':

From c451d4f5533de9bacba5ad7c186b7bbb3fb0b6cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Wed, 29 Oct 2014 21:16:10 +0700
Subject: [PATCH 3/4] [trutube] Fix extraction

---
 youtube_dl/extractor/trutube.py | 38 +++++++++++++++------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index 57f956683..a73f3c43a 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -1,13 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import xpath_text
 
 
 class TruTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
         'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
         'info_dict': {
@@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor):
             'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
             'thumbnail': 're:^http:.*\.jpg$',
         }
-    }
+    }, {
+        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage).strip()
-        thumbnail = self._search_regex(
-            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
+        config = self._download_xml(
+            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
+            video_id, transform_source=lambda s: s.strip())
 
-        all_formats = re.finditer(
-            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
-        formats = [{
-            'format_id': m.group('key'),
-            'quality': -i,
-            'url': m.group('url'),
-        } for i, m in enumerate(all_formats)]
-        self._sort_formats(formats)
+        # filehd is always 404
+        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        title = xpath_text(config, './title', 'title')
+        thumbnail = xpath_text(config, './image', ' thumbnail')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'formats': formats,
+            'url': video_url,
+            'title': title,
             'thumbnail': thumbnail,
         }

From dbd1283d31aa1df43bf9b3255dc27a1c8bfed4ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Wed, 29 Oct 2014 21:50:37 +0700
Subject: [PATCH 4/4] [naver] Capture and output error message (#4057)

---
 youtube_dl/extractor/naver.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index c0231c197..cb1af9ecc 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -7,6 +7,7 @@
 from ..utils import (
     compat_urllib_parse,
     ExtractorError,
+    clean_html,
 )
 
 
@@ -31,6 +32,11 @@ def _real_extract(self, url):
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
             webpage)
         if m_id is None:
+            m_error = re.search(
+                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+                webpage)
+            if m_error:
+                raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)