From cc1ac110173f77a22c9becbee4cb6522087986a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Wed, 9 Sep 2015 23:59:17 +0600
Subject: [PATCH] [vier:videos] Fix extraction with old approach (Closes #6806)

---
 youtube_dl/extractor/vier.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 15377097e..c76c20614 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import itertools
 
 from .common import InfoExtractor
 
@@ -91,31 +92,27 @@ def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         program = mobj.group('program')
 
-        webpage = self._download_webpage(url, program)
-
         page_id = mobj.group('page')
         if page_id:
             page_id = int(page_id)
             start_page = page_id
-            last_page = start_page + 1
             playlist_id = '%s-page%d' % (program, page_id)
         else:
             start_page = 0
-            last_page = int(self._search_regex(
-                r'videos\?page=(\d+)">laatste',
-                webpage, 'last page', default=0)) + 1
             playlist_id = program
 
         entries = []
-        for current_page_id in range(start_page, last_page):
+        for current_page_id in itertools.count(start_page):
             current_page = self._download_webpage(
                 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
                 program,
-                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+                'Downloading page %d' % (current_page_id + 1))
             page_entries = [
                 self.url_result('http://www.vier.be' + video_url, 'Vier')
                 for video_url in re.findall(
                     r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
             entries.extend(page_entries)
+            if page_id or '>Meer<' not in current_page:
+                break
 
         return self.playlist_result(entries, playlist_id)