[pbs] Fix multi part videos extraction

2024-11-10 09:07:58 +01:00 · 2016-02-11 22:02:37 +06:00 · 2016-02-11 22:02:37 +06:00 · 6b3fbd3425
commit 6b3fbd3425
parent a7ab46375b
1 changed files with 8 additions and 4 deletions
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -366,10 +366,14 @@ def _extract_webpage(self, url):
                webpage, 'upload date', default=None))

            # tabbed frontline videos
-            tabbed_videos = re.findall(
-                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
-            if tabbed_videos:
-                return tabbed_videos, presumptive_id, upload_date
+            MULTI_PART_REGEXES = (
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+            )
+            for p in MULTI_PART_REGEXES:
+                tabbed_videos = re.findall(p, webpage)
+                if tabbed_videos:
+                    return tabbed_videos, presumptive_id, upload_date

            MEDIA_ID_REGEXES = [
                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed