[ie/thisoldhouse] Support Brightcove embeds (#9576)

Closes #9570
Authored by: bashonly
This commit is contained in:
bashonly 2024-03-30 18:06:20 -05:00 committed by GitHub
parent 63f685f341
commit 0df63cce69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,5 +1,6 @@
import json import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor from .common import InfoExtractor
from .zype import ZypeIE from .zype import ZypeIE
from ..networking import HEADRequest from ..networking import HEADRequest
@ -8,6 +9,7 @@
ExtractorError, ExtractorError,
filter_dict, filter_dict,
parse_qs, parse_qs,
smuggle_url,
try_call, try_call,
urlencode_postdata, urlencode_postdata,
) )
@ -17,23 +19,43 @@ class ThisOldHouseIE(InfoExtractor):
_NETRC_MACHINE = 'thisoldhouse' _NETRC_MACHINE = 'thisoldhouse'
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
# Unresolved Brightcove URL embed (formerly Zype), free
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench', 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
'info_dict': { 'info_dict': {
'id': '5dcdddf673c3f956ef5db202', 'id': '6325298523112',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How to Build a Storage Bench', 'title': 'How to Build a Storage Bench',
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.', 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
'timestamp': 1442548800, 'timestamp': 1681793639,
'upload_date': '20150918', 'upload_date': '20230418',
'duration': 674, 'duration': 674.54,
'view_count': int, 'tags': 'count:11',
'average_rating': 0, 'uploader_id': '6314471934001',
'thumbnail': r're:^https?://.*\.jpg\?\d+$', 'thumbnail': r're:^https?://.*\.jpg',
'display_id': 'how-to-build-a-storage-bench',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# Brightcove embed, authwalled
'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
'info_dict': {
'id': '6349675446112',
'ext': 'mp4',
'title': 'E17 | Glen Ridge Generational | Multi-Generational',
'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
'timestamp': 1711382202,
'upload_date': '20240325',
'duration': 1422.229,
'tags': 'count:13',
'uploader_id': '6314471934001',
'thumbnail': r're:^https?://.*\.jpg',
},
'expected_warnings': ['Login with password is not supported for this website'],
'params': {
'skip_download': True,
},
'skip': 'Requires subscription',
}, { }, {
# Page no longer has video # Page no longer has video
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
@ -98,7 +120,15 @@ def _real_extract(self, url):
video_url, video_id = self._search_regex( video_url, video_id = self._search_regex(
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]', r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
webpage, 'video url', group=(1, 2)) webpage, 'zype url', group=(1, 2), default=(None, None))
if video_url:
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
return self.url_result(video_url, ZypeIE, video_id) return self.url_result(video_url, ZypeIE, video_id)
video_url, video_id = self._search_regex([
r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
webpage, 'iframe url', group=(1, 2))
if not parse_qs(video_url).get('videoId'):
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)