[viewlift] replace SnagFilms extractors

- add support for other sites that use the same logic
- improve format extraction and sorting
This commit is contained in:
remitamine 2016-04-29 11:14:42 +01:00
parent 14638e2915
commit 67167920db
3 changed files with 34 additions and 19 deletions

View file

@ -673,10 +673,6 @@
SmotriUserIE,
SmotriBroadcastIE,
)
from .snagfilms import (
SnagFilmsIE,
SnagFilmsEmbedIE,
)
from .snotr import SnotrIE
from .sohu import SohuIE
from .soundcloud import (
@ -879,6 +875,10 @@
)
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
)
from .viewster import ViewsterIE
from .viidea import ViideaIE
from .vimeo import (

View file

@ -51,7 +51,7 @@
from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
@ -1924,10 +1924,10 @@ def _playlist_from_matches(matches, getter=None, ie=None):
if onionstudios_url:
return self.url_result(onionstudios_url)
# Look for SnagFilms embeds
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
if snagfilms_url:
return self.url_result(snagfilms_url)
# Look for ViewLift embeds
viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if viewlift_url:
return self.url_result(viewlift_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)

View file

@ -13,8 +13,12 @@
)
class SnagFilmsEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
class ViewLiftBaseIE(InfoExtractor):
_DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
webpage)
if mobj:
return mobj.group('url')
@ -55,6 +59,7 @@ def _real_extract(self, url):
'Film %s is not playable in your area.' % video_id, expected=True)
formats = []
has_bitrate = False
for source in self._parse_json(js_to_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
file_ = source.get('file')
@ -63,22 +68,25 @@ def _real_extract(self, url):
type_ = source.get('type')
ext = determine_ext(file_)
format_id = source.get('label') or ext
if all(v == 'm3u8' for v in (type_, ext)):
if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls'))
else:
bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
file_, 'bitrate', default=None))
if not has_bitrate and bitrate:
has_bitrate = True
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append({
'url': file_,
'format_id': format_id,
'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
'tbr': bitrate,
'height': height,
})
self._sort_formats(formats)
field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
title = self._search_regex(
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
@ -91,8 +99,8 @@ def _real_extract(self, url):
}
class SnagFilmsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
class ViewLiftIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942',
@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor):
# Film is not available.
'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
'only_matching': True,
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
}, {
'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
domain, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
@ -170,7 +184,7 @@ def _real_extract(self, url):
return {
'_type': 'url_transparent',
'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id,
'display_id': display_id,
'title': title,
@ -178,4 +192,5 @@ def _real_extract(self, url):
'thumbnail': thumbnail,
'duration': duration,
'categories': categories,
'ie_key': 'ViewLiftEmbed',
}