[generic] Parse RSS enclosure URLs (Fixes #5091)

This commit is contained in:
Philipp Hagemeister 2015-03-02 15:21:11 +01:00
parent c75f0b361a
commit 76c73715fb

View file

@ -26,6 +26,7 @@
unsmuggle_url, unsmuggle_url,
UnsupportedError, UnsupportedError,
url_basename, url_basename,
xpath_text,
) )
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
@ -569,6 +570,16 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15', 'title': 'John Carlson Postgame 2/25/15',
}, },
}, },
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
'ext': 'm4v',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
}
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -580,11 +591,24 @@ def _extract_rss(self, url, video_id, doc):
playlist_desc_el = doc.find('./channel/description') playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{ entries = []
'_type': 'url', for it in doc.findall('./channel/item'):
'url': e.find('link').text, next_url = xpath_text(it, 'link', fatal=False)
'title': e.find('title').text, if not next_url:
} for e in doc.findall('./channel/item')] enclosure_nodes = it.findall('./enclosure')
for e in enclosure_nodes:
next_url = e.attrib.get('url')
if next_url:
break
if not next_url:
continue
entries.append({
'_type': 'url',
'url': next_url,
'title': it.find('title').text,
})
return { return {
'_type': 'playlist', '_type': 'playlist',