mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 01:02:13 +01:00
Add webpage_url_basename info_dict field (Fixes #1938)
This commit is contained in:
parent
44c471c3b8
commit
29eb517403
3 changed files with 27 additions and 9 deletions
|
@ -13,20 +13,21 @@
|
|||
|
||||
#from youtube_dl.utils import htmlentity_transform
|
||||
from youtube_dl.utils import (
|
||||
timeconvert,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
orderedSet,
|
||||
DateRange,
|
||||
unified_strdate,
|
||||
encodeFilename,
|
||||
find_xpath_attr,
|
||||
get_meta_content,
|
||||
xpath_with_ns,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
orderedSet,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
|
@ -181,6 +182,12 @@ def test_str_to_int(self):
|
|||
self.assertEqual(str_to_int('123,456'), 123456)
|
||||
self.assertEqual(str_to_int('123.456'), 123456)
|
||||
|
||||
def test_url_basename(self):
    """Check url_basename against a table of (URL, expected basename) pairs."""
    # Covers: empty path, plain path, query string, fragment, trailing slash.
    expectations = (
        (u'http://foo.de/', u''),
        (u'http://foo.de/bar/baz', u'baz'),
        (u'http://foo.de/bar/baz?x=y', u'baz'),
        (u'http://foo.de/bar/baz#x=y', u'baz'),
        (u'http://foo.de/bar/baz/', u'baz'),
    )
    for address, expected in expectations:
        self.assertEqual(url_basename(address), expected)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
|
@ -484,6 +485,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
|||
{
|
||||
'extractor': ie.IE_NAME,
|
||||
'webpage_url': url,
|
||||
'webpage_url_basename': url_basename(url),
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
if process:
|
||||
|
@ -576,6 +578,7 @@ def make_result(embedded_info):
|
|||
'playlist_index': i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
|
@ -596,6 +599,7 @@ def _fixup(r):
|
|||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
return r
|
||||
|
|
|
@ -1084,3 +1084,10 @@ def remove_start(s, start):
|
|||
if s.startswith(start):
|
||||
return s[len(start):]
|
||||
return s
|
||||
|
||||
|
||||
def url_basename(url):
    """Return the last non-empty path component of *url*.

    The URL must start with ``http://``, ``https://`` or a scheme-relative
    ``//`` followed by a host.  A single trailing slash, the query string
    and the fragment are ignored.

    Returns an empty string when the URL has no path component or does not
    look like an http(s)/scheme-relative URL.
    """
    # ``(?:[^/?#]+/)*`` skips any number of leading directory segments.
    # (Allowing only one such segment made every URL with three or more
    # path components fall through to the empty-string result.)
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)
|
||||
|
|
Loading…
Reference in a new issue