From 6255e567d92e7d8fe7920c34db050a95b8d3ff7f Mon Sep 17 00:00:00 2001 From: Aakash Gajjar Date: Sat, 15 Feb 2020 12:30:12 +0530 Subject: [PATCH] [tiktok] fix regex --- youtube_dl/extractor/tiktok.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index d4f6d9055..613ac9cc8 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -65,16 +65,7 @@ def _extract_aweme(self, video_data, webpage): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www|vm)\. - (?:tiktok.com)\/ - (@(?P[\w\.]+))\/ - (?:video)\/ - ) - (?P<id>[\d]{6,}) - ''' + _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', @@ -133,8 +124,8 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id, note='Downloading video webpage') json_string = self._search_regex( - r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string>[^<]+)', - webpage, 'json_string', group='json_string') + r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)', + webpage, 'json_string', group='json_string_ld') json_data = self._parse_json(json_string, video_id) video_data = try_get(json_data, lambda x: x['props']['pageProps'], expected_type=dict)