yt-dlp/yt_dlp/extractor/clyp.py

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    parse_qs,
    unified_timestamp,
)


class ClypIE(InfoExtractor):
    """Extractor for audio clips hosted on clyp.it.

    Clip metadata and the Ogg/MP3 stream URLs are read from the JSON API at
    ``api.clyp.it``. A WAV stream URL is additionally scraped from the clip's
    web page when the page exposes one.
    """

    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
    _TESTS = [{
        'url': 'https://clyp.it/iynkjk4b',
        'md5': '4bc6371c65210e7b372097fce4d92441',
        'info_dict': {
            'id': 'iynkjk4b',
            'ext': 'ogg',
            'title': 'research',
            'description': '#Research',
            'duration': 51.278,
            'timestamp': 1435524981,
            'upload_date': '20150628',
        },
    }, {
        'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
        'info_dict': {
            'id': 'b04p1odi',
            'ext': 'ogg',
            'title': 'GJ! (Reward Edit)',
            'description': 'Metal Resistance (THE ONE edition)',
            'duration': 177.789,
            'timestamp': 1528241278,
            'upload_date': '20180605',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://clyp.it/v42214lc',
        'md5': '4aca4dfc3236fb6d6ddc4ea08314f33f',
        'info_dict': {
            'id': 'v42214lc',
            'ext': 'wav',
            'title': 'i dont wanna go (old version)',
            'duration': 113.528,
            'timestamp': 1607348505,
            'upload_date': '20201207',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single clyp.it clip.

        Args:
            url: Clip page URL matching ``_VALID_URL``; it may carry a
                ``token`` query parameter, which is forwarded to the API.

        Returns:
            A dict with ``id``, ``title``, ``description``, ``duration``,
            ``timestamp`` and ``formats``.

        Raises:
            KeyError: If the API metadata has no ``Title`` entry.
        """
        audio_id = self._match_id(url)

        # Forward the token from the page URL (if any) to the API request.
        token = parse_qs(url).get('token', [None])[0]
        query = {'token': token} if token else {}

        metadata = self._download_json(
            f'https://api.clyp.it/{audio_id}', audio_id, query=query)

        # The API exposes up to four stream URLs under the keys
        # OggUrl, Mp3Url, SecureOggUrl and SecureMp3Url.
        formats = []
        for secure in ('', 'Secure'):
            for ext in ('Ogg', 'Mp3'):
                format_id = f'{secure}{ext}'
                format_url = metadata.get(f'{format_id}Url')
                if format_url:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'vcodec': 'none',
                        'acodec': ext.lower(),
                    })

        # The web page is only needed for the optional WAV stream, so a failed
        # download must not abort extraction of the API-provided formats.
        # With fatal=False the result may be falsy on failure, hence `or ''`.
        page = self._download_webpage(url, video_id=audio_id, fatal=False)
        wav_url = self._html_search_regex(
            r'var\s*wavStreamUrl\s*=\s*["\'](?P<url>https?://[^\'"]+)', page or '', 'url', default=None)
        if wav_url:
            formats.append({
                'url': wav_url,
                'format_id': 'wavStreamUrl',
                'vcodec': 'none',
                'acodec': 'wav',
            })

        return {
            'id': audio_id,
            # Title is the only mandatory metadata field; the rest may be None.
            'title': metadata['Title'],
            'description': metadata.get('Description'),
            'duration': float_or_none(metadata.get('Duration')),
            'timestamp': unified_timestamp(metadata.get('DateCreated')),
            'formats': formats,
        }
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00			`from .common import InfoExtractor`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`from ..utils import (`
			`float_or_none,`
[utils] Add `parse_qs` 2021-08-22 21:02:00 +02:00			`parse_qs,`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`unified_timestamp,`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`)`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00

			`class ClypIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`_TESTS = [{`
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`'url': 'https://clyp.it/iynkjk4b',`
			`'md5': '4bc6371c65210e7b372097fce4d92441',`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00			`'info_dict': {`
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`'id': 'iynkjk4b',`
			`'ext': 'ogg',`
			`'title': 'research',`
			`'description': '#Research',`
			`'duration': 51.278,`
			`'timestamp': 1435524981,`
			`'upload_date': '20150628',`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00			`},`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`}, {`
			`'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',`
			`'info_dict': {`
			`'id': 'b04p1odi',`
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`'ext': 'ogg',`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`'title': 'GJ! (Reward Edit)',`
			`'description': 'Metal Resistance (THE ONE edition)',`
			`'duration': 177.789,`
			`'timestamp': 1528241278,`
			`'upload_date': '20180605',`
			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`}, {`
			`'url': 'https://clyp.it/v42214lc',`
			`'md5': '4aca4dfc3236fb6d6ddc4ea08314f33f',`
			`'info_dict': {`
			`'id': 'v42214lc',`
			`'ext': 'wav',`
			`'title': 'i dont wanna go (old version)',`
			`'duration': 113.528,`
			`'timestamp': 1607348505,`
			`'upload_date': '20201207',`
			`},`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`}]`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00
			`def _real_extract(self, url):`
			`audio_id = self._match_id(url)`

[utils] Add `parse_qs` 2021-08-22 21:02:00 +02:00			`qs = parse_qs(url)`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`token = qs.get('token', [None])[0]`

			`query = {}`
			`if token:`
			`query['token'] = token`

[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`metadata = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'https://api.clyp.it/{audio_id}', audio_id, query=query)`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00
			`formats = []`
			`for secure in ('', 'Secure'):`
			`for ext in ('Ogg', 'Mp3'):`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`format_id = f'{secure}{ext}'`
			`format_url = metadata.get(f'{format_id}Url')`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`if format_url:`
			`formats.append({`
			`'url': format_url,`
			`'format_id': format_id,`
			`'vcodec': 'none',`
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`'acodec': ext.lower(),`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`})`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00
[extractor/clyp] Support `wav` (#6102) Authored by: qulaz 2023-02-12 05:28:15 +01:00			`page = self._download_webpage(url, video_id=audio_id)`
			`wav_url = self._html_search_regex(`
			`r'var\swavStreamUrl\s=\s*["\'](?P<url>https?://[^\'"]+)', page, 'url', default=None)`
			`if wav_url:`
			`formats.append({`
			`'url': wav_url,`
			`'format_id': 'wavStreamUrl',`
			`'vcodec': 'none',`
			`'acodec': 'wav',`
			`})`

[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`title = metadata['Title']`
			`description = metadata.get('Description')`
			`duration = float_or_none(metadata.get('Duration'))`
[clyp] Add support for token protected media (closes #17184) 2018-08-07 18:27:08 +02:00			`timestamp = unified_timestamp(metadata.get('DateCreated'))`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00
			`return {`
			`'id': audio_id,`
			`'title': title,`
			`'description': description,`
[clyp] Improve and cleanup (Closes #7194) 2015-10-28 16:42:01 +01:00			`'duration': duration,`
			`'timestamp': timestamp,`
			`'formats': formats,`
[clyp] Add extractor Update __init__.py [clyp.it] Extract ID idiomatically and make duration and description optional 2015-10-16 02:23:09 +02:00			`}`