[extractor] Import _ALL_CLASSES lazily

This significantly speeds up `import yt_dlp` in the absence of `lazy_extractors`
2024-11-10 01:02:13 +01:00 · 2022-06-15 18:00:34 +05:30 · 2022-06-15 18:00:34 +05:30 · 560738f34d
commit 560738f34d
parent 99d10bf607
6 changed files with 49 additions and 30 deletions
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -53,7 +53,7 @@ def get_all_ies():
    if os.path.exists(PLUGINS_DIRNAME):
        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
    try:
-        from yt_dlp.extractor import _ALL_CLASSES
+        from yt_dlp.extractor.extractors import _ALL_CLASSES
    finally:
        if os.path.exists(BLOCKED_DIRNAME):
            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -38,8 +38,6 @@
 from .cookies import load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
-from .extractor import _LAZY_LOADER
-from .extractor import _PLUGIN_CLASSES as plugin_extractors
 from .extractor import gen_extractor_classes, get_info_extractor
 from .extractor.openload import PhantomJSwrapper
 from .minicurses import format_text
@@ -3659,6 +3657,10 @@ def print_debug_header(self):
        if not self.params.get('verbose'):
            return

+        # These imports can be slow. So import them only as needed
+        from .extractor.extractors import _LAZY_LOADER
+        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -12,7 +12,7 @@
 from .compat import compat_getpass, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader import FileDownloader
-from .extractor import GenericIE, list_extractor_classes
+from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
 from .extractor.common import InfoExtractor
 from .options import parseOpts
@@ -79,6 +79,10 @@ def get_urls(urls, batchfile, verbose):


 def print_extractor_information(opts, urls):
+    # Importing GenericIE is currently slow since it imports other extractors
+    # TODO: Move this back to module level after generalization of embed detection
+    from .extractor.generic import GenericIE
+
    out = ''
    if opts.list_extractors:
        urls = dict.fromkeys(urls, False)
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@@ -33,7 +33,7 @@ def _is_package(module):

 def passthrough_module(parent, child, *, callback=lambda _: None):
    parent_module = importlib.import_module(parent)
-    child_module = importlib.import_module(child, parent)
+    child_module = None  # Import child module only as needed

    class PassthroughModule(types.ModuleType):
        def __getattr__(self, attr):
@@ -41,6 +41,9 @@ def __getattr__(self, attr):
                with contextlib.suppress(ImportError):
                    return importlib.import_module(f'.{attr}', parent)

+            nonlocal child_module
+            child_module = child_module or importlib.import_module(child, parent)
+
            ret = _NO_ATTRIBUTE
            with contextlib.suppress(AttributeError):
                ret = getattr(child_module, attr)
--- a/yt_dlp/extractor/init.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
-import os
+from ..compat.compat_utils import passthrough_module

-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
-    with contextlib.suppress(ImportError):
-        from .lazy_extractors import *  # noqa: F403
-        from .lazy_extractors import _ALL_CLASSES
-        _LAZY_LOADER = True
-
-if not _LAZY_LOADER:
-    from ._extractors import *  # noqa: F403
-    _ALL_CLASSES = [  # noqa: F811
-        klass
-        for name, klass in globals().items()
-        if name.endswith('IE') and name != 'GenericIE'
-    ]
-    _ALL_CLASSES.append(GenericIE)  # noqa: F405
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module


 def gen_extractor_classes():
    """ Return a list of supported extractors.
    The order does matter; the first extractor matched is the one handling the URL.
    """
+    from .extractors import _ALL_CLASSES
+
    return _ALL_CLASSES


@@ -39,10 +22,12 @@ def gen_extractors():

 def list_extractor_classes(age_limit=None):
    """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+    from .generic import GenericIE
+
    yield from sorted(filter(
-        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,  # noqa: F405
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
        gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
-    yield GenericIE  # noqa: F405
+    yield GenericIE


 def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):

 def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name + 'IE']
+    from . import extractors
+
+    return getattr(extractors, f'{ie_name}IE')
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -0,0 +1,23 @@
+import contextlib
+import os
+
+from ..utils import load_plugins
+
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+    with contextlib.suppress(ImportError):
+        from .lazy_extractors import *  # noqa: F403
+        from .lazy_extractors import _ALL_CLASSES
+        _LAZY_LOADER = True
+
+if not _LAZY_LOADER:
+    from ._extractors import *  # noqa: F403
+    _ALL_CLASSES = [  # noqa: F811
+        klass
+        for name, klass in globals().items()
+        if name.endswith('IE') and name != 'GenericIE'
+    ]
+    _ALL_CLASSES.append(GenericIE)  # noqa: F405
+
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES