Added SKIP_LANG_CODES

SKIP_LANG_CODES takes a pipe-separated ('|') list of 3-letter language codes. Subtitles are not generated for a file if any of its audio streams matches a code in the list.
2024-11-02 08:29:28 -06:00
parent 2e11498d18
commit d2fde05ac7
1 changed files with 36 additions and 0 deletions
--- a/subgen.py
+++ b/subgen.py
@@ -70,6 +70,9 @@ lrc_for_audio_files = convert_to_bool(os.getenv('LRC_FOR_AUDIO_FILES', True))
 custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
 detect_language_length = os.getenv('DETECT_LANGUAGE_LENGTH', 30)
 skipifexternalsub = convert_to_bool(os.getenv('SKIPIFEXTERNALSUB', False))
+skip_lang_codes = os.getenv("SKIP_LANG_CODES", "")  # pipe-separated language codes, e.g. "eng|spa"
+skip_lang_codes_list = [code.strip() for code in skip_lang_codes.split("|") if code.strip()]  # tolerate stray spaces and empty entries
+
 try:
    kwargs = ast.literal_eval(os.getenv('SUBGEN_KWARGS', '{}') or '{}')
 except ValueError:
@@ -528,6 +531,8 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=
        message = f"{file_path} already has a Subgen SDH subtitle created for this, skipping it"
    elif os.path.exists(get_file_name_without_extension(file_path) + '.lrc'):
        message = f"{file_path} already has a LRC created for this, skipping it"
+    elif skip_lang_codes_list and should_skip_languages(get_audio_languages(file_path)):  # only probe the file when SKIP_LANG_CODES is set
+        message = f"Skipping subtitle generation for {file_path}: an audio stream language is listed in SKIP_LANG_CODES"
        
    if message:
        logging.debug(message)
@@ -540,6 +545,37 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=
    }
    task_queue.put(task)

+def should_skip_languages(language_codes):
+    """
+    Return True when at least one of the given audio language codes appears
+    in the module-level skip_lang_codes_list; otherwise return False.
+    An empty language_codes iterable yields False.
+    """
+    # any() stops at the first matching code, like an early return in a loop.
+    return any(code in skip_lang_codes_list for code in language_codes)
+
+def get_audio_languages(video_path):
+    """
+    Collect the language tag of every audio stream in a media file using pyav.
+
+    The file is opened with av.open() inside a with-statement, so the
+    container is only held while its audio streams are inspected.
+
+    :param video_path: Path to the video file
+    :return: List with one language code per audio stream; 'und' (undefined)
+             stands in for a missing or empty 'language' metadata entry
+    """
+    undefined_code = 'und'
+    with av.open(video_path) as container:
+        # Read the 'language' metadata entry of each audio stream; an absent
+        # or empty value falls back to the undefined marker.
+        codes = [
+            audio_stream.metadata.get('language') or undefined_code
+            for audio_stream in container.streams.audio
+        ]
+
+    return codes
+
 def get_file_name_without_extension(file_path):
    """Return file_path with its final extension (as split by os.path.splitext) removed."""
    return os.path.splitext(file_path)[0]