added SKIP_LANG_CODES
SKIP_LANG_CODES takes a pipe-separated ('|') list of 3-letter language codes. Subtitle generation is skipped for a file if any of its audio streams has a language that matches a code in the list.
This commit is contained in:
36
subgen.py
36
subgen.py
@@ -70,6 +70,9 @@ lrc_for_audio_files = convert_to_bool(os.getenv('LRC_FOR_AUDIO_FILES', True))
|
|||||||
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
|
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
|
||||||
detect_language_length = os.getenv('DETECT_LANGUAGE_LENGTH', 30)
|
detect_language_length = os.getenv('DETECT_LANGUAGE_LENGTH', 30)
|
||||||
skipifexternalsub = convert_to_bool(os.getenv('SKIPIFEXTERNALSUB', False))
|
skipifexternalsub = convert_to_bool(os.getenv('SKIPIFEXTERNALSUB', False))
|
||||||
|
skip_lang_codes = os.getenv("SKIP_LANG_CODES", "")
|
||||||
|
skip_lang_codes_list = skip_lang_codes.split("|") if skip_lang_codes else []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs = ast.literal_eval(os.getenv('SUBGEN_KWARGS', '{}') or '{}')
|
kwargs = ast.literal_eval(os.getenv('SUBGEN_KWARGS', '{}') or '{}')
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -528,6 +531,8 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=
|
|||||||
message = f"{file_path} already has a Subgen SDH subtitle created for this, skipping it"
|
message = f"{file_path} already has a Subgen SDH subtitle created for this, skipping it"
|
||||||
elif os.path.exists(get_file_name_without_extension(file_path) + '.lrc'):
|
elif os.path.exists(get_file_name_without_extension(file_path) + '.lrc'):
|
||||||
message = f"{file_path} already has a LRC created for this, skipping it"
|
message = f"{file_path} already has a LRC created for this, skipping it"
|
||||||
|
elif should_skip_language(get_video_audio_languages(video_path)):
|
||||||
|
message = f"Skipping subtitle generation for language: {video_language}")
|
||||||
|
|
||||||
if message:
|
if message:
|
||||||
logging.debug(message)
|
logging.debug(message)
|
||||||
@@ -540,6 +545,37 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=
|
|||||||
}
|
}
|
||||||
task_queue.put(task)
|
task_queue.put(task)
|
||||||
|
|
||||||
|
def should_skip_languages(language_codes):
    """
    Check if any language in language_codes matches a code in skip_lang_codes_list.

    Codes are compared ignoring letter case and surrounding whitespace, so a
    configured value such as "ENG | spa" still matches the stream codes
    "eng" and "spa". Blank entries in skip_lang_codes_list are ignored.

    :param language_codes: Iterable of language codes (strings) to test
    :return: True if at least one code is in skip_lang_codes_list, otherwise False
    """
    # NOTE(review): the call site in gen_subtitles_queue refers to
    # `should_skip_language` (singular); the two names need reconciling.

    # Normalize the configured codes once so each lookup is a cheap set test.
    skip_codes = {
        entry.strip().lower()
        for entry in skip_lang_codes_list
        if isinstance(entry, str) and entry.strip()
    }
    if not skip_codes:
        return False

    # Non-string entries can never match a configured code.
    return any(
        isinstance(code, str) and code.strip().lower() in skip_codes
        for code in language_codes
    )
|
||||||
|
|
||||||
|
def get_audio_languages(video_path):
    """
    Collect the language code of every audio stream in a media file using pyav.

    :param video_path: Path to the video file
    :return: List with one language code per audio stream; 'und' (undefined)
             is used for streams whose 'language' metadata is missing or empty
    """
    with av.open(video_path) as media:
        # One entry per audio stream, falling back to 'und' when the
        # stream metadata has no usable 'language' value.
        return [
            audio_stream.metadata.get('language') or 'und'
            for audio_stream in media.streams.audio
        ]
|
||||||
|
|
||||||
def get_file_name_without_extension(file_path):
    """
    Return file_path with its final extension removed.

    The directory part of the path is kept; only the last extension
    (as split by os.path.splitext) is dropped.

    :param file_path: Path to a file
    :return: The path without its trailing extension
    """
    return os.path.splitext(file_path)[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user