Rewrite should_skip_file

This commit is contained in:
McCloudS
2025-02-06 10:38:06 -07:00
committed by GitHub
parent 8b0396d957
commit 58bc3fe961

View File

@@ -787,7 +787,7 @@ def define_subtitle_language_naming(language: LanguageCode, type):
"NATIVE": lambda : language.to_name(in_english=False) "NATIVE": lambda : language.to_name(in_english=False)
} }
if transcribe_or_translate == 'translate': if transcribe_or_translate == 'translate':
language = LanguageCode.from_string('eng') language = LanguageCode.ENGLISH
return switch_dict.get(type, language.to_name)() return switch_dict.get(type, language.to_name)()
def name_subtitle(file_path: str, language: LanguageCode) -> str: def name_subtitle(file_path: str, language: LanguageCode) -> str:
@@ -1077,76 +1077,73 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, force_language:
task_queue.put(task) task_queue.put(task)
logging.info(f"task_queue.put(task)({task['path']}, {task['transcribe_or_translate']}, {task['force_language']})") logging.info(f"task_queue.put(task)({task['path']}, {task['transcribe_or_translate']}, {task['force_language']})")
def should_skip_file(file_path: str, target_language: LanguageCode) -> bool:
    """
    Determine whether subtitle generation should be skipped for a file.

    The checks run in order and the first one that matches wins; each
    match logs the reason at INFO level and returns True. Every check is
    driven by a module-level setting (e.g. ``skip_unknown_language``,
    ``skipifinternalsublang``) that is defined outside this function.

    Args:
        file_path: Path to the media file.
        target_language: The desired language for transcription.

    Returns:
        True if the file should be skipped, False otherwise.
    """
    base_name = os.path.basename(file_path)
    file_name, file_ext = os.path.splitext(base_name)

    # 1. Skip if it's an audio file and an LRC file already exists.
    if isAudioFileExtension(file_ext) and lrc_for_audio_files:
        lrc_path = os.path.join(os.path.dirname(file_path), f"{file_name}.lrc")
        if os.path.exists(lrc_path):
            logging.info(f"Skipping {base_name}: LRC file already exists.")
            return True

    # 2. Skip if language detection failed and we are configured to skip unknowns.
    if skip_unknown_language and target_language == LanguageCode.NONE:
        logging.info(f"Skipping {base_name}: Unknown language and skip_unknown_language is enabled.")
        return True

    # 3. Skip if a subtitle already exists in the target language, either
    #    reported for the file itself or found alongside it in the folder.
    if skip_if_to_transcribe_sub_already_exist and (
        has_subtitle_language(file_path, target_language)
        or has_subtitle_of_language_in_folder(file_path, target_language)
    ):
        lang_name = target_language.to_name()
        logging.info(f"Skipping {base_name}: Subtitles already exist in {lang_name}.")
        return True

    # 4. Skip if an internal subtitle exists in skipifinternalsublang language.
    if skipifinternalsublang and has_subtitle_language(file_path, skipifinternalsublang):
        lang_name = skipifinternalsublang.to_name()
        logging.info(f"Skipping {base_name}: Internal subtitles in {lang_name} already exist.")
        return True

    # 5. Skip if an external subtitle exists in the namesublang language.
    # NOTE(review): this uses the same has_subtitle_language() helper as the
    # internal check in step 4 -- confirm that helper also covers external
    # subtitle files, otherwise this step duplicates step 4's lookup.
    if skipifexternalsub and namesublang and LanguageCode.is_valid_language(namesublang):
        external_lang = LanguageCode.from_string(namesublang)
        if has_subtitle_language(file_path, external_lang):
            lang_name = external_lang.to_name()
            logging.info(f"Skipping {base_name}: External subtitles in {lang_name} already exist.")
            return True

    # 6. Skip if any subtitle language is in the skip list.
    if any(lang in skip_lang_codes_list for lang in get_subtitle_languages(file_path)):
        logging.info(f"Skipping {base_name}: Contains a skipped subtitle language.")
        return True

    # 7. Audio track checks: the languages are fetched once and shared by 7a/7b.
    audio_langs = get_audio_languages(file_path)

    # 7a. Limit to preferred audio languages: skip when none of the file's
    #     audio tracks is in the preferred list.
    if limit_to_preferred_audio_languages and not any(
        lang in preferred_audio_languages for lang in audio_langs
    ):
        preferred_names = [lang.to_name() for lang in preferred_audio_languages]
        logging.info(f"Skipping {base_name}: No preferred audio tracks found (looking for {', '.join(preferred_names)})")
        return True

    # 7b. Skip if the audio track language is in the skip list.
    if any(lang in skip_if_audio_track_is_in_list for lang in audio_langs):
        logging.info(f"Skipping {base_name}: Contains a skipped audio language.")
        return True

    logging.debug(f"Processing {base_name}: No skip conditions met.")
    return False

def get_subtitle_languages(video_path): def get_subtitle_languages(video_path):