From bec60ea49aa4752cf1216cbc24ba4b850cce2d78 Mon Sep 17 00:00:00 2001 From: Xuhao Zhu Date: Sat, 20 Apr 2024 14:06:53 +0800 Subject: [PATCH 1/6] Update subgen.py add queuing and threading. --- subgen.py | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/subgen.py b/subgen.py index 262ce47..390b369 100644 --- a/subgen.py +++ b/subgen.py @@ -115,6 +115,21 @@ in_docker = os.path.exists('/.dockerenv') docker_status = "Docker" if in_docker else "Standalone" last_print_time = None +#start queue +global task_queue +task_queue = queue.Queue() + +def transcription_worker(): + while True: + task = task_queue.get() + gen_subtitles(task['path'], task['transcribe_or_translate'], task['force'],task['force_language']) + task_queue.task_done() + # show queue + logging.debug(f"There are {task_queue.qsize()} tasks left in the queue.") + +for _ in range(concurrent_transcriptions): + threading.Thread(target=transcription_worker, daemon=True).start() + # Define a filter class class MultiplePatternsFilter(logging.Filter): def filter(self, record): @@ -294,7 +309,8 @@ def receive_tautulli_webhook( fullpath = file logging.debug("Path of file: " + fullpath) - gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) else: return { "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"} @@ -321,7 +337,8 @@ def receive_plex_webhook( fullpath = get_plex_file_name(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.debug("Path of file: " + fullpath) - gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.") except Exception as e: @@ -346,7 +363,8 @@ def receive_jellyfin_webhook( fullpath = get_jellyfin_file_name(ItemId, jellyfinserver, jellyfintoken) logging.debug(f"Path of file: {fullpath}") - gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) try: refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken) logging.info(f"Metadata for item {ItemId} refreshed successfully.") @@ -379,7 +397,8 @@ def receive_emby_webhook( if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay: logging.debug("Path of file: " + fullpath) - gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) return "" @@ -565,6 +584,15 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo files_to_transcribe.remove(file_path) delete_model() +def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: + global task_queue + task = { + 'path': file_path, + 'transcribe_or_translate': transcription_type, + 'force': add_to_front, + 'force_language':force_language + } + task_queue.put(task) def get_file_name_without_extension(file_path): file_name, file_extension = os.path.splitext(file_path) @@ -736,7 +764,8 @@ if monitor: if has_audio(file_path): # Call the gen_subtitles function logging.info(f"File: {path_mapping(file_path)} was added") - gen_subtitles(path_mapping(file_path), transcribe_or_translate, False) + # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) def on_created(self, event): self.create_subtitle(event) def on_modified(self, event): @@ -751,11 +780,13 @@ def transcribe_existing(transcribe_folders, forceLanguage=None): for root, dirs, files in os.walk(path): for file in files: file_path = os.path.join(root, file) - gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) + # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) # if the path specified was actually a single file and not a folder, process it if os.path.isfile(path): if has_audio(path): - gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) + # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) # Set up the observer to watch for new files if monitor: observer = Observer() From 72f264870fc6127eaa636a597e32e9679771a671 Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Sat, 20 Apr 2024 08:17:08 -0600 Subject: [PATCH 2/6] Cleaned up checks --- subgen.py | 67 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/subgen.py b/subgen.py index 390b369..0f8efcf 100644 --- a/subgen.py +++ b/subgen.py @@ -83,8 +83,8 @@ def update_env_variables(): path_mapping_from = os.getenv('PATH_MAPPING_FROM', r'/tv') path_mapping_to = os.getenv('PATH_MAPPING_TO', r'/Volumes/TV') model_location = os.getenv('MODEL_PATH', './models') - monitor = convert_to_bool(os.getenv('MONITOR', False)) - transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', '') + monitor = convert_to_bool(os.getenv('MONITOR', True)) + transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', './test') transcribe_or_translate = os.getenv('TRANSCRIBE_OR_TRANSLATE', 'transcribe') force_detected_language_to = os.getenv('FORCE_DETECTED_LANGUAGE_TO', '').lower() clear_vram_on_complete = convert_to_bool(os.getenv('CLEAR_VRAM_ON_COMPLETE', True)) @@ -310,7 +310,8 @@ def receive_tautulli_webhook( logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + if gen_subtitles_check(path_mapping(fullpath)): + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) else: return { "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"} @@ -338,7 +339,8 @@ def receive_plex_webhook( logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + if gen_subtitles_check(path_mapping(fullpath)): + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.") except Exception as e: @@ -364,7 +366,8 @@ def receive_jellyfin_webhook( logging.debug(f"Path of file: {fullpath}") # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + if gen_subtitles_check(path_mapping(fullpath)): + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) try: refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken) logging.info(f"Metadata for item {ItemId} refreshed successfully.") @@ -398,7 +401,8 @@ def receive_emby_webhook( if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay: logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + if gen_subtitles_check(path_mapping(fullpath)): + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) return "" @@ -509,6 +513,27 @@ def write_lrc(result, file_path): fraction = int((segment.start - int(segment.start)) * 100) file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n") +def gen_subtitles_check(file_path: str): + if not has_audio(file_path): + logging.debug(f"{file_path} doesn't have any audio to transcribe!") + return False + + if file_path in files_to_transcribe: + logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") + return + + message = None + if has_subtitle_language(file_path, skipifinternalsublang): + message = f"{file_path} already has an internal subtitle we want, skipping generation" + elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension): + message = f"{file_path} already has a subtitle created for this, skipping it" + elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH): + message = f"{file_path} already has a SDH subtitle created for this, skipping it" + if message: + logging.info(message) + return False + return True + def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: """Generates subtitles for a video file. @@ -521,31 +546,14 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo """ try: - if not has_audio(file_path): - logging.debug(f"{file_path} doesn't have any audio to transcribe!") - return None - - if file_path in files_to_transcribe: - logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") - return - - message = None - if has_subtitle_language(file_path, skipifinternalsublang): - message = f"{file_path} already has an internal subtitle we want, skipping generation" - elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension): - message = f"{file_path} already has a subtitle created for this, skipping it" - elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH): - message = f"{file_path} already has a SDH subtitle created for this, skipping it" - if message: - logging.info(message) - return message + if add_to_front: files_to_transcribe.insert(0, file_path) else: files_to_transcribe.append(file_path) logging.info(f"Added {os.path.basename(file_path)} for transcription.") - logging.info(f"{len(files_to_transcribe)} files in the queue for transcription") + #logging.info(f"{len(files_to_transcribe)} files in the queue for transcription") logging.info(f"Transcribing file: {os.path.basename(file_path)}") start_time = time.time() @@ -765,7 +773,8 @@ if monitor: # Call the gen_subtitles function logging.info(f"File: {path_mapping(file_path)} was added") # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False) - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) + if gen_subtitles_check(path_mapping(fullpath)): + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) def on_created(self, event): self.create_subtitle(event) def on_modified(self, event): @@ -781,12 +790,14 @@ def transcribe_existing(transcribe_folders, forceLanguage=None): for file in files: file_path = os.path.join(root, file) # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) + if gen_subtitles_check(path_mapping(file_path)): + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) # if the path specified was actually a single file and not a folder, process it if os.path.isfile(path): if has_audio(path): # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) - gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) + if gen_subtitles_check(path_mapping(path)): + gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) # Set up the observer to watch for new files if monitor: observer = Observer() From 8ddebaf701bece9be5c3e6869b6081cd87a47b33 Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Sat, 20 Apr 2024 08:18:03 -0600 Subject: [PATCH 3/6] Emptied MONITOR and FOLDERS var (was from my own testing) --- subgen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subgen.py b/subgen.py index 0f8efcf..5b819b3 100644 --- a/subgen.py +++ b/subgen.py @@ -83,8 +83,8 @@ def update_env_variables(): path_mapping_from = os.getenv('PATH_MAPPING_FROM', r'/tv') path_mapping_to = os.getenv('PATH_MAPPING_TO', r'/Volumes/TV') model_location = os.getenv('MODEL_PATH', './models') - monitor = convert_to_bool(os.getenv('MONITOR', True)) - transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', './test') + monitor = convert_to_bool(os.getenv('MONITOR', False)) + transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', '') transcribe_or_translate = os.getenv('TRANSCRIBE_OR_TRANSLATE', 'transcribe') force_detected_language_to = os.getenv('FORCE_DETECTED_LANGUAGE_TO', '').lower() clear_vram_on_complete = convert_to_bool(os.getenv('CLEAR_VRAM_ON_COMPLETE', True)) From 60520cef637442d0bf43e7790a4a3335bfa8d1d4 Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Sat, 20 Apr 2024 08:45:56 -0600 Subject: [PATCH 4/6] removed check function and moved it under gen_subtitles_queue --- subgen.py | 65 ++++++++++++++++++++++++------------------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/subgen.py b/subgen.py index 5b819b3..f0a1d03 100644 --- a/subgen.py +++ b/subgen.py @@ -75,7 +75,7 @@ def update_env_variables(): procaddedmedia = convert_to_bool(os.getenv('PROCADDEDMEDIA', True)) procmediaonplay = convert_to_bool(os.getenv('PROCMEDIAONPLAY', True)) namesublang = os.getenv('NAMESUBLANG', 'aa') - skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'eng') + skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'aa') webhookport = int(os.getenv('WEBHOOKPORT', 9000)) word_level_highlight = convert_to_bool(os.getenv('WORD_LEVEL_HIGHLIGHT', False)) debug = convert_to_bool(os.getenv('DEBUG', True)) @@ -310,8 +310,7 @@ def receive_tautulli_webhook( logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - if gen_subtitles_check(path_mapping(fullpath)): - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) else: return { "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"} @@ -339,8 +338,7 @@ def receive_plex_webhook( logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - if gen_subtitles_check(path_mapping(fullpath)): - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.") except Exception as e: @@ -366,8 +364,7 @@ def receive_jellyfin_webhook( logging.debug(f"Path of file: {fullpath}") # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - if gen_subtitles_check(path_mapping(fullpath)): - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) try: refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken) logging.info(f"Metadata for item {ItemId} refreshed successfully.") @@ -401,8 +398,7 @@ def receive_emby_webhook( if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay: logging.debug("Path of file: " + fullpath) # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) - if gen_subtitles_check(path_mapping(fullpath)): - gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) + gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) return "" @@ -513,28 +509,6 @@ def write_lrc(result, file_path): fraction = int((segment.start - int(segment.start)) * 100) file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n") -def gen_subtitles_check(file_path: str): - if not has_audio(file_path): - logging.debug(f"{file_path} doesn't have any audio to transcribe!") - return False - - if file_path in files_to_transcribe: - logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") - return - - message = None - if has_subtitle_language(file_path, skipifinternalsublang): - message = f"{file_path} already has an internal subtitle we want, skipping generation" - elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension): - message = f"{file_path} already has a subtitle created for this, skipping it" - elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH): - message = f"{file_path} already has a SDH subtitle created for this, skipping it" - if message: - logging.info(message) - return False - return True - - def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: """Generates subtitles for a video file. @@ -594,6 +568,26 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: global task_queue + + if not has_audio(file_path): + logging.debug(f"{file_path} doesn't have any audio to transcribe!") + return + + if file_path in files_to_transcribe: + logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") + return + + message = None + if has_subtitle_language(file_path, skipifinternalsublang): + message = f"{file_path} already has an internal subtitle we want, skipping generation" + elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension): + message = f"{file_path} already has a subtitle created for this, skipping it" + elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH): + message = f"{file_path} already has a SDH subtitle created for this, skipping it" + if message: + logging.info(message) + return + task = { 'path': file_path, 'transcribe_or_translate': transcription_type, @@ -773,8 +767,7 @@ if monitor: # Call the gen_subtitles function logging.info(f"File: {path_mapping(file_path)} was added") # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False) - if gen_subtitles_check(path_mapping(fullpath)): - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) def on_created(self, event): self.create_subtitle(event) def on_modified(self, event): @@ -790,14 +783,12 @@ def transcribe_existing(transcribe_folders, forceLanguage=None): for file in files: file_path = os.path.join(root, file) # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) - if gen_subtitles_check(path_mapping(file_path)): - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) # if the path specified was actually a single file and not a folder, process it if os.path.isfile(path): if has_audio(path): # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) - if gen_subtitles_check(path_mapping(path)): - gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) # Set up the observer to watch for new files if monitor: observer = Observer() From 2e6bf94b0977df6a99b767998184d5a16ab8dd4f Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Sat, 20 Apr 2024 08:47:04 -0600 Subject: [PATCH 5/6] Set skipifinternalsublang back to default. --- subgen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subgen.py b/subgen.py index f0a1d03..f107b37 100644 --- a/subgen.py +++ b/subgen.py @@ -75,7 +75,7 @@ def update_env_variables(): procaddedmedia = convert_to_bool(os.getenv('PROCADDEDMEDIA', True)) procmediaonplay = convert_to_bool(os.getenv('PROCMEDIAONPLAY', True)) namesublang = os.getenv('NAMESUBLANG', 'aa') - skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'aa') + skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'eng') webhookport = int(os.getenv('WEBHOOKPORT', 9000)) word_level_highlight = convert_to_bool(os.getenv('WORD_LEVEL_HIGHLIGHT', False)) debug = convert_to_bool(os.getenv('DEBUG', True)) From 8a6e625365429290441c6035a81ec867eb5b811f Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Sat, 20 Apr 2024 11:28:26 -0600 Subject: [PATCH 6/6] add queue tracking for Bazarr General clean up, removed the old array that tracked items and added Bazarr. Bazarr doesn't obey the CONCURRENT_TRANSCRIPTIONS, which is probably best since those have a fixed timeout they need to respond by. --- subgen.py | 62 +++++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/subgen.py b/subgen.py index f107b37..53e8e22 100644 --- a/subgen.py +++ b/subgen.py @@ -109,7 +109,6 @@ update_env_variables() app = FastAPI() model = None -files_to_transcribe = [] in_docker = os.path.exists('/.dockerenv') docker_status = "Docker" if in_docker else "Standalone" @@ -122,8 +121,11 @@ task_queue = queue.Queue() def transcription_worker(): while True: task = task_queue.get() - gen_subtitles(task['path'], task['transcribe_or_translate'], task['force'],task['force_language']) - task_queue.task_done() + if 'Bazarr-' in task['path']: + logging.info(f"Skipping processing for {task['path']} as it is handled by ASR.") + else: + gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language']) + task_queue.task_done() # show queue logging.debug(f"There are {task_queue.qsize()} tasks left in the queue.") @@ -309,7 +311,6 @@ def receive_tautulli_webhook( fullpath = file logging.debug("Path of file: " + fullpath) - # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) else: return { @@ -337,7 +338,6 @@ def receive_plex_webhook( fullpath = get_plex_file_name(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.debug("Path of file: " + fullpath) - # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken) logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.") @@ -363,7 +363,6 @@ def receive_jellyfin_webhook( fullpath = get_jellyfin_file_name(ItemId, jellyfinserver, jellyfintoken) logging.debug(f"Path of file: {fullpath}") - # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) try: refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken) @@ -397,7 +396,6 @@ def receive_emby_webhook( if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay: logging.debug("Path of file: " + fullpath) - # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True) return "" @@ -424,14 +422,17 @@ def asr( try: logging.info(f"Transcribing file from Bazarr/ASR webhook") result = None - random_name = random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6) + random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6)) if force_detected_language_to: language = force_detected_language_to start_time = time.time() start_model() - files_to_transcribe.insert(0, f"Bazarr-asr-{random_name}") + + task_id = { 'path': f"Bazarr-asr-{random_name}" } + task_queue.put(task_id) + audio_data = np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0 if model_prompt: custom_prompt = greetings_translations.get(language, '') or custom_model_prompt @@ -446,8 +447,7 @@ def asr( except Exception as e: logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") finally: - if f"Bazarr-asr-{random_name}" in files_to_transcribe: - files_to_transcribe.remove(f"Bazarr-asr-{random_name}") + task_queue.task_done() delete_model() if result: return StreamingResponse( @@ -470,8 +470,11 @@ def detect_language( logging.info(f"Detect language is set to detect on the first {detect_language_length} seconds of the audio.") try: start_model() - random_name = random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6) - files_to_transcribe.insert(0, f"Bazarr-detect-language-{random_name}") + random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6)) + + task_id = { 'path': f"Bazarr-detect-language-{random_name}" } + task_queue.put(task_id) + audio_data = np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0 detected_lang_code = model.transcribe_stable(whisper.pad_or_trim(audio_data, detect_language_length * 16000), input_sr=16000).language @@ -479,8 +482,7 @@ def detect_language( logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") finally: - if f"Bazarr-detect-language-{random_name}" in files_to_transcribe: - files_to_transcribe.remove(f"Bazarr-detect-language-{random_name}") + task_queue.task_done() delete_model() return {"detected_language": whisper_languages.get(detected_lang_code, detected_lang_code) , "language_code": detected_lang_code} @@ -492,7 +494,7 @@ def start_model(): model = stable_whisper.load_faster_whisper(whisper_model, download_root=model_location, device=transcribe_device, cpu_threads=whisper_threads, num_workers=concurrent_transcriptions, compute_type=compute_type) def delete_model(): - if clear_vram_on_complete and len(files_to_transcribe) == 0: + if clear_vram_on_complete and task_queue.qsize() == 0: global model logging.debug("Queue is empty, clearing/releasing VRAM") model = None @@ -509,25 +511,17 @@ def write_lrc(result, file_path): fraction = int((segment.start - int(segment.start)) * 100) file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n") -def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: +def gen_subtitles(file_path: str, transcription_type: str, force_language=None) -> None: """Generates subtitles for a video file. Args: file_path: str - The path to the video file. transcription_type: str - The type of transcription or translation to perform. - add_to_front: bool - Whether to add the file to the front of the transcription queue. Default is True. force_language: str - The language to force for transcription or translation. Default is None. """ try: - - - if add_to_front: - files_to_transcribe.insert(0, file_path) - else: - files_to_transcribe.append(file_path) logging.info(f"Added {os.path.basename(file_path)} for transcription.") - #logging.info(f"{len(files_to_transcribe)} files in the queue for transcription") logging.info(f"Transcribing file: {os.path.basename(file_path)}") start_time = time.time() @@ -562,21 +556,15 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo logging.info(f"Error processing or transcribing {file_path}: {e}") finally: - if file_path in files_to_transcribe: - files_to_transcribe.remove(file_path) delete_model() -def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None: +def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=None) -> None: global task_queue if not has_audio(file_path): logging.debug(f"{file_path} doesn't have any audio to transcribe!") return - if file_path in files_to_transcribe: - logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") - return - message = None if has_subtitle_language(file_path, skipifinternalsublang): message = f"{file_path} already has an internal subtitle we want, skipping generation" @@ -591,7 +579,6 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=Tr task = { 'path': file_path, 'transcribe_or_translate': transcription_type, - 'force': add_to_front, 'force_language':force_language } task_queue.put(task) @@ -766,8 +753,7 @@ if monitor: if has_audio(file_path): # Call the gen_subtitles function logging.info(f"File: {path_mapping(file_path)} was added") - # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False) - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate) def on_created(self, event): self.create_subtitle(event) def on_modified(self, event): @@ -782,13 +768,11 @@ def transcribe_existing(transcribe_folders, forceLanguage=None): for root, dirs, files in os.walk(path): for file in files: file_path = os.path.join(root, file) - # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) - gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, forceLanguage) # if the path specified was actually a single file and not a folder, process it if os.path.isfile(path): if has_audio(path): - # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) - gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) + gen_subtitles_queue(path_mapping(path), transcribe_or_translate, forceLanguage) # Set up the observer to watch for new files if monitor: observer = Observer()