From bec60ea49aa4752cf1216cbc24ba4b850cce2d78 Mon Sep 17 00:00:00 2001
From: Xuhao Zhu <xhzhu@zju.edu.cn>
Date: Sat, 20 Apr 2024 14:06:53 +0800
Subject: [PATCH 1/6] Update subgen.py: add queuing and threading

---
 subgen.py | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/subgen.py b/subgen.py
index 262ce47..390b369 100644
--- a/subgen.py
+++ b/subgen.py
@@ -115,6 +115,21 @@ in_docker = os.path.exists('/.dockerenv')
 docker_status = "Docker" if in_docker else "Standalone"
 last_print_time = None
 
+#start queue
+global task_queue
+task_queue = queue.Queue()
+
+def transcription_worker():
+    while True:
+        task = task_queue.get()
+        gen_subtitles(task['path'], task['transcribe_or_translate'], task['force'],task['force_language'])
+        task_queue.task_done()
+        # show queue
+        logging.debug(f"There are {task_queue.qsize()} tasks left in the queue.")
+
+for _ in range(concurrent_transcriptions):
+    threading.Thread(target=transcription_worker, daemon=True).start()
+
 # Define a filter class
 class MultiplePatternsFilter(logging.Filter):
     def filter(self, record):
@@ -294,7 +309,8 @@ def receive_tautulli_webhook(
             fullpath = file
             logging.debug("Path of file: " + fullpath)
 
-            gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
     else:
         return {
             "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"}
@@ -321,7 +337,8 @@ def receive_plex_webhook(
             fullpath = get_plex_file_name(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.debug("Path of file: " + fullpath)
 
-            gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.")
     except Exception as e:
@@ -346,7 +363,8 @@ def receive_jellyfin_webhook(
             fullpath = get_jellyfin_file_name(ItemId, jellyfinserver, jellyfintoken)
             logging.debug(f"Path of file: {fullpath}")
 
-            gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             try:
                 refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken)
                 logging.info(f"Metadata for item {ItemId} refreshed successfully.")
@@ -379,7 +397,8 @@ def receive_emby_webhook(
 
     if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay:
         logging.debug("Path of file: " + fullpath)
-        gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+        # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
+        gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
 
     return ""
     
@@ -565,6 +584,15 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo
             files_to_transcribe.remove(file_path)
         delete_model()
 
+def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
+    global task_queue
+    task = {
+        'path': file_path,
+        'transcribe_or_translate': transcription_type,
+        'force': add_to_front,
+        'force_language':force_language
+    }
+    task_queue.put(task)
 
 def get_file_name_without_extension(file_path):
     file_name, file_extension = os.path.splitext(file_path)
@@ -736,7 +764,8 @@ if monitor:
                 if has_audio(file_path):
                 # Call the gen_subtitles function
                     logging.info(f"File: {path_mapping(file_path)} was added")
-                    gen_subtitles(path_mapping(file_path), transcribe_or_translate, False)
+                    # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False)
+                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
         def on_created(self, event):
             self.create_subtitle(event)
         def on_modified(self, event):
@@ -751,11 +780,13 @@ def transcribe_existing(transcribe_folders, forceLanguage=None):
         for root, dirs, files in os.walk(path):
             for file in files:
                 file_path = os.path.join(root, file)
-                gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
+                # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
+                gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
     # if the path specified was actually a single file and not a folder, process it
     if os.path.isfile(path):
         if has_audio(path):
-            gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
+            # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
+            gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
      # Set up the observer to watch for new files
     if monitor:
         observer = Observer()

From 72f264870fc6127eaa636a597e32e9679771a671 Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Sat, 20 Apr 2024 08:17:08 -0600
Subject: [PATCH 2/6] Cleaned up checks

---
 subgen.py | 67 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/subgen.py b/subgen.py
index 390b369..0f8efcf 100644
--- a/subgen.py
+++ b/subgen.py
@@ -83,8 +83,8 @@ def update_env_variables():
     path_mapping_from = os.getenv('PATH_MAPPING_FROM', r'/tv')
     path_mapping_to = os.getenv('PATH_MAPPING_TO', r'/Volumes/TV')
     model_location = os.getenv('MODEL_PATH', './models')
-    monitor = convert_to_bool(os.getenv('MONITOR', False))
-    transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', '')
+    monitor = convert_to_bool(os.getenv('MONITOR', True))
+    transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', './test')
     transcribe_or_translate = os.getenv('TRANSCRIBE_OR_TRANSLATE', 'transcribe')
     force_detected_language_to = os.getenv('FORCE_DETECTED_LANGUAGE_TO', '').lower()
     clear_vram_on_complete = convert_to_bool(os.getenv('CLEAR_VRAM_ON_COMPLETE', True))
@@ -310,7 +310,8 @@ def receive_tautulli_webhook(
             logging.debug("Path of file: " + fullpath)
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            if gen_subtitles_check(path_mapping(fullpath)):
+                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
     else:
         return {
             "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"}
@@ -338,7 +339,8 @@ def receive_plex_webhook(
             logging.debug("Path of file: " + fullpath)
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            if gen_subtitles_check(path_mapping(fullpath)):
+                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.")
     except Exception as e:
@@ -364,7 +366,8 @@ def receive_jellyfin_webhook(
             logging.debug(f"Path of file: {fullpath}")
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            if gen_subtitles_check(path_mapping(fullpath)):
+                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             try:
                 refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken)
                 logging.info(f"Metadata for item {ItemId} refreshed successfully.")
@@ -398,7 +401,8 @@ def receive_emby_webhook(
     if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay:
         logging.debug("Path of file: " + fullpath)
         # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-        gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+        if gen_subtitles_check(path_mapping(fullpath)):
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
 
     return ""
     
@@ -509,6 +513,27 @@ def write_lrc(result, file_path):
             fraction = int((segment.start - int(segment.start)) * 100)
             file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n")
 
+def gen_subtitles_check(file_path: str):
+    if not has_audio(file_path):
+        logging.debug(f"{file_path} doesn't have any audio to transcribe!")
+        return False
+
+        if file_path in files_to_transcribe:
+            logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.")
+            return
+
+    message = None
+    if has_subtitle_language(file_path, skipifinternalsublang):
+        message = f"{file_path} already has an internal subtitle we want, skipping generation"
+    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension):
+        message = f"{file_path} already has a subtitle created for this, skipping it"
+    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH):
+        message = f"{file_path} already has a SDH subtitle created for this, skipping it"
+    if message:
+        logging.info(message)
+        return False
+    return True
+            
 
 def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
     """Generates subtitles for a video file.
@@ -521,31 +546,14 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo
     """
 
     try:
-        if not has_audio(file_path):
-            logging.debug(f"{file_path} doesn't have any audio to transcribe!")
-            return None
-
-        if file_path in files_to_transcribe:
-            logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.")
-            return
-
-        message = None
-        if has_subtitle_language(file_path, skipifinternalsublang):
-            message = f"{file_path} already has an internal subtitle we want, skipping generation"
-        elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension):
-            message = f"{file_path} already has a subtitle created for this, skipping it"
-        elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH):
-            message = f"{file_path} already has a SDH subtitle created for this, skipping it"
-        if message:
-            logging.info(message)
-            return message
+        
 
         if add_to_front:
             files_to_transcribe.insert(0, file_path)
         else:
             files_to_transcribe.append(file_path)
         logging.info(f"Added {os.path.basename(file_path)} for transcription.")
-        logging.info(f"{len(files_to_transcribe)} files in the queue for transcription")
+        #logging.info(f"{len(files_to_transcribe)} files in the queue for transcription")
         logging.info(f"Transcribing file: {os.path.basename(file_path)}")
 
         start_time = time.time()
@@ -765,7 +773,8 @@ if monitor:
                 # Call the gen_subtitles function
                     logging.info(f"File: {path_mapping(file_path)} was added")
                     # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False)
-                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
+                    if gen_subtitles_check(path_mapping(fullpath)):
+                        gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
         def on_created(self, event):
             self.create_subtitle(event)
         def on_modified(self, event):
@@ -781,12 +790,14 @@ def transcribe_existing(transcribe_folders, forceLanguage=None):
             for file in files:
                 file_path = os.path.join(root, file)
                 # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
-                gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
+                if gen_subtitles_check(path_mapping(file_path)):
+                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
     # if the path specified was actually a single file and not a folder, process it
     if os.path.isfile(path):
         if has_audio(path):
             # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
-            gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
+            if gen_subtitles_check(path_mapping(path)):
+                gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
      # Set up the observer to watch for new files
     if monitor:
         observer = Observer()

From 8ddebaf701bece9be5c3e6869b6081cd87a47b33 Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Sat, 20 Apr 2024 08:18:03 -0600
Subject: [PATCH 3/6] Reset MONITOR and TRANSCRIBE_FOLDERS defaults

(The previous default values were left over from my own testing.)
---
 subgen.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subgen.py b/subgen.py
index 0f8efcf..5b819b3 100644
--- a/subgen.py
+++ b/subgen.py
@@ -83,8 +83,8 @@ def update_env_variables():
     path_mapping_from = os.getenv('PATH_MAPPING_FROM', r'/tv')
     path_mapping_to = os.getenv('PATH_MAPPING_TO', r'/Volumes/TV')
     model_location = os.getenv('MODEL_PATH', './models')
-    monitor = convert_to_bool(os.getenv('MONITOR', True))
-    transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', './test')
+    monitor = convert_to_bool(os.getenv('MONITOR', False))
+    transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', '')
     transcribe_or_translate = os.getenv('TRANSCRIBE_OR_TRANSLATE', 'transcribe')
     force_detected_language_to = os.getenv('FORCE_DETECTED_LANGUAGE_TO', '').lower()
     clear_vram_on_complete = convert_to_bool(os.getenv('CLEAR_VRAM_ON_COMPLETE', True))

From 60520cef637442d0bf43e7790a4a3335bfa8d1d4 Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Sat, 20 Apr 2024 08:45:56 -0600
Subject: [PATCH 4/6] removed check function and moved it under
 gen_subtitles_queue

---
 subgen.py | 65 ++++++++++++++++++++++++-------------------------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/subgen.py b/subgen.py
index 5b819b3..f0a1d03 100644
--- a/subgen.py
+++ b/subgen.py
@@ -75,7 +75,7 @@ def update_env_variables():
     procaddedmedia = convert_to_bool(os.getenv('PROCADDEDMEDIA', True))
     procmediaonplay = convert_to_bool(os.getenv('PROCMEDIAONPLAY', True))
     namesublang = os.getenv('NAMESUBLANG', 'aa')
-    skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'eng')
+    skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'aa')
     webhookport = int(os.getenv('WEBHOOKPORT', 9000))
     word_level_highlight = convert_to_bool(os.getenv('WORD_LEVEL_HIGHLIGHT', False))
     debug = convert_to_bool(os.getenv('DEBUG', True))
@@ -310,8 +310,7 @@ def receive_tautulli_webhook(
             logging.debug("Path of file: " + fullpath)
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            if gen_subtitles_check(path_mapping(fullpath)):
-                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
     else:
         return {
             "message": "This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"}
@@ -339,8 +338,7 @@ def receive_plex_webhook(
             logging.debug("Path of file: " + fullpath)
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            if gen_subtitles_check(path_mapping(fullpath)):
-                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.")
     except Exception as e:
@@ -366,8 +364,7 @@ def receive_jellyfin_webhook(
             logging.debug(f"Path of file: {fullpath}")
 
             # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-            if gen_subtitles_check(path_mapping(fullpath)):
-                gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             try:
                 refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken)
                 logging.info(f"Metadata for item {ItemId} refreshed successfully.")
@@ -401,8 +398,7 @@ def receive_emby_webhook(
     if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay:
         logging.debug("Path of file: " + fullpath)
         # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
-        if gen_subtitles_check(path_mapping(fullpath)):
-            gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
+        gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
 
     return ""
     
@@ -513,28 +509,6 @@ def write_lrc(result, file_path):
             fraction = int((segment.start - int(segment.start)) * 100)
             file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n")
 
-def gen_subtitles_check(file_path: str):
-    if not has_audio(file_path):
-        logging.debug(f"{file_path} doesn't have any audio to transcribe!")
-        return False
-
-        if file_path in files_to_transcribe:
-            logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.")
-            return
-
-    message = None
-    if has_subtitle_language(file_path, skipifinternalsublang):
-        message = f"{file_path} already has an internal subtitle we want, skipping generation"
-    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension):
-        message = f"{file_path} already has a subtitle created for this, skipping it"
-    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH):
-        message = f"{file_path} already has a SDH subtitle created for this, skipping it"
-    if message:
-        logging.info(message)
-        return False
-    return True
-            
-
 def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
     """Generates subtitles for a video file.
 
@@ -594,6 +568,26 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo
 
 def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
     global task_queue
+    
+    if not has_audio(file_path):
+        logging.debug(f"{file_path} doesn't have any audio to transcribe!")
+        return
+
+    if file_path in files_to_transcribe:
+        logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.")
+        return
+
+    message = None
+    if has_subtitle_language(file_path, skipifinternalsublang):
+        message = f"{file_path} already has an internal subtitle we want, skipping generation"
+    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension):
+        message = f"{file_path} already has a subtitle created for this, skipping it"
+    elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH):
+        message = f"{file_path} already has a SDH subtitle created for this, skipping it"
+    if message:
+        logging.info(message)
+        return
+    
     task = {
         'path': file_path,
         'transcribe_or_translate': transcription_type,
@@ -773,8 +767,7 @@ if monitor:
                 # Call the gen_subtitles function
                     logging.info(f"File: {path_mapping(file_path)} was added")
                     # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False)
-                    if gen_subtitles_check(path_mapping(fullpath)):
-                        gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
+                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
         def on_created(self, event):
             self.create_subtitle(event)
         def on_modified(self, event):
@@ -790,14 +783,12 @@ def transcribe_existing(transcribe_folders, forceLanguage=None):
             for file in files:
                 file_path = os.path.join(root, file)
                 # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
-                if gen_subtitles_check(path_mapping(file_path)):
-                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
+                gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
     # if the path specified was actually a single file and not a folder, process it
     if os.path.isfile(path):
         if has_audio(path):
             # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
-            if gen_subtitles_check(path_mapping(path)):
-                gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
+            gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
      # Set up the observer to watch for new files
     if monitor:
         observer = Observer()

From 2e6bf94b0977df6a99b767998184d5a16ab8dd4f Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Sat, 20 Apr 2024 08:47:04 -0600
Subject: [PATCH 5/6] Set skipifinternalsublang back to default.

---
 subgen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subgen.py b/subgen.py
index f0a1d03..f107b37 100644
--- a/subgen.py
+++ b/subgen.py
@@ -75,7 +75,7 @@ def update_env_variables():
     procaddedmedia = convert_to_bool(os.getenv('PROCADDEDMEDIA', True))
     procmediaonplay = convert_to_bool(os.getenv('PROCMEDIAONPLAY', True))
     namesublang = os.getenv('NAMESUBLANG', 'aa')
-    skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'aa')
+    skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', 'eng')
     webhookport = int(os.getenv('WEBHOOKPORT', 9000))
     word_level_highlight = convert_to_bool(os.getenv('WORD_LEVEL_HIGHLIGHT', False))
     debug = convert_to_bool(os.getenv('DEBUG', True))

From 8a6e625365429290441c6035a81ec867eb5b811f Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Sat, 20 Apr 2024 11:28:26 -0600
Subject: [PATCH 6/6] add queue tracking for Bazarr

General cleanup: removed the old files_to_transcribe list that tracked items and added Bazarr requests to the task queue so they are tracked as well. Bazarr requests don't obey CONCURRENT_TRANSCRIPTIONS, which is probably best since they have a fixed timeout by which they must respond.
---
 subgen.py | 62 +++++++++++++++++++++----------------------------------
 1 file changed, 23 insertions(+), 39 deletions(-)

diff --git a/subgen.py b/subgen.py
index f107b37..53e8e22 100644
--- a/subgen.py
+++ b/subgen.py
@@ -109,7 +109,6 @@ update_env_variables()
 
 app = FastAPI()
 model = None
-files_to_transcribe = []
 
 in_docker = os.path.exists('/.dockerenv')
 docker_status = "Docker" if in_docker else "Standalone"
@@ -122,8 +121,11 @@ task_queue = queue.Queue()
 def transcription_worker():
     while True:
         task = task_queue.get()
-        gen_subtitles(task['path'], task['transcribe_or_translate'], task['force'],task['force_language'])
-        task_queue.task_done()
+        if 'Bazarr-' in task['path']:
+            logging.info(f"Skipping processing for {task['path']} as it is handled by ASR.")
+        else:
+            gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language'])
+            task_queue.task_done()
         # show queue
         logging.debug(f"There are {task_queue.qsize()} tasks left in the queue.")
 
@@ -309,7 +311,6 @@ def receive_tautulli_webhook(
             fullpath = file
             logging.debug("Path of file: " + fullpath)
 
-            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
             gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
     else:
         return {
@@ -337,7 +338,6 @@ def receive_plex_webhook(
             fullpath = get_plex_file_name(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.debug("Path of file: " + fullpath)
 
-            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
             gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             refresh_plex_metadata(plex_json['Metadata']['ratingKey'], plexserver, plextoken)
             logging.info(f"Metadata for item {plex_json['Metadata']['ratingKey']} refreshed successfully.")
@@ -363,7 +363,6 @@ def receive_jellyfin_webhook(
             fullpath = get_jellyfin_file_name(ItemId, jellyfinserver, jellyfintoken)
             logging.debug(f"Path of file: {fullpath}")
 
-            # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
             gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
             try:
                 refresh_jellyfin_metadata(ItemId, jellyfinserver, jellyfintoken)
@@ -397,7 +396,6 @@ def receive_emby_webhook(
 
     if event == "library.new" and procaddedmedia or event == "playback.start" and procmediaonplay:
         logging.debug("Path of file: " + fullpath)
-        # gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True)
         gen_subtitles_queue(path_mapping(fullpath), transcribe_or_translate, True)
 
     return ""
@@ -424,14 +422,17 @@ def asr(
     try:
         logging.info(f"Transcribing file from Bazarr/ASR webhook")
         result = None
-        random_name = random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6)
+        random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6))
 
         if force_detected_language_to:
             language = force_detected_language_to
 
         start_time = time.time()
         start_model()
-        files_to_transcribe.insert(0, f"Bazarr-asr-{random_name}")
+        
+        task_id = { 'path': f"Bazarr-asr-{random_name}" }        
+        task_queue.put(task_id)
+        
         audio_data = np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0
         if model_prompt:
             custom_prompt = greetings_translations.get(language, '') or custom_model_prompt
@@ -446,8 +447,7 @@ def asr(
     except Exception as e:
         logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}")
     finally:
-        if f"Bazarr-asr-{random_name}" in files_to_transcribe:
-            files_to_transcribe.remove(f"Bazarr-asr-{random_name}")
+        task_queue.task_done()
         delete_model()
     if result:
         return StreamingResponse(
@@ -470,8 +470,11 @@ def detect_language(
         logging.info(f"Detect language is set to detect on the first {detect_language_length} seconds of the audio.")
     try:
         start_model()
-        random_name = random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6)
-        files_to_transcribe.insert(0, f"Bazarr-detect-language-{random_name}")
+        random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6))
+        
+        task_id = { 'path': f"Bazarr-detect-language-{random_name}" }        
+        task_queue.put(task_id)
+        
         audio_data = np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0
         detected_lang_code = model.transcribe_stable(whisper.pad_or_trim(audio_data, detect_language_length * 16000), input_sr=16000).language
             
@@ -479,8 +482,7 @@ def detect_language(
         logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}")
         
     finally:
-        if f"Bazarr-detect-language-{random_name}" in files_to_transcribe:
-            files_to_transcribe.remove(f"Bazarr-detect-language-{random_name}")
+        task_queue.task_done()
         delete_model()
 
         return {"detected_language": whisper_languages.get(detected_lang_code, detected_lang_code) , "language_code": detected_lang_code}
@@ -492,7 +494,7 @@ def start_model():
         model = stable_whisper.load_faster_whisper(whisper_model, download_root=model_location, device=transcribe_device, cpu_threads=whisper_threads, num_workers=concurrent_transcriptions, compute_type=compute_type)
 
 def delete_model():
-    if clear_vram_on_complete and len(files_to_transcribe) == 0:
+    if clear_vram_on_complete and task_queue.qsize() == 0:
         global model
         logging.debug("Queue is empty, clearing/releasing VRAM")
         model = None
@@ -509,25 +511,17 @@ def write_lrc(result, file_path):
             fraction = int((segment.start - int(segment.start)) * 100)
             file.write(f"[{minutes:02d}:{seconds:02d}.{fraction:02d}] {segment.text}\n")
 
-def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
+def gen_subtitles(file_path: str, transcription_type: str, force_language=None) -> None:
     """Generates subtitles for a video file.
 
     Args:
         file_path: str - The path to the video file.
         transcription_type: str - The type of transcription or translation to perform.
-        add_to_front: bool - Whether to add the file to the front of the transcription queue. Default is True.
         force_language: str - The language to force for transcription or translation. Default is None.
     """
 
     try:
-        
-
-        if add_to_front:
-            files_to_transcribe.insert(0, file_path)
-        else:
-            files_to_transcribe.append(file_path)
         logging.info(f"Added {os.path.basename(file_path)} for transcription.")
-        #logging.info(f"{len(files_to_transcribe)} files in the queue for transcription")
         logging.info(f"Transcribing file: {os.path.basename(file_path)}")
 
         start_time = time.time()
@@ -562,21 +556,15 @@ def gen_subtitles(file_path: str, transcription_type: str, add_to_front=True, fo
         logging.info(f"Error processing or transcribing {file_path}: {e}")
 
     finally:
-        if file_path in files_to_transcribe:
-            files_to_transcribe.remove(file_path)
         delete_model()
 
-def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=True, force_language=None) -> None:
+def gen_subtitles_queue(file_path: str, transcription_type: str, force_language=None) -> None:
     global task_queue
     
     if not has_audio(file_path):
         logging.debug(f"{file_path} doesn't have any audio to transcribe!")
         return
 
-    if file_path in files_to_transcribe:
-        logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.")
-        return
-
     message = None
     if has_subtitle_language(file_path, skipifinternalsublang):
         message = f"{file_path} already has an internal subtitle we want, skipping generation"
@@ -591,7 +579,6 @@ def gen_subtitles_queue(file_path: str, transcription_type: str, add_to_front=Tr
     task = {
         'path': file_path,
         'transcribe_or_translate': transcription_type,
-        'force': add_to_front,
         'force_language':force_language
     }
     task_queue.put(task)
@@ -766,8 +753,7 @@ if monitor:
                 if has_audio(file_path):
                 # Call the gen_subtitles function
                     logging.info(f"File: {path_mapping(file_path)} was added")
-                    # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False)
-                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False)
+                    gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate)
         def on_created(self, event):
             self.create_subtitle(event)
         def on_modified(self, event):
@@ -782,13 +768,11 @@ def transcribe_existing(transcribe_folders, forceLanguage=None):
         for root, dirs, files in os.walk(path):
             for file in files:
                 file_path = os.path.join(root, file)
-                # gen_subtitles(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
-                gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, False, forceLanguage)
+                gen_subtitles_queue(path_mapping(file_path), transcribe_or_translate, forceLanguage)
     # if the path specified was actually a single file and not a folder, process it
     if os.path.isfile(path):
         if has_audio(path):
-            # gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
-            gen_subtitles_queue(path_mapping(path), transcribe_or_translate, False, forceLanguage) 
+            gen_subtitles_queue(path_mapping(path), transcribe_or_translate, forceLanguage) 
      # Set up the observer to watch for new files
     if monitor:
         observer = Observer()