From 8bedd6edf906b1c5a97304716e364ebedfbbd246 Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Wed, 25 Oct 2023 22:47:42 -0600 Subject: [PATCH] Update subgen.py --- subgen/subgen.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/subgen/subgen.py b/subgen/subgen.py index f74b72d..e356340 100644 --- a/subgen/subgen.py +++ b/subgen/subgen.py @@ -7,6 +7,7 @@ import sys import time import queue import logging +from array import array # List of packages to install packages_to_install = [ @@ -58,13 +59,14 @@ use_path_mapping = convert_to_bool(os.getenv('USE_PATH_MAPPING', False)) path_mapping_from = os.getenv('PATH_MAPPING_FROM', '/tv') path_mapping_to = os.getenv('PATH_MAPPING_TO', '/Volumes/TV') model_location = os.getenv('MODEL_PATH', '.') +transcribe_folders = os.getenv('TRANSCRIBE_FOLDERS', '') if transcribe_device == "gpu": transcribe_device = "cuda" jellyfin_userid = "" app = Flask(__name__) model = stable_whisper.load_faster_whisper(whisper_model, download_root=model_location, device=transcribe_device, cpu_threads=whisper_threads, num_workers=concurrent_transcriptions) -files_to_transcribe = set() +files_to_transcribe = [] subextension = '.subgen.' + whisper_model + '.' + namesublang + '.srt' print("Transcriptions are limited to running " + str(concurrent_transcriptions) + " at a time") print("Running " + str(whisper_threads) + " threads per transcription") @@ -187,7 +189,7 @@ def gen_subtitles(video_file_path: str) -> None: files_to_transcribe.remove(video_file_path) # Function to add a file for transcription -def add_file_for_transcription(file_path): +def add_file_for_transcription(file_path, front=True): if file_path not in files_to_transcribe: if has_subtitle_language(file_path, skipifinternalsublang): @@ -197,7 +199,10 @@ def add_file_for_transcription(file_path): print("We already have a subgen created for this file, skipping it") return "We already have a subgen created for this file, skipping it" - files_to_transcribe.add(file_path) + if front: + files_to_transcribe.insert(0, file_path) + else: + files_to_transcribe.append(file_path) print(f"Added {file_path} for transcription.") # Start transcription for the file in a separate thread @@ -292,6 +297,29 @@ def get_jellyfin_file_name(item_id: str, jellyfin_url: str, jellyfin_token: str) else: raise Exception(f"Error: {response.status_code}") +def is_video_file(file_path): + try: + container = av.open(file_path) + for stream in container.streams: + if stream.type == 'video': + return True + return False + except av.AVError: + return False + +def transcribe_existing(): + for path in transcribe_folders: + for root, dirs, files in os.walk(path): + for file in files: + file_path = os.path.join(root, file) + if is_video_file(file_path): + add_file_for_transcription(file_path, False) + +if transcribe_folders: + transcribe_folders = transcribe_folders.split(",") + transcription_thread = threading.Thread(target=transcribe_existing) + transcription_thread.start() + print("Starting webhook!") if __name__ == "__main__": app.run(debug=debug, host='0.0.0.0', port=int(webhookport))