From d04dac4a9a39884680a9aa7a55f7c719573e5c54 Mon Sep 17 00:00:00 2001 From: McCloudS <64094529+McCloudS@users.noreply.github.com> Date: Tue, 5 Mar 2024 08:37:35 -0700 Subject: [PATCH] Update subgen.py --- subgen/subgen.py | 79 ++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/subgen/subgen.py b/subgen/subgen.py index b7056ec..7384104 100644 --- a/subgen/subgen.py +++ b/subgen/subgen.py @@ -1,4 +1,4 @@ -subgen_version = '2024.3.5.189' +subgen_version = '2024.3.5.188' from datetime import datetime import subprocess @@ -69,24 +69,10 @@ files_to_transcribe = [] subextension = f".subgen.{whisper_model.split('.')[0]}.{namesublang}.srt" subextensionSDH = f".subgen.{whisper_model.split('.')[0]}.{namesublang}.sdh.srt" -if timestamps: - # Define a custom class that modifies sys.stdout - class TimestampedStdout: - def __init__(self, stdout): - self.stdout = stdout - def write(self, x): - # Append the timestamp to every printed line - self.stdout.write(x.replace("\n", "\n[%s] " % str(datetime.now()))) - def flush(self): - # Flush the output - self.stdout.flush() - # Replace sys.stdout with the custom class - sys.stdout = TimestampedStdout(sys.stdout) - if debug: - logging.basicConfig(stream=sys.stderr, level=logging.DEBUG, format="%(asctime)s %(levelname)s: %(message)s") + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(asctime)s %(levelname)s: %(message)s") else: - logging.basicConfig(stream=sys.stderr, level=logging.INFO) + logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") logging.getLogger("multipart").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) @@ -124,8 +110,7 @@ def status(): @app.post("/webhook") async def print_warning(): - print("*** This is the legacy webhook. You need to update to webhook urls to end in plex, tautulli, emby, or jellyfin instead of webhook. ***") - return "" + return {"*** This is the legacy webhook. You need to update to webhook urls to end in plex, tautulli, emby, or jellyfin instead of webhook. ***"} @app.post("/tautulli") def receive_tautulli_webhook( @@ -142,7 +127,7 @@ def receive_tautulli_webhook( gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) else: - print("This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!") + return {"This doesn't appear to be a properly configured Tautulli webhook, please review the instructions again!"} return "" @@ -168,7 +153,7 @@ def receive_plex_webhook( except Exception as e: logging.error(f"Failed to refresh metadata for item {plex_json['Metadata']['ratingKey']}: {e}") else: - print("This doesn't appear to be a properly configured Plex webhook, please review the instructions again!") + return {"This doesn't appear to be a properly configured Plex webhook, please review the instructions again!"} return "" @@ -194,7 +179,7 @@ def receive_jellyfin_webhook( except Exception as e: logging.error(f"Failed to refresh metadata for item {ItemId}: {e}") else: - print("This doesn't appear to be a properly configured Jellyfin webhook, please review the instructions again!") + return {"This doesn't appear to be a properly configured Jellyfin webhook, please review the instructions again!"} return "" @@ -216,7 +201,7 @@ def receive_emby_webhook( gen_subtitles(path_mapping(fullpath), transcribe_or_translate, True) else: - print("This doesn't appear to be a properly configured Emby webhook, please review the instructions again!") + return {"This doesn't appear to be a properly configured Emby webhook, please review the instructions again!"} return "" @@ -239,7 +224,7 @@ def asr( word_timestamps: bool = Query(default=False, description="Word level timestamps") #not used by Bazarr ): try: - print(f"Transcribing file from Bazarr/ASR webhook") + logging.info(f"Transcribing file from Bazarr/ASR webhook") result = None #give the 'process' a random name so mutliple Bazaar transcribes can operate at the same time. random_name = random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6) @@ -255,9 +240,9 @@ def asr( appendLine(result) elapsed_time = time.time() - start_time minutes, seconds = divmod(int(elapsed_time), 60) - print(f"Bazarr transcription is completed, it took {minutes} minutes and {seconds} seconds to complete.") + logging.info(f"Bazarr transcription is completed, it took {minutes} minutes and {seconds} seconds to complete.") except Exception as e: - print(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") + logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") finally: if f"Bazarr-detect-langauge-{random_name}" in files_to_transcribe: files_to_transcribe.remove(f"Bazarr-detect-langauge-{random_name}") @@ -290,7 +275,7 @@ def detect_language( detected_lang_code = model.transcribe_stable(whisper.pad_or_trim(np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0), input_sr=16000).language except Exception as e: - print(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") + logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}") finally: if f"Bazarr-detect-langauge-{random_name}" in files_to_transcribe: @@ -358,24 +343,24 @@ def gen_subtitles(file_path: str, transcribe_or_translate: str, front=True, forc elif os.path.exists(file_path.rsplit('.', 1)[0] + subextensionSDH): message = f"{file_path} already has a SDH subtitle created for this, skipping it" if message != None: - print(message) + logging.info(message) return message if front: files_to_transcribe.insert(0, file_path) else: files_to_transcribe.append(file_path) - print(f"Added {os.path.basename(file_path)} for transcription.") + logging.info(f"Added {os.path.basename(file_path)} for transcription.") # Start transcription for the file in a separate thread - print(f"{len(files_to_transcribe)} files in the queue for transcription") - print(f"Transcribing file: {os.path.basename(file_path)}") + logging.info(f"{len(files_to_transcribe)} files in the queue for transcription") + logging.info(f"Transcribing file: {os.path.basename(file_path)}") start_time = time.time() start_model() global force_detected_language_to if force_detected_language_to: forceLanguage = force_detected_language_to - print(f"Forcing language to {forceLanguage}") + logging.info(f"Forcing language to {forceLanguage}") if(hf_transformers): result = model.transcribe(file_path, language=forceLanguage, batch_size=hf_batch_size, task=transcribe_or_translate) else: @@ -384,12 +369,12 @@ def gen_subtitles(file_path: str, transcribe_or_translate: str, front=True, forc result.to_srt_vtt(get_file_name_without_extension(file_path) + subextension, word_level=word_level_highlight) elapsed_time = time.time() - start_time minutes, seconds = divmod(int(elapsed_time), 60) - print(f"Transcription of {os.path.basename(file_path)} is completed, it took {minutes} minutes and {seconds} seconds to complete.") + logging.info(f"Transcription of {os.path.basename(file_path)} is completed, it took {minutes} minutes and {seconds} seconds to complete.") else: - print(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") + logging.info(f"File {os.path.basename(file_path)} is already in the transcription list. Skipping.") except Exception as e: - print(f"Error processing or transcribing {file_path}: {e}") + logging.info(f"Error processing or transcribing {file_path}: {e}") finally: if file_path in files_to_transcribe: files_to_transcribe.remove(file_path) @@ -420,7 +405,7 @@ def has_subtitle_language(video_file, target_language): container.close() except Exception as e: - print(f"An error occurred: {e}") + logging.info(f"An error occurred: {e}") return False def get_plex_file_name(itemid: str, server_ip: str, plex_token: str) -> str: @@ -476,7 +461,7 @@ def refresh_plex_metadata(itemid: str, server_ip: str, plex_token: str) -> None: # Check if the request was successful if response.status_code == 200: - print("Metadata refresh initiated successfully.") + logging.info("Metadata refresh initiated successfully.") else: raise Exception(f"Error refreshing metadata: {response.status_code}") @@ -512,7 +497,7 @@ def refresh_jellyfin_metadata(itemid: str, server_ip: str, jellyfin_token: str) # Check if the request was successful if response.status_code == 204: - print("Metadata refresh queued successfully.") + logging.info("Metadata refresh queued successfully.") else: raise Exception(f"Error refreshing metadata: {response.status_code}") @@ -570,7 +555,7 @@ def path_mapping(fullpath): def transcribe_existing(transcribe_folders, forceLanguage=None): transcribe_folders = transcribe_folders.split("|") - print("Starting to search folders to see if we need to create subtitles.") + logging.info("Starting to search folders to see if we need to create subtitles.") logging.debug("The folders are:") for path in transcribe_folders: logging.debug(path) @@ -583,7 +568,7 @@ def transcribe_existing(transcribe_folders, forceLanguage=None): if has_audio(path): gen_subtitles(path_mapping(path), transcribe_or_translate, False, forceLanguage) - print("Finished searching and queueing files for transcription") + logging.info("Finished searching and queueing files for transcription") whisper_languages = { "en": "english", @@ -689,15 +674,15 @@ whisper_languages = { if __name__ == "__main__": import uvicorn - print(f"Subgen v{subgen_version}") - print("Starting Subgen with listening webhooks!") - print(f"Transcriptions are limited to running {str(concurrent_transcriptions)} at a time") - print(f"Running {str(whisper_threads)} threads per transcription") - print(f"Using {transcribe_device} to encode") + logging.info(f"Subgen v{subgen_version}") + logging.info("Starting Subgen with listening webhooks!") + logging.info(f"Transcriptions are limited to running {str(concurrent_transcriptions)} at a time") + logging.info(f"Running {str(whisper_threads)} threads per transcription") + logging.info(f"Using {transcribe_device} to encode") if hf_transformers: - print(f"Using Hugging Face Transformers") + logging.info(f"Using Hugging Face Transformers") else: - print(f"Using faster-whisper") + logging.info(f"Using faster-whisper") if transcribe_folders: transcribe_existing(transcribe_folders) uvicorn.run("subgen:app", host="0.0.0.0", port=int(webhookport), reload=debug, use_colors=True)