From ed02fc629583c12aa85fb13171392943a056b041 Mon Sep 17 00:00:00 2001
From: McCloudS <64094529+McCloudS@users.noreply.github.com>
Date: Wed, 6 Mar 2024 09:12:53 -0700
Subject: [PATCH] Added a subsync endpoint

---
 subgen/subgen.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/subgen/subgen.py b/subgen/subgen.py
index be19a7a..f2d950e 100644
--- a/subgen/subgen.py
+++ b/subgen/subgen.py
@@ -22,6 +22,7 @@ import requests
 import av
 import ffmpeg
 import whisper
+import re
 
 def convert_to_bool(in_bool):
     if isinstance(in_bool, bool):
@@ -155,6 +156,69 @@ def status():
     docker_status = "Docker" if in_docker else "Standalone"
     return {"version" : f"Subgen {subgen_version}, stable-ts {stable_whisper.__version__}, whisper {whisper.__version__} ({docker_status})"}
 
+@app.post("/subsync")
+def subsync(
+        audio_file: UploadFile = File(...),
+        subtitle_file: UploadFile = File(...),
+        language: Union[str, None] = Query(default=None),
+):
+    """Re-align an uploaded subtitle file against uploaded audio.
+
+    The subtitle upload is reduced to its bare text (counters, timestamps
+    and brace-enclosed spans removed), aligned to the audio with
+    ``model.align`` and returned as a freshly timed subtitle stream.
+
+    Args:
+        audio_file: Uploaded audio; its raw bytes are passed to the model.
+        subtitle_file: Uploaded UTF-8 SRT-style subtitle file.
+        language: Optional language hint forwarded to ``model.align``.
+
+    Returns:
+        A ``StreamingResponse`` with the aligned subtitles, or ``None``
+        when alignment failed or produced no result (the error is logged).
+    """
+    # Bound before the try so the check after it can never hit an unbound name.
+    result = None
+    try:
+        logging.info("Syncing subtitle file from Subsync webhook")
+
+        # utf-8-sig also strips a leading BOM, which would otherwise keep
+        # the first counter line from matching ^\d+$ below.
+        srt_content = subtitle_file.file.read().decode('utf-8-sig')
+        # Normalise CRLF/CR so the line-anchored patterns below match.
+        srt_content = srt_content.replace('\r\n', '\n').replace('\r', '\n')
+        # Remove brace-enclosed spans (e.g. {\an8}-style tags)
+        srt_content = re.sub(r'\{.*?\}', '', srt_content)
+        # Remove numeric counters for each entry
+        srt_content = re.sub(r'^\d+$', '', srt_content, flags=re.MULTILINE)
+        # Remove timestamps and formatting
+        srt_content = re.sub(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}', '', srt_content)
+        # Collapse the blank lines left behind and trim both ends
+        srt_content = re.sub(r'\n\n+', '\n', srt_content).strip()
+
+        start_time = time.time()
+        start_model()
+
+        result = model.align(audio_file.file.read(), srt_content, language=language)
+        appendLine(result)
+        elapsed_time = time.time() - start_time
+        minutes, seconds = divmod(int(elapsed_time), 60)
+        logging.info(f"Subsync is completed, it took {minutes} minutes and {seconds} seconds to complete.")
+    except Exception as e:
+        # Endpoint boundary: log with traceback, then fall through to the empty reply.
+        logging.error(f"Error processing or aligning {audio_file.filename} or {subtitle_file.filename}: {e}", exc_info=True)
+    finally:
+        delete_model()
+    if result:
+        return StreamingResponse(
+            iter(result.to_srt_vtt(filepath = None, word_level=word_level_highlight)),
+            media_type="text/plain",
+            headers={
+                'Source': 'Aligned using stable-ts from Subgen!',
+            })
+    else:
+        return
+
 @app.post("/tautulli")
 def receive_tautulli_webhook(
         source: Union[str, None] = Header(None),