diff --git a/README.md b/README.md index 9f3a394..010f240 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@
Updates: -21 Mar 2024: Added a 'wizard' into the launcher that will help standalone users get common Bazarr variables configured. See below in Launcher section. Removed 'Transformers' as an option. While I usually don't like to remove features, I don't think anyone is using this and the results are wildly unpredictable and often cause out of memory errors. +21 Mar 2024: Added a 'wizard' into the launcher that will help standalone users get common Bazarr variables configured. See below in Launcher section. Removed 'Transformers' as an option. While I usually don't like to remove features, I don't think anyone is using this and the results are wildly unpredictable and often cause out of memory errors. Added two new environment variables called `USE_MODEL_PROMPT` and `CUSTOM_MODEL_PROMPT`. If `USE_MODEL_PROMPT` is `True` it will use `CUSTOM_MODEL_PROMPT` if set, otherwise will default to using the pre-configured language pairings, such as: `"en": "Hello, welcome to my lecture.", + "zh": "你好,欢迎来到我的讲座。"` These pre-configured translations are geared towards fixing some audio that may not have punctuation. We can prompt it to try to force the use of punctuation during transcription. 19 Mar 2024: Added a `MONITOR` environment variable. Will 'watch' or 'monitor' your `TRANSCRIBE_FOLDERS` for changes and run on them. Useful if you just want to paste files into a folder and get subtitles. @@ -172,6 +173,8 @@ The following environment variables are available in Docker. They will default | UPDATE | False | Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates. 
| | APPEND | False | Will add the following at the end of a subtitle: "Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}" | MONITOR | False | Will monitor `TRANSCRIBE_FOLDERS` for real-time changes to see if we need to generate subtitles | +| USE_MODEL_PROMPT | False | When set to `True`, will use the default per-language prompt stored in `greetings_translations` (e.g. "Hello, welcome to my lecture.") to try to force the use of punctuation in transcriptions that would otherwise lack it. | +| CUSTOM_MODEL_PROMPT | '' | If `USE_MODEL_PROMPT` is `True`, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples). | ### Images: `mccloud/subgen:latest` is GPU or CPU
diff --git a/subgen.py b/subgen.py index 8072aee..8959030 100644 --- a/subgen.py +++ b/subgen.py @@ -1,4 +1,4 @@ -subgen_version = '2024.3.21.43' +subgen_version = '2024.3.21.44' from datetime import datetime import subprocess @@ -57,6 +57,8 @@ clear_vram_on_complete = convert_to_bool(os.getenv('CLEAR_VRAM_ON_COMPLETE', Tru compute_type = os.getenv('COMPUTE_TYPE', 'auto') append = convert_to_bool(os.getenv('APPEND', False)) reload_script_on_change = convert_to_bool(os.getenv('RELOAD_SCRIPT_ON_CHANGE', False)) +model_prompt = convert_to_bool(os.getenv('USE_MODEL_PROMPT', False)) +custom_model_prompt = os.getenv('CUSTOM_MODEL_PROMPT', '') if transcribe_device == "gpu": transcribe_device = "cuda" @@ -332,7 +334,9 @@ def asr( start_model() files_to_transcribe.insert(0, f"Bazarr-asr-{random_name}") audio_data = np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0 - result = model.transcribe_stable(audio_data, task=task, input_sr=16000, language=language, progress_callback=progress) + # Prompt only when USE_MODEL_PROMPT is on: CUSTOM_MODEL_PROMPT wins, else the per-language default, else None (unprompted); a separate local avoids rebinding the module-level custom_model_prompt, which raised UnboundLocalError. + initial_prompt = (custom_model_prompt or greetings_translations.get(language)) if model_prompt else None + result = model.transcribe_stable(audio_data, task=task, input_sr=16000, language=language, progress_callback=progress, initial_prompt=initial_prompt) appendLine(result) elapsed_time = time.time() - start_time minutes, seconds = divmod(int(elapsed_time), 60) @@ -432,7 +436,7 @@ def gen_subtitles(file_path: str, transcribe_or_translate: str, front=True, forc if force_detected_language_to: forceLanguage = force_detected_language_to logging.info(f"Forcing language to {forceLanguage}") - result = model.transcribe_stable(file_path, language=forceLanguage, task=transcribe_or_translate, progress_callback=progress) + result = model.transcribe_stable(file_path, language=forceLanguage, task=transcribe_or_translate, progress_callback=progress, initial_prompt=((custom_model_prompt or greetings_translations.get(forceLanguage)) if model_prompt else None)) appendLine(result) result.to_srt_vtt(get_file_name_without_extension(file_path) + subextension, 
word_level=word_level_highlight) elapsed_time = time.time() - start_time @@ -750,6 +754,109 @@ whisper_languages = { "su": "sundanese", } +# Sample sentence per language code, passed to Whisper as initial_prompt to encourage punctuated output. NOTE(review): many entries look machine-translated - verify with native speakers. +greetings_translations = { + "en": "Hello, welcome to my lecture.", + "zh": "你好,欢迎来到我的讲座。", + "de": "Hallo, willkommen zu meiner Vorlesung.", + "es": "Hola, bienvenido a mi conferencia.", + "ru": "Привет, добро пожаловать на мою лекцию.", + "ko": "안녕하세요, 제 강의에 오신 것을 환영합니다.", + "fr": "Bonjour, bienvenue à mon cours.", + "ja": "こんにちは、私の講義へようこそ。", + "pt": "Olá, bem-vindo à minha palestra.", + "tr": "Merhaba, dersime hoş geldiniz.", + "pl": "Cześć, witaj na moim wykładzie.", + "ca": "Hola, benvingut a la meva conferència.", + "nl": "Hallo, welkom bij mijn lezing.", + "ar": "مرحبًا، مرحبًا بك في محاضرتي.", + "sv": "Hej, välkommen till min föreläsning.", + "it": "Ciao, benvenuto alla mia conferenza.", + "id": "Halo, selamat datang di kuliah saya.", + "hi": "नमस्ते, मेरे व्याख्यान में आपका स्वागत है।", + "fi": "Hei, tervetuloa luentooni.", + "vi": "Xin chào, chào mừng bạn đến với bài giảng của tôi.", + "he": "שלום, ברוך הבא להרצאתי.", + "uk": "Привіт, ласкаво просимо на мою лекцію.", + "el": "Γεια σας, καλώς ήλθατε στη διάλεξή μου.", + "ms": "Halo, selamat datang ke kuliah saya.", + "cs": "Ahoj, vítejte na mé přednášce.", + "ro": "Bună, bun venit la cursul meu.", + "da": "Hej, velkommen til min forelæsning.", + "hu": "Helló, üdvözöllek az előadásomon.", + "ta": "வணக்கம், என் பாடத்திற்கு வரவேற்கிறேன்.", + "no": "Hei, velkommen til foredraget mitt.", + "th": "สวัสดีครับ ยินดีต้อนรับสู่การบรรยายของฉัน", + "ur": "ہیلو، میری لیکچر میں خوش آمدید۔", + "hr": "Pozdrav, dobrodošli na moje predavanje.", + "bg": "Здравейте, добре дошли на моята лекция.", + "lt": "Sveiki, sveiki atvykę į mano paskaitą.", + "la": "Salve, gratias vobis pro eo quod meam lectionem excipitis.", + "mi": "Kia ora, nau mai ki aku rorohiko.", + "ml": "ഹലോ, എന്റെ പാഠത്തിലേക്ക് സ്വാഗതം.", + "cy": "Helo, croeso i fy narlith.", + "sk": "Ahoj, vitajte na mojej prednáške.", + 
"te": "హలో, నా పాఠానికి స్వాగతం.", + "fa": "سلام، خوش آمدید به سخنرانی من.", + "lv": "Sveiki, laipni lūdzam uz manu lekciju.", + "bn": "হ্যালো, আমার লেকচারে আপনাকে স্বাগতম।", + "sr": "Здраво, добродошли на моје предавање.", + "az": "Salam, mənim dərsimə xoş gəlmisiniz.", + "sl": "Pozdravljeni, dobrodošli na moje predavanje.", + "kn": "ಹಲೋ, ನನ್ನ ಭಾಷಣಕ್ಕೆ ಸುಸ್ವಾಗತ.", + "et": "Tere, tere tulemast minu loengusse.", + "mk": "Здраво, добредојдовте на моето предавање.", + "br": "Demat, kroget e oa d'an daol-labour.", + "eu": "Kaixo, ongi etorri nire hitzaldira.", + "is": "Halló, velkomin á fyrirlestur minn.", + "hy": "Բարեւ, ողջույն եկավ իմ դասընթացի.", + "ne": "नमस्ते, मेरो प्रवचनमा स्वागत छ।", + "mn": "Сайн байна уу, миний хичээлд тавтай морилно уу.", + "bs": "Zdravo, dobrodošli na moje predavanje.", + "kk": "Сәлеметсіз бе, оқу сабағыма қош келдіңіз.", + "sq": "Përshëndetje, mirësevini në ligjëratën time.", + "sw": "Habari, karibu kwenye hotuba yangu.", + "gl": "Ola, benvido á miña conferencia.", + "mr": "नमस्कार, माझ्या व्याख्यानात आपले स्वागत आहे.", + "pa": "ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਮੇਰੀ ਵਾਰਤਾ ਵਿੱਚ ਤੁਹਾਨੂੰ ਜੀ ਆਇਆ ਨੂੰ ਸੁਆਗਤ ਹੈ।", + "si": "හෙලෝ, මගේ වාර්තාවට ඔබේ ස්වාදයට සාමාජිකත්වයක්.", + "km": "សួស្តី, សូមស្វាគមន៍មកកាន់អារម្មណ៍របស់ខ្ញុំ។", + "sn": "Mhoro, wakaribisha kumusoro wangu.", + "yo": "Bawo, ku isoro si wa orin mi.", + "so": "Soo dhawoow, soo dhawoow marka laga hadlo kulambanayaashaaga.", + "af": "Hallo, welkom by my lesing.", + "oc": "Bonjorn, benvenguda a ma conferéncia.", + "ka": "გამარჯობა, მესწარმეტყველება ჩემი ლექციაზე.", + "be": "Прывітанне, запрашаем на маю лекцыю.", + "tg": "Салом, ба лаҳзаи мавзӯъати ман хуш омадед.", + "sd": "هيلو، ميري ليڪڪي ۾ خوش آيو.", + "gu": "નમસ્તે, મારી પાઠશાળામાં આપનું સ્વાગત છે.", + "am": "ሰላም፣ ለአንድነት የተመረጠን ትምህርት በመሆን እናመሰግናለን።", + "yi": "העלאָ, ווילקומן צו מיין לעקטשער.", + "lo": "ສະບາຍດີ, ຍິນດີນາງຂອງຂ້ອຍໄດ້ຍິນດີ.", + "uz": "Salom, darsimda xush kelibsiz.", + "fo": "Halló, vælkomin til mína fyrilestrar.", + "ht": "Bonjou, byenveni nan 
leson mwen.", + "ps": "سلام، مې لومړۍ کې خوش آمدید.", + "tk": "Salam, dersimiňe hoş geldiňiz.", + "nn": "Hei, velkomen til førelesinga mi.", + "mt": "Hello, merħba għall-lezzjoni tiegħi.", + "sa": "नमस्ते, मम उपन्यासे स्वागतं.", + "lb": "Hallo, wëllkomm zu menger Lektioun.", + "my": "မင်္ဂလာပါ၊ ကျေးဇူးတင်သည့်ကိစ္စသည်။", + "bo": "བཀྲ་ཤིས་བདེ་ལེགས་འབད་བཅོས། ངའི་འཛིན་གྱི་སློབ་མའི་མིང་གི་འཕྲོད།", + "tl": "Kamusta, maligayang pagdating sa aking leksyon.", + "mg": "Manao ahoana, tonga soa sy tonga soa eto amin'ny lesona.", + "as": "নমস্কাৰ, মোৰ পাঠলৈ আপোনাক স্বাগতম।", + "tt": "Сәлам, лекциямга рәхмәт киләсез.", + "haw": "Aloha, welina me ke kipa ana i ko'u ha'i 'ōlelo.", + "ln": "Mbote, tango na zongisa mwa kilela yandi.", + "ha": "Sannu, ka ci gaba da tattalin arziki na.", + "ba": "Сәләм, лекцияғыма ҡуш тиңләгәнһүҙ.", + "jw": "Halo, sugeng datang marang kulawargané.", + "su": "Wilujeng, hatur nuhun ka lékturing abdi.", +} + if __name__ == "__main__": import uvicorn logging.info(f"Subgen v{subgen_version}")