Merge branch 'main' into Custom-Params

This commit is contained in:
McCloudS
2024-03-23 09:15:17 -06:00
committed by GitHub
3 changed files with 79 additions and 31 deletions

View File

@@ -2,6 +2,8 @@
<details>
<summary>Updates:</summary>
22 Mar 2024: Added LRC capability via the `LRC_FOR_AUDIO_FILES` environment variable, see: `'LRC_FOR_AUDIO_FILES' | True | Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff' |`
21 Mar 2024: Added a 'wizard' to the launcher that will help standalone users get common Bazarr variables configured. See the Launcher section below. Removed 'Transformers' as an option. While I usually don't like to remove features, I don't think anyone is using this and the results are wildly unpredictable and often cause out-of-memory errors. Added two new environment variables called `USE_MODEL_PROMPT` and `CUSTOM_MODEL_PROMPT`. If `USE_MODEL_PROMPT` is `True` it will use `CUSTOM_MODEL_PROMPT` if set, otherwise will default to using the pre-configured language pairings, such as: `"en": "Hello, welcome to my lecture.",
"zh": "你好,欢迎来到我的讲座。"` These pre-configured translations are geared towards fixing transcriptions of audio that may come out without punctuation. We can prompt the model to try to force the use of punctuation during transcription.
@@ -175,6 +177,7 @@ The following environment variables are available in Docker. They will default
| MONITOR | False | Will monitor `TRANSCRIBE_FOLDERS` for real-time changes to see if we need to generate subtitles |
| USE_MODEL_PROMPT | False | When set to `True`, will use the default prompt stored in greetings_translations (e.g. "Hello, welcome to my lecture.") to try to force the use of punctuation in transcriptions that would otherwise lack it. |
| CUSTOM_MODEL_PROMPT | '' | If `USE_MODEL_PROMPT` is `True`, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples). |
| LRC_FOR_AUDIO_FILES | True | Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff' |
### Images:
`mccloud/subgen:latest` is GPU or CPU <br>

View File

@@ -1,5 +1,6 @@
import os
import requests
import sys
import urllib.request
import subprocess
import argparse
@@ -9,19 +10,29 @@ def convert_to_bool(in_bool):
def install_packages_from_requirements(requirements_file):
try:
# Try installing with pip3
subprocess.run(['pip3', 'install', '-r', requirements_file, '--upgrade'], check=True)
print(f"Requirements from {requirements_file} have been successfully installed.")
except subprocess.CalledProcessError as e:
print(f"Failed to install requirements: {e}")
print("Packages installed successfully using pip3.")
except subprocess.CalledProcessError:
try:
# If pip3 fails, try installing with pip
subprocess.run(['pip', 'install', '-r', requirements_file, '--upgrade'], check=True)
print("Packages installed successfully using pip.")
except subprocess.CalledProcessError:
print("Failed to install packages using both pip3 and pip.")
def download_from_github(url, output_file):
response = requests.get(url)
if response.status_code == 200:
with open(output_file, 'wb') as f:
f.write(response.content)
try:
with urllib.request.urlopen(url) as response, open(output_file, 'wb') as out_file:
data = response.read() # a `bytes` object
out_file.write(data)
print(f"File downloaded successfully to {output_file}")
else:
print(f"Failed to download file from {url}")
except urllib.error.HTTPError as e:
print(f"Failed to download file from {url}. HTTP Error Code: {e.code}")
except urllib.error.URLError as e:
print(f"URL Error: {e.reason}")
except Exception as e:
print(f"An error occurred: {e}")
def prompt_and_save_bazarr_env_variables():
"""
@@ -72,27 +83,59 @@ def load_env_variables(env_filename='subgen.env'):
print(f"{env_filename} file not found. Please run prompt_and_save_env_variables() first.")
def main():
# Check if the script is run with 'python' or 'python3'
if 'python3' in sys.executable:
python_cmd = 'python3'
elif 'python' in sys.executable:
python_cmd = 'python'
else:
print("Script started with an unknown command")
sys.exit(1)
if sys.version_info[0] < 3:
print(f"This script requires Python 3 or higher, you are running {sys.version}")
sys.exit(1) # Terminate the script
#Make sure we're saving subgen.py and subgen.env in the right folder
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Construct the argument parser
parser = argparse.ArgumentParser()
parser.add_argument( '-d', '--debug', default=False, action='store_true', help="Enable console debugging (default: False)")
parser.add_argument('-d', '--debug', default=False, action='store_true', help="Enable console debugging (default: False)")
parser.add_argument('-i', '--install', default=False, action='store_true', help="Install/update all necessary packages (default: False)")
parser.add_argument('-a', '--append', default=False, action='store_true', help="Append 'Transcribed by whisper' to generated subtitle (default: False)")
parser.add_argument('-u', '--update', default=False, action='store_true', help="Update Subgen (default: False)")
parser.add_argument('-dnr', '--donotrun', default=False, action='store_true', help="Do not run subgen.py (default: False)")
parser.add_argument('-b', '--bazarrsetup', default=False, action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs (default: False)")
parser.add_argument('-x', '--exit-early', default=False, action='store_true', help="Exit without running subgen.py (default: False)")
parser.add_argument('-s', '--setup-bazarr', default=False, action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs (default: False)")
parser.add_argument('-b', '--branch', type=str, default='main', help='Specify the branch to download from. (default: main)')
parser.add_argument('-l', '--launcher-update', default=False, action='store_true', help="Update launcher.py and re-launch (default: False)")
args = parser.parse_args()
# Get the branch name: a non-'main' --branch argument wins; otherwise use the BRANCH environment variable, defaulting to 'main'
branch_name = args.branch if args.branch != 'main' else os.getenv('BRANCH', 'main')
# Determine the script name based on the branch name
script_name = f"-{branch_name}.py" if branch_name != "main" else ".py"
# Check whether we need to update the launcher
if args.launcher_update or convert_to_bool(os.getenv('LAUNCHER_UPDATE')):
print(f"Updating launcher.py from GitHub branch {branch_name}...")
download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/launcher.py", f'launcher{script_name}')
# Prepare the arguments to exclude update triggers
excluded_args = ['--launcher-update', '-l']
new_args = [arg for arg in sys.argv[1:] if arg not in excluded_args]
if branch_name == 'main' and args.launcher_update:
print("Running launcher.py for the 'main' branch.")
os.execl(sys.executable, sys.executable, "launcher.py", *new_args)
elif args.launcher_update:
print(f"Running launcher-{branch_name}.py for the '{branch_name}' branch.")
os.execl(sys.executable, sys.executable, f"launcher{script_name}", *new_args)
# Set environment variables based on the parsed arguments
os.environ['DEBUG'] = str(args.debug)
os.environ['APPEND'] = str(args.append)
if args.bazarrsetup:
if args.setup_bazarr:
prompt_and_save_bazarr_env_variables()
load_env_variables()
@@ -102,22 +145,24 @@ def main():
# Install packages from requirements.txt if the install argument is True
if args.install:
download_from_github(requirements_url, requirements_file)
install_packages_from_requirements(requirements_file)
subgen_script_name = "./subgen.py"
if not os.path.exists(subgen_script_name):
print(f"File {subgen_script_name} does not exist. Downloading from GitHub...")
download_from_github("https://raw.githubusercontent.com/McCloudS/subgen/main/subgen.py", subgen_script_name)
elif convert_to_bool(os.getenv("UPDATE", "False")) or args.update:
print(f"File exists, but UPDATE is set to True. Downloading {subgen_script_name} from GitHub...")
download_from_github("https://raw.githubusercontent.com/McCloudS/subgen/main/subgen.py", subgen_script_name)
# Check if the script exists or if the UPDATE environment variable is set to True
if not os.path.exists(f'subgen{script_name}') or args.update or convert_to_bool(os.getenv('UPDATE')):
print(f"Downloading subgen.py from GitHub branch {branch_name}...")
download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/subgen.py", f'subgen{script_name}')
else:
print("Environment variable UPDATE is not set or set to False, skipping download.")
if not args.donotrun:
subprocess.run(['python3', '-u', 'subgen.py'], check=True)
print("subgen.py exists and UPDATE is set to False, skipping download.")
if not args.exit_early:
print(f'Launching subgen{script_name}')
if branch_name != 'main':
subprocess.run([f'{python_cmd}', '-u', f'subgen{script_name}'], check=True)
else:
subprocess.run([f'{python_cmd}', '-u', 'subgen.py'], check=True)
else:
print("Not running subgen.py: -dnr or --donotrun set")
print("Not running subgen.py: -x or --exitearly set")
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,4 @@
subgen_version = '2024.3.21.45'
subgen_version = '2024.3.23.57'
from datetime import datetime
import subprocess
@@ -59,8 +59,8 @@ append = convert_to_bool(os.getenv('APPEND', False))
reload_script_on_change = convert_to_bool(os.getenv('RELOAD_SCRIPT_ON_CHANGE', False))
model_prompt = os.getenv('USE_MODEL_PROMPT', 'False')
custom_model_prompt = os.getenv('CUSTOM_MODEL_PROMPT', '')
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
lrc_for_audio_files = convert_to_bool(os.getenv('LRC_FOR_AUDIO_FILES', True))
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
if transcribe_device == "gpu":
transcribe_device = "cuda"