chore: cleanup legacy files and update gitignore

- Remove obsolete Docker files (will be recreated later)
- Remove legacy launcher.py and transcriptarr.py
- Remove subgen.xml configuration
- Remove test_backend.py (tests will be restructured)
- Remove language_code.py from root (moved to backend/core/)
- Update .gitignore for Python project structure
2026-01-16 15:10:14 +01:00
parent 8acbe84b15
commit 9655686a50
9 changed files with 9 additions and 2498 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,12 @@
 #ignore our settings
 .env
-models/
+models/
 transcriptarr.db
 # Python cache
 __pycache__/
 **/__pycache__/
 *.pyc
 *.pyo
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,45 +0,0 @@
 # Two-stage image on the NVIDIA CUDA 12.3.2 / cuDNN 9 runtime base:
 # the builder stage installs the Python dependencies, the runtime stage
 # copies only the scripts and the installed packages.
 # Stage 1: Builder
 FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
 WORKDIR /subgen
 # Build-time only: keep apt from prompting during package installation
 ARG DEBIAN_FRONTEND=noninteractive
 # Install system dependencies (apt lists are removed in the same layer)
 RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    ffmpeg \
    git \
    tzdata \
    && rm -rf /var/lib/apt/lists/*
 # Copy requirements and install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY . .
 # Stage 2: Runtime
 FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
 WORKDIR /subgen
 # Copy necessary files from the builder stage
 COPY --from=builder /subgen/launcher.py .
 COPY --from=builder /subgen/subgen.py .
 COPY --from=builder /subgen/language_code.py .
 # NOTE(review): the python3.10 path is hard-coded; it presumably matches the
 # python3 package of the ubuntu22.04 base - confirm when bumping the base image.
 COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
 # Install runtime dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    python3 \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Disable Python output buffering inside the container
 ENV PYTHONUNBUFFERED=1
 # Set command to run the application
 CMD ["python3", "launcher.py"]
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -1,34 +0,0 @@
 # CPU-only image: Python packages are installed under /install in the builder
 # stage and copied into /usr/local of a slim runtime stage.
 # === Stage 1: Build dependencies and install packages ===
 FROM python:3.11-slim-bullseye AS builder
 WORKDIR /subgen
 # Install required build dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    tzdata \
    && rm -rf /var/lib/apt/lists/*
 # Copy and install dependencies
 COPY requirements.txt .
 # torch/torchaudio are installed first, then requirements.txt; both commands add
 # the PyTorch CPU wheel index and install into the /install prefix.
 RUN pip install --no-cache-dir --prefix=/install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cpu && pip install --no-cache-dir --prefix=/install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
 # === Stage 2: Create a minimal runtime image ===
 FROM python:3.11-slim-bullseye AS runtime
 WORKDIR /subgen
 # Install only required runtime dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Copy only necessary files from builder stage
 # (the /install prefix from stage 1 is merged into /usr/local)
 COPY --from=builder /install /usr/local
 # Copy source code
 COPY launcher.py subgen.py language_code.py /subgen/
 CMD ["python3", "launcher.py"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,40 +0,0 @@
 #docker-compose.yml
 version: '2'
 services:
  subgen:
    container_name: subgen
    tty: true
    image: mccloud/subgen
    environment:
       - "WHISPER_MODEL=medium"
       - "WHISPER_THREADS=4"
       - "PROCADDEDMEDIA=True"
       - "PROCMEDIAONPLAY=False"
       - "NAMESUBLANG=aa"
       - "SKIPIFINTERNALSUBLANG=eng"
       - "PLEXTOKEN=plextoken"
       - "PLEXSERVER=http://plexserver:32400"
       - "JELLYFINTOKEN=token here"
       - "JELLYFINSERVER=http://jellyfin:8096"
       - "WEBHOOKPORT=9000"
       - "CONCURRENT_TRANSCRIPTIONS=2"
       - "WORD_LEVEL_HIGHLIGHT=False"
       - "DEBUG=True"
       - "USE_PATH_MAPPING=False"
       - "PATH_MAPPING_FROM=/tv"
       - "PATH_MAPPING_TO=/Volumes/TV"
       - "TRANSCRIBE_DEVICE=cpu"
       - "CLEAR_VRAM_ON_COMPLETE=True"
       - "MODEL_PATH=./models"
       - "UPDATE=False"
       - "APPEND=False"
       - "USE_MODEL_PROMPT=False"
       - "CUSTOM_MODEL_PROMPT="
       - "LRC_FOR_AUDIO_FILES=True"
       - "CUSTOM_REGROUP=cm_sl=84_sl=42++++++1"
    volumes:
       - "${TV}:/tv"
       - "${MOVIES}:/movies"
       - "${APPDATA}/subgen/models:/subgen/models"
    ports:
       - "9000:9000"
--- a/language_code.py
+++ b/language_code.py
@@ -1,198 +0,0 @@
 from enum import Enum
 class LanguageCode(Enum):
     """Languages identified by their ISO 639 codes and display names.

     Every member's value is the 5-tuple
     ``(iso_639_1, iso_639_2_t, iso_639_2_b, name_en, name_native)``, which
     ``__init__`` unpacks into attributes of the same names.  ``NONE`` is the
     "no language" sentinel: all of its fields are ``None`` and it is falsy.

     Members compare equal to other members with the same ``iso_639_1`` code,
     to any string that ``from_string`` resolves to the same member, and
     ``NONE`` additionally compares equal to ``None``.
     """

     # ISO 639-1, ISO 639-2/T, ISO 639-2/B, English Name, Native Name
     AFAR = ("aa", "aar", "aar", "Afar", "Afar")
     AFRIKAANS = ("af", "afr", "afr", "Afrikaans", "Afrikaans")
     AMHARIC = ("am", "amh", "amh", "Amharic", "አማርኛ")
     ARABIC = ("ar", "ara", "ara", "Arabic", "العربية")
     ASSAMESE = ("as", "asm", "asm", "Assamese", "অসমীয়া")
     AZERBAIJANI = ("az", "aze", "aze", "Azerbaijani", "Azərbaycanca")
     BASHKIR = ("ba", "bak", "bak", "Bashkir", "Башҡортса")
     BELARUSIAN = ("be", "bel", "bel", "Belarusian", "Беларуская")
     BULGARIAN = ("bg", "bul", "bul", "Bulgarian", "Български")
     BENGALI = ("bn", "ben", "ben", "Bengali", "বাংলা")
     TIBETAN = ("bo", "bod", "tib", "Tibetan", "བོད་ཡིག")
     BRETON = ("br", "bre", "bre", "Breton", "Brezhoneg")
     BOSNIAN = ("bs", "bos", "bos", "Bosnian", "Bosanski")
     CATALAN = ("ca", "cat", "cat", "Catalan", "Català")
     CZECH = ("cs", "ces", "cze", "Czech", "Čeština")
     WELSH = ("cy", "cym", "wel", "Welsh", "Cymraeg")
     DANISH = ("da", "dan", "dan", "Danish", "Dansk")
     GERMAN = ("de", "deu", "ger", "German", "Deutsch")
     GREEK = ("el", "ell", "gre", "Greek", "Ελληνικά")
     ENGLISH = ("en", "eng", "eng", "English", "English")
     SPANISH = ("es", "spa", "spa", "Spanish", "Español")
     ESTONIAN = ("et", "est", "est", "Estonian", "Eesti")
     BASQUE = ("eu", "eus", "baq", "Basque", "Euskara")
     PERSIAN = ("fa", "fas", "per", "Persian", "فارسی")
     FINNISH = ("fi", "fin", "fin", "Finnish", "Suomi")
     FAROESE = ("fo", "fao", "fao", "Faroese", "Føroyskt")
     FRENCH = ("fr", "fra", "fre", "French", "Français")
     GALICIAN = ("gl", "glg", "glg", "Galician", "Galego")
     GUJARATI = ("gu", "guj", "guj", "Gujarati", "ગુજરાતી")
     HAUSA = ("ha", "hau", "hau", "Hausa", "Hausa")
     HAWAIIAN = ("haw", "haw", "haw", "Hawaiian", "ʻŌlelo Hawaiʻi")
     HEBREW = ("he", "heb", "heb", "Hebrew", "עברית")
     HINDI = ("hi", "hin", "hin", "Hindi", "हिन्दी")
     CROATIAN = ("hr", "hrv", "hrv", "Croatian", "Hrvatski")
     HAITIAN_CREOLE = ("ht", "hat", "hat", "Haitian Creole", "Kreyòl Ayisyen")
     HUNGARIAN = ("hu", "hun", "hun", "Hungarian", "Magyar")
     ARMENIAN = ("hy", "hye", "arm", "Armenian", "Հայերեն")
     INDONESIAN = ("id", "ind", "ind", "Indonesian", "Bahasa Indonesia")
     ICELANDIC = ("is", "isl", "ice", "Icelandic", "Íslenska")
     ITALIAN = ("it", "ita", "ita", "Italian", "Italiano")
     JAPANESE = ("ja", "jpn", "jpn", "Japanese", "日本語")
     JAVANESE = ("jw", "jav", "jav", "Javanese", "ꦧꦱꦗꦮ")
     GEORGIAN = ("ka", "kat", "geo", "Georgian", "ქართული")
     KAZAKH = ("kk", "kaz", "kaz", "Kazakh", "Қазақша")
     KHMER = ("km", "khm", "khm", "Khmer", "ភាសាខ្មែរ")
     KANNADA = ("kn", "kan", "kan", "Kannada", "ಕನ್ನಡ")
     KOREAN = ("ko", "kor", "kor", "Korean", "한국어")
     LATIN = ("la", "lat", "lat", "Latin", "Latina")
     LUXEMBOURGISH = ("lb", "ltz", "ltz", "Luxembourgish", "Lëtzebuergesch")
     LINGALA = ("ln", "lin", "lin", "Lingala", "Lingála")
     LAO = ("lo", "lao", "lao", "Lao", "ພາສາລາວ")
     LITHUANIAN = ("lt", "lit", "lit", "Lithuanian", "Lietuvių")
     LATVIAN = ("lv", "lav", "lav", "Latvian", "Latviešu")
     MALAGASY = ("mg", "mlg", "mlg", "Malagasy", "Malagasy")
     MAORI = ("mi", "mri", "mao", "Maori", "Te Reo Māori")
     MACEDONIAN = ("mk", "mkd", "mac", "Macedonian", "Македонски")
     MALAYALAM = ("ml", "mal", "mal", "Malayalam", "മലയാളം")
     MONGOLIAN = ("mn", "mon", "mon", "Mongolian", "Монгол")
     MARATHI = ("mr", "mar", "mar", "Marathi", "मराठी")
     MALAY = ("ms", "msa", "may", "Malay", "Bahasa Melayu")
     MALTESE = ("mt", "mlt", "mlt", "Maltese", "Malti")
     BURMESE = ("my", "mya", "bur", "Burmese", "မြန်မာစာ")
     NEPALI = ("ne", "nep", "nep", "Nepali", "नेपाली")
     DUTCH = ("nl", "nld", "dut", "Dutch", "Nederlands")
     NORWEGIAN_NYNORSK = ("nn", "nno", "nno", "Norwegian Nynorsk", "Nynorsk")
     NORWEGIAN = ("no", "nor", "nor", "Norwegian", "Norsk")
     OCCITAN = ("oc", "oci", "oci", "Occitan", "Occitan")
     PUNJABI = ("pa", "pan", "pan", "Punjabi", "ਪੰਜਾਬੀ")
     POLISH = ("pl", "pol", "pol", "Polish", "Polski")
     PASHTO = ("ps", "pus", "pus", "Pashto", "پښتو")
     PORTUGUESE = ("pt", "por", "por", "Portuguese", "Português")
     ROMANIAN = ("ro", "ron", "rum", "Romanian", "Română")
     RUSSIAN = ("ru", "rus", "rus", "Russian", "Русский")
     SANSKRIT = ("sa", "san", "san", "Sanskrit", "संस्कृतम्")
     SINDHI = ("sd", "snd", "snd", "Sindhi", "سنڌي")
     SINHALA = ("si", "sin", "sin", "Sinhala", "සිංහල")
     SLOVAK = ("sk", "slk", "slo", "Slovak", "Slovenčina")
     SLOVENE = ("sl", "slv", "slv", "Slovene", "Slovenščina")
     SHONA = ("sn", "sna", "sna", "Shona", "ChiShona")
     SOMALI = ("so", "som", "som", "Somali", "Soomaaliga")
     ALBANIAN = ("sq", "sqi", "alb", "Albanian", "Shqip")
     SERBIAN = ("sr", "srp", "srp", "Serbian", "Српски")
     SUNDANESE = ("su", "sun", "sun", "Sundanese", "Basa Sunda")
     SWEDISH = ("sv", "swe", "swe", "Swedish", "Svenska")
     SWAHILI = ("sw", "swa", "swa", "Swahili", "Kiswahili")
     TAMIL = ("ta", "tam", "tam", "Tamil", "தமிழ்")
     TELUGU = ("te", "tel", "tel", "Telugu", "తెలుగు")
     TAJIK = ("tg", "tgk", "tgk", "Tajik", "Тоҷикӣ")
     THAI = ("th", "tha", "tha", "Thai", "ไทย")
     TURKMEN = ("tk", "tuk", "tuk", "Turkmen", "Türkmençe")
     TAGALOG = ("tl", "tgl", "tgl", "Tagalog", "Tagalog")
     TURKISH = ("tr", "tur", "tur", "Turkish", "Türkçe")
     TATAR = ("tt", "tat", "tat", "Tatar", "Татарча")
     UKRAINIAN = ("uk", "ukr", "ukr", "Ukrainian", "Українська")
     URDU = ("ur", "urd", "urd", "Urdu", "اردو")
     UZBEK = ("uz", "uzb", "uzb", "Uzbek", "Oʻzbek")
     VIETNAMESE = ("vi", "vie", "vie", "Vietnamese", "Tiếng Việt")
     YIDDISH = ("yi", "yid", "yid", "Yiddish", "ייִדיש")
     YORUBA = ("yo", "yor", "yor", "Yoruba", "Yorùbá")
     CHINESE = ("zh", "zho", "chi", "Chinese", "中文")
     CANTONESE = ("yue", "yue", "yue", "Cantonese", "粵語")
     NONE = (None, None, None, None, None)  # For no language
     # und for Undetermined aka unknown language https://www.loc.gov/standards/iso639-2/faq.html#25

     def __init__(self, iso_639_1, iso_639_2_t, iso_639_2_b, name_en, name_native):
         # Enum passes the member's tuple value as positional arguments.
         self.iso_639_1 = iso_639_1
         self.iso_639_2_t = iso_639_2_t
         self.iso_639_2_b = iso_639_2_b
         self.name_en = name_en
         self.name_native = name_native

     @staticmethod
     def from_iso_639_1(code):
         """Return the member whose ISO 639-1 code equals ``code``, else ``NONE``."""
         for lang in LanguageCode:
             if lang.iso_639_1 == code:
                 return lang
         return LanguageCode.NONE

     @staticmethod
     def from_iso_639_2(code):
         """Return the member whose ISO 639-2/T or 639-2/B code equals ``code``, else ``NONE``."""
         for lang in LanguageCode:
             if lang.iso_639_2_t == code or lang.iso_639_2_b == code:
                 return lang
         return LanguageCode.NONE

     @staticmethod
     def from_name(name: str):
         """Convert a language name (either English or native) to LanguageCode enum.

         The match is case-insensitive.  Returns ``LanguageCode.NONE`` when
         ``name`` is ``None`` or matches no language.
         """
         if name is None:
             return LanguageCode.NONE
         wanted = name.lower()
         for lang in LanguageCode:
             # NONE carries no names (its fields are None), so it can never match.
             if lang is LanguageCode.NONE:
                 continue
             if wanted in (lang.name_en.lower(), lang.name_native.lower()):
                 return lang
         return LanguageCode.NONE

     @staticmethod
     def from_string(value: str):
         """
         Convert a string to a LanguageCode instance. Matches on ISO codes, English name, or native name.

         Surrounding whitespace and letter case are ignored.  Returns
         ``LanguageCode.NONE`` for ``None`` or for an unrecognised string.
         """
         if value is None:
             return LanguageCode.NONE
         value = value.strip().lower()
         for lang in LanguageCode:
             if lang is LanguageCode.NONE:
                 continue
             if value in (
                 lang.iso_639_1,
                 lang.iso_639_2_t,
                 lang.iso_639_2_b,
                 lang.name_en.lower(),
                 lang.name_native.lower(),
             ):
                 return lang
         return LanguageCode.NONE

     @staticmethod
     def is_valid_language(language: str):
         """Return True when ``from_string`` recognises ``language``."""
         return LanguageCode.from_string(language) is not LanguageCode.NONE

     def to_iso_639_1(self):
         """Return the ISO 639-1 code (``None`` for ``NONE``)."""
         return self.iso_639_1

     def to_iso_639_2_t(self):
         """Return the ISO 639-2/T code (``None`` for ``NONE``)."""
         return self.iso_639_2_t

     def to_iso_639_2_b(self):
         """Return the ISO 639-2/B code (``None`` for ``NONE``)."""
         return self.iso_639_2_b

     def to_name(self, in_english=True):
         """Return the English name, or the native name when ``in_english`` is false."""
         return self.name_en if in_english else self.name_native

     def __str__(self):
         if self.name_en is None:
             return "Unknown"
         return self.name_en

     def __bool__(self):
         # Only the NONE sentinel lacks an ISO 639-1 code, so only it is falsy.
         return self.iso_639_1 is not None

     def __eq__(self, other):
         """
         Compare the LanguageCode instance to another object.
         Explicitly handle comparison to None.
         """
         if other is None:
             # Only the NONE sentinel equals None.
             return self.iso_639_1 is None
         if isinstance(other, str):
             # Resolve the string to a member and compare by identity; using
             # `==` here would recurse back into this method.
             return self is LanguageCode.from_string(other)
         if isinstance(other, LanguageCode):
             # Normal comparison for LanguageCode instances
             return self.iso_639_1 == other.iso_639_1
         # Otherwise, defer to the default equality
         return NotImplemented

     def __hash__(self):
         # Defining __eq__ alone sets __hash__ to None and makes members
         # unhashable; restore a hash based on the unique member name.
         return hash(self._name_)
--- a/launcher.py
+++ b/launcher.py
@@ -1,182 +0,0 @@
 import os
 import sys
 import urllib.request
 import subprocess
 import argparse
 def convert_to_bool(in_bool):
     """Interpret ``in_bool`` as a boolean flag.

     The argument is stringified and lower-cased; the result is True only for
     'true', 'on', '1', 'y' or 'yes'.  Everything else (including ``None``)
     is False.
     """
     truthy_words = ('true', 'on', '1', 'y', 'yes')
     normalized = str(in_bool).lower()
     return normalized in truthy_words
 def install_packages_from_requirements(requirements_file):
     """Install/upgrade the packages listed in ``requirements_file``.

     Tries ``pip3`` first and falls back to ``pip``.  A candidate is skipped
     both when it exits with a non-zero status and when the executable is not
     found on PATH (``subprocess.run`` raises ``FileNotFoundError`` in that
     case, which previously aborted the launcher before ``pip`` was tried).
     Failure of both candidates is reported on stdout, not raised.
     """
     for pip_cmd in ('pip3', 'pip'):
         try:
             subprocess.run([pip_cmd, 'install', '-r', requirements_file, '--upgrade'], check=True)
         except (subprocess.CalledProcessError, FileNotFoundError):
             continue
         print(f"Packages installed successfully using {pip_cmd}.")
         return
     print("Failed to install packages using both pip3 and pip.")
 def download_from_github(url, output_file):
     """Download ``url`` and store the response body in ``output_file``.

     The body is read completely before ``output_file`` is opened, so a
     failed or interrupted download does not truncate an existing file.
     All errors are reported on stdout; nothing is raised to the caller.
     """
     try:
         with urllib.request.urlopen(url) as response:
             data = response.read()
         # Open the destination only once the full payload is in memory.
         with open(output_file, 'wb') as out_file:
             out_file.write(data)
         print(f"File downloaded successfully to {output_file}")
     except urllib.error.HTTPError as e:
         print(f"Failed to download file from {url}. HTTP Error Code: {e.code}")
     except urllib.error.URLError as e:
         print(f"URL Error: {e.reason}")
     except Exception as e:
         print(f"An error occurred: {e}")
 def prompt_and_save_bazarr_env_variables():
     """Interactively ask for the common settings and write them to ``.env``.

     Each prompt shows its default in brackets; pressing Enter accepts it.
     All answers are collected before ``.env`` is opened, so aborting the
     prompts (Ctrl-C / EOF) leaves an existing ``.env`` untouched instead of
     truncated or half-written.
     """
     instructions = (
         "You will be prompted for several configuration values.\n"
         "If you wish to use the default value for any of them, simply press Enter without typing anything.\n"
         "The default values are shown in brackets [] next to the prompts.\n"
         "Items can be the value of true, on, 1, y, yes, false, off, 0, n, no, or an appropriate text response.\n"
     )
     print(instructions)
     # variable name -> (short description, prompt text, default value)
     env_vars = {
         'WHISPER_MODEL': ('Whisper Model', 'Enter the Whisper model you want to run: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, distil-large-v2, distil-medium.en, distil-small.en', 'medium'),
         'WEBHOOKPORT': ('Webhook Port', 'Default listening port for transcriptarr.py', '9000'),
         'TRANSCRIBE_DEVICE': ('Transcribe Device', 'Set as cpu or gpu', 'gpu'),
         # Defaulting to False here for the prompt, user can change
         'DEBUG': ('Debug', 'Enable debug logging (true/false)', 'False'),
         'CLEAR_VRAM_ON_COMPLETE': ('Clear VRAM', 'Attempt to clear VRAM when complete (Windows users may need to set this to False)', 'False'),
         'APPEND': ('Append', 'Append \'Transcribed by whisper\' to generated subtitle (true/false)', 'False'),
     }
     answers = {}
     for var, (_description, prompt, default) in env_vars.items():
         # An empty answer falls back to the default.
         answers[var] = input(f"{prompt} [{default}]: ") or default
     with open('.env', 'w') as file:
         file.writelines(f"{var}={value}\n" for var, value in answers.items())
     print("Environment variables have been saved to .env")
 def load_env_variables(env_filename='.env'):
     """Load ``KEY=VALUE`` lines from ``env_filename`` into ``os.environ``.

     Blank lines, lines starting with ``#`` and lines without ``=`` are
     ignored.  Whitespace around the key and the value is stripped, and
     lines with an empty key are skipped (an empty name cannot be stored in
     the environment).  Values loaded here overwrite variables that are
     already set; command-line flags are applied on top later by the caller.
     A missing file is reported on stdout rather than raised.
     """
     try:
         with open(env_filename, 'r') as file:
             for line in file:
                 line = line.strip()
                 if not line or line.startswith('#') or '=' not in line:
                     continue
                 # Split on the first '=' only so values may contain '='.
                 var, _, value = line.partition('=')
                 var = var.strip()
                 if not var:
                     continue
                 os.environ[var] = value.strip()
         print(f"Environment variables have been loaded from {env_filename}")
     except FileNotFoundError:
         print(f"{env_filename} file not found. Consider running with --setup-bazarr or creating it manually.")
 def main():
     """Parse the command line, prepare the environment and launch the worker script.

     Order of operations:
       1. Change into the directory containing this launcher.
       2. With ``--launcher-update`` (or ``LAUNCHER_UPDATE``), download a new
          launcher and re-exec it with the remaining arguments.
       3. Load ``.env`` (after the interactive ``--setup-bazarr`` prompts when
          requested), then let ``--debug`` / ``--append`` override the
          ``DEBUG`` / ``APPEND`` environment variables.
       4. With ``--install``, download and install requirements.
       5. Download the worker script when it is missing or an update was requested.
       6. Run the worker script unless ``--exit-early`` was given.

     The worker is started with ``sys.executable`` so it runs under the same
     interpreter (and therefore the same virtual environment) as the launcher,
     instead of whatever ``python``/``python3`` happens to be first on PATH.
     """
     python_cmd = sys.executable
     if not python_cmd:
         # sys.executable is empty/None when the interpreter path cannot be determined.
         print("Script started with an unknown command")
         sys.exit(1)
     if sys.version_info[0] < 3:
         print(f"This script requires Python 3 or higher, you are running {sys.version}")
         sys.exit(1)
     os.chdir(os.path.dirname(os.path.abspath(__file__)))

     parser = argparse.ArgumentParser(prog="python launcher.py", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     # store_true flags are False unless given on the command line.
     parser.add_argument('-d', '--debug', action='store_true', help="Enable console debugging (overrides .env and external ENV)")
     parser.add_argument('-i', '--install', action='store_true', help="Install/update all necessary packages")
     parser.add_argument('-a', '--append', action='store_true', help="Append 'Transcribed by whisper' (overrides .env and external ENV)")
     parser.add_argument('-u', '--update', action='store_true', help="Update Subgen")
     parser.add_argument('-x', '--exit-early', action='store_true', help="Exit without running transcriptarr.py")
     parser.add_argument('-s', '--setup-bazarr', action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs")
     parser.add_argument('-b', '--branch', type=str, default='main', help='Specify the branch to download from')
     parser.add_argument('-l', '--launcher-update', action='store_true', help="Update launcher.py and re-launch")
     args = parser.parse_args()

     # An explicit --branch wins; otherwise fall back to the BRANCH env var.
     branch_name = args.branch if args.branch != 'main' else os.getenv('BRANCH', 'main')
     # Non-main branches are stored side by side as <name>-<branch>.py.
     script_name_suffix = f"-{branch_name}.py" if branch_name != "main" else ".py"
     subgen_script_to_run = f"subgen{script_name_suffix}"
     language_code_script_to_download = f"language_code{script_name_suffix}"

     if args.launcher_update or convert_to_bool(os.getenv('LAUNCHER_UPDATE')):
         print(f"Updating launcher.py from GitHub branch {branch_name}...")
         download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/launcher.py", f'launcher{script_name_suffix}')
         # Drop the update flag so the relaunched launcher does not loop.
         excluded_args = ['--launcher-update', '-l']
         new_args = [arg for arg in sys.argv[1:] if arg not in excluded_args]
         print(f"Relaunching updated launcher: launcher{script_name_suffix}")
         os.execl(sys.executable, sys.executable, f"launcher{script_name_suffix}", *new_args)
         # The script will not continue past os.execl

     # --- Environment Variable Handling ---
     # 1. Load from .env first as the baseline.  load_env_variables overwrites
     #    variables that were already set externally.
     if args.setup_bazarr:
         prompt_and_save_bazarr_env_variables()
     load_env_variables()

     # 2. Override with command-line arguments (highest priority for these flags).
     if args.debug:
         os.environ['DEBUG'] = 'True'
         print("Launcher CLI: DEBUG set to True")
     elif 'DEBUG' not in os.environ:
         # Neither CLI, .env nor the external environment specified it.
         os.environ['DEBUG'] = 'False'
         print("Launcher: DEBUG defaulted to False (no prior setting)")
     if args.append:
         os.environ['APPEND'] = 'True'
         print("Launcher CLI: APPEND set to True")
     elif 'APPEND' not in os.environ:
         os.environ['APPEND'] = 'False'
     # --- End Environment Variable Handling ---

     requirements_url = "https://raw.githubusercontent.com/McCloudS/subgen/main/requirements.txt"
     requirements_file = "requirements.txt"
     if args.install:
         download_from_github(requirements_url, requirements_file)
         install_packages_from_requirements(requirements_file)

     if not os.path.exists(subgen_script_to_run) or args.update or convert_to_bool(os.getenv('UPDATE')):
         print(f"Downloading {subgen_script_to_run} from GitHub branch {branch_name}...")
         download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/transcriptarr.py", subgen_script_to_run)
         print(f"Downloading {language_code_script_to_download} from GitHub branch {branch_name}...")
         download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/language_code.py", language_code_script_to_download)
     else:
         print(f"{subgen_script_to_run} exists and UPDATE is set to False, skipping download.")

     if args.exit_early:
         print("Not running transcriptarr.py: -x or --exit-early set")
         return

     print(f'Launching {subgen_script_to_run}')
     try:
         # -u: run the child with unbuffered stdout/stderr.
         subprocess.run([python_cmd, '-u', subgen_script_to_run], check=True)
     except FileNotFoundError:
         print(f"Error: Could not find {subgen_script_to_run}. Make sure it was downloaded correctly.")
     except subprocess.CalledProcessError as e:
         print(f"Error running {subgen_script_to_run}: {e}")


 if __name__ == "__main__":
     main()
--- a/subgen.xml
+++ b/subgen.xml
@@ -1,56 +0,0 @@
 <?xml version="1.0"?>
 <Container version="2">
    <Name>subgen</Name>
  <ExtraParams>--gpus all</ExtraParams>
 	<Beta>false</Beta>
 	<Category>CATEGORY:</Category>
    <Repository>mccloud/subgen</Repository>
    <Registry>https://github.com/McCloudS/subgen</Registry>
    <DonateText>If you appreciate my work, then please consider donating</DonateText>
    <DonateLink>https://www.paypal.com/donate/?hosted_button_id=SU4QQP6LH5PF6</DonateLink>
    <DonateImg>https://www.paypal.com/en_US/i/btn/btn_donate_SM.gif</DonateImg>
    <Network>bridge</Network>
    <Privileged>false</Privileged>
    <Support>https://github.com/McCloudS/subgen/issues</Support>
    <Shell>bash</Shell>
    <GitHub>https://github.com/McCloudS/subgen</GitHub>
    <ReadMe>https://github.com/McCloudS/subgen/blob/main/README.md</ReadMe>
    <Project>https://github.com/McCloudS/subgen</Project>
    <Overview>subgen will transcribe your personal media on a Plex, Emby, or Jellyfin server to create subtitles (.srt) from audio/video files, it can also be used as a Whisper Provider in Bazarr</Overview>
    <WebUI>http://[IP]:[PORT:9000]/docs</WebUI>
    <TemplateURL>https://github.com/McCloudS/subgen/blob/main/subgen.xml</TemplateURL>
    <Icon>https://raw.githubusercontent.com/McCloudS/subgen/main/icon.png</Icon>
    <Date>2024-03-23</Date>
    <Changes></Changes>
    <Config Name="Port: Webhook Port" Target="9000" Default="9000" Mode="tcp" Description="This is the port for the webhook" Type="Port" Display="always" Required="true" Mask="false"/>
    <Config Name="Path: /subgen" Target="/subgen" Default="/mnt/user/appdata/subgen" Mode="rw" Description="This is the container path to your configuration files." Type="Path" Display="always" Required="true" Mask="false"/>
    <Config Name="Variable: TRANSCRIBE_DEVICE" Target="TRANSCRIBE_DEVICE" Default="gpu" Description="Can transcribe via gpu (Cuda only) or cpu. Takes option of 'cpu', 'gpu', 'cuda'." Type="Variable" Display="always" Required="false" Mask="false"/>
    <Config Name="Variable: WHISPER_MODEL" Target="WHISPER_MODEL" Default="medium" Description="Can be:'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1','large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-medium.en', 'distil-small.en'" Type="Variable" Display="always" Required="false" Mask="false"/>
    <Config Name="Variable: CONCURRENT_TRANSCRIPTIONS" Target="CONCURRENT_TRANSCRIPTIONS" Default="2" Description="Number of files it will transcribe in parallel" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: WHISPER_THREADS" Target="WHISPER_THREADS" Default="4" Description="number of threads to use during computation" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: MODEL_PATH" Target="MODEL_PATH" Default="./models" Description="This is where the WHISPER_MODEL will be stored. This defaults to placing it where you execute the script in the folder 'models'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: PROCADDEDMEDIA" Target="PROCADDEDMEDIA" Default="True" Description="will gen subtitles for all media added regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: PROCMEDIAONPLAY" Target="PROCMEDIAONPLAY" Default="True" Description="will gen subtitles for all played media regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: NAMESUBLANG" Target="NAMESUBLANG" Default="aa" Description="allows you to pick what it will name the subtitle. Instead of using EN, I'm using AA, so it doesn't mix with exiting external EN subs, and AA will populate higher on the list in Plex." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: SKIPIFINTERNALSUBLANG" Target="SKIPIFINTERNALSUBLANG" Default="eng" Description="Will not generate a subtitle if the file has an internal sub matching the 3 letter code of this variable (See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: WORD_LEVEL_HIGHLIGHT" Target="WORD_LEVEL_HIGHLIGHT" Default="False" Description="Highlights each words as it's spoken in the subtitle. See example video @ https://github.com/jianfch/stable-ts" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: PLEXSERVER" Target="PLEXSERVER" Default="http://plex:32400" Description="This needs to be set to your local plex server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: PLEXTOKEN" Target="PLEXTOKEN" Default="token here" Description="This needs to be set to your plex token found by https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: JELLYFINSERVER" Target="JELLYFINSERVER" Default="http://jellyfin:8096" Description="Set to your Jellyfin server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: JELLYFINTOKEN" Target="JELLYFINTOKEN" Default="token here" Description="Generate a token inside the Jellyfin interface" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: WEBHOOKPORT" Target="WEBHOOKPORT" Default="9000" Description="Change this if you need a different port for your webhook" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: TRANSCRIBE_FOLDERS" Target="TRANSCRIBE_FOLDERS" Default="" Description="Takes a pipe '|' separated list (For example: /tv|/movies|/familyvideos) and iterates through and adds those files to be queued for subtitle generation if they don't have internal subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: TRANSCRIBE_OR_TRANSLATE" Target="TRANSCRIBE_OR_TRANSLATE" Default="transcribe" Description="Takes either 'transcribe' or 'translate'. Transcribe will transcribe the audio in the same language as the input. Translate will transcribe and translate into English." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: COMPUTE_TYPE" Target="COMPUTE_TYPE" Default="auto" Description="Set compute-type using the following information: https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: DEBUG" Target="DEBUG" Default="True" Description="Provides some debug data that can be helpful to troubleshoot path mapping and other issues." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: FORCE_DETECTED_LANGUAGE_TO" Target="FORCE_DETECTED_LANGUAGE_TO" Default="" Description="This is to force the model to a language instead of the detected one, takes a 2 letter language code. For example, your audio is French but keeps detecting as English, you would set it to 'fr'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: CLEAR_VRAM_ON_COMPLETE" Target="CLEAR_VRAM_ON_COMPLETE" Default="False" Description="This will delete the model and do garbage collection when queue is empty. Good if you need to use the VRAM for something else." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: UPDATE" Target="UPDATE" Default="True" Description="Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates." Type="Variable" Display="always" Required="false" Mask="false"/>
    <Config Name="Variable: APPEND" Target="APPEND" Default="False" Description="Will add the following at the end of a subtitle: 'Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: MONITOR" Target="MONITOR" Default="False" Description="Will monitor TRANSCRIBE_FOLDERS for real-time changes to see if we need to generate subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: USE_MODEL_PROMPT" Target="USE_MODEL_PROMPT" Default="False" Description="When set to True, will use the default prompt stored in greetings_translations 'Hello, welcome to my lecture.' to try and force the use of punctuation in transcriptions that don't." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: CUSTOM_MODEL_PROMPT" Target="CUSTOM_MODEL_PROMPT" Default="" Description="If USE_MODEL_PROMPT is True, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples)." Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: LRC_FOR_AUDIO_FILES" Target="LRC_FOR_AUDIO_FILES" Default="True" Description="Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
    <Config Name="Variable: CUSTOM_REGROUP" Target="CUSTOM_REGROUP" Default="cm_sl=84_sl=42++++++1" Description="Attempts to regroup some of the segments to make a cleaner looking subtitle. See Issue #68 for discussion. Set to blank if you want to use Stable-TS default regroups algorithm of cm_sp=,* /，_sg=.5_mg=.3+3_sp=.* /。/?/？'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
 </Container>
--- a/test_backend.py
+++ b/test_backend.py
@@ -1,163 +0,0 @@
 #!/usr/bin/env python3
 """Test script for TranscriptorIO backend components."""
 import sys
 import logging
 # Configure the root logger at INFO level so the progress lines emitted by the
 # test functions below are shown when the script runs.
 logging.basicConfig(level=logging.INFO)
 # Module-level logger used by every function in this script.
 logger = logging.getLogger(__name__)
 def test_config():
    """Check that the backend settings can be imported and read.

    Imports ``settings`` from ``backend.config`` and logs the mode, database
    type, Whisper model and transcription device.

    Returns:
        True if the import and every attribute read succeed, False if any
        of them raises (the error is logged).
    """
    logger.info("Testing configuration...")
    try:
        # Imported lazily so an import failure is reported as a test failure.
        from backend.config import settings as cfg

        logger.info("✓ Config loaded successfully")
        logger.info(f"  - Mode: {cfg.transcriptarr_mode}")
        logger.info(f"  - Database: {cfg.database_type.value}")
        logger.info(f"  - Whisper Model: {cfg.whisper_model}")
        logger.info(f"  - Device: {cfg.transcribe_device}")
    except Exception as exc:
        logger.error(f"✗ Config test failed: {exc}")
        return False
    return True
 def test_database():
    """Recreate the database tables and check the connection.

    Drops any existing tables (best effort), creates them again, runs the
    database health check and logs the reported stats.

    Returns:
        True once the tables have been created, including the case where the
        health check fails afterwards (that is logged as an error but is
        deliberately not treated as a test failure). False if any step
        other than the initial drop raises.
    """
    logger.info("\nTesting database...")
    try:
        # Imported lazily so an import failure is reported as a test failure.
        from backend.core.database import database
        # NOTE(review): Base is not referenced below; presumably imported so
        # the models are loaded before create_tables() - confirm.
        from backend.core.models import Base
        # Clean database for fresh test
        try:
            database.drop_tables()
            logger.info("  - Dropped existing tables for clean test")
        except Exception as drop_error:
            # Best effort: a failed drop must not abort the test. Catch
            # Exception rather than using a bare except so KeyboardInterrupt
            # and SystemExit still propagate, and record why the drop failed.
            logger.warning(
                f"  - Could not drop existing tables (continuing): {drop_error}"
            )
        database.create_tables()
        logger.info("✓ Database initialized with fresh tables")
        # Test connection with health check
        if database.health_check():
            logger.info("✓ Database connection OK")
        else:
            logger.error("✗ Database health check failed (but tables were created)")
            # Don't fail the test if health check fails but tables exist
            return True
        # Get stats
        stats = database.get_stats()
        logger.info(f"  - Type: {stats['type']}")
        logger.info(f"  - URL: {stats['url']}")
        return True
    except Exception as e:
        logger.error(f"✗ Database test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
 def test_queue_manager():
    """Exercise the queue manager: enqueue, stats, duplicate check, claim.

    Adds a sample job, logs the queue statistics, tries to add the same job
    again (a duplicate that gets created only produces a warning), and then
    requests the next job for worker ``test-worker-1``.

    Returns:
        True if the job was created and the next job could be fetched;
        False if either step yields nothing or any call raises.
    """
    logger.info("\nTesting queue manager...")
    try:
        # Imported lazily so an import failure is reported as a test failure.
        from backend.core.queue_manager import queue_manager
        from backend.core.models import QualityPreset

        # Arguments shared by the first submission and the duplicate attempt.
        job_spec = {
            "file_path": "/test/anime.mkv",
            "file_name": "anime.mkv",
            "source_lang": "ja",
            "target_lang": "es",
            "quality_preset": QualityPreset.FAST,
        }

        # Enqueue the sample job.
        created = queue_manager.add_job(**job_spec, priority=5)
        if not created:
            logger.error("✗ Failed to create job")
            return False
        logger.info(f"✓ Job created: {created.id}")
        logger.info(f"  - File: {created.file_name}")
        logger.info(f"  - Status: {created.status.value}")
        logger.info(f"  - Priority: {created.priority}")

        # Report the queue counters.
        counters = queue_manager.get_queue_stats()
        logger.info("✓ Queue stats:")
        logger.info(f"  - Total: {counters['total']}")
        logger.info(f"  - Queued: {counters['queued']}")
        logger.info(f"  - Processing: {counters['processing']}")
        logger.info(f"  - Completed: {counters['completed']}")

        # Submitting the same file again is expected to return None.
        second = queue_manager.add_job(**job_spec)
        if second is not None:
            logger.warning("⚠ Duplicate job was created (should have been rejected)")
        else:
            logger.info("✓ Duplicate detection working")

        # Claim the next job on behalf of a test worker.
        claimed = queue_manager.get_next_job("test-worker-1")
        if not claimed:
            logger.error("✗ Failed to get next job")
            return False
        logger.info(f"✓ Got next job: {claimed.id} (assigned to test-worker-1)")
        logger.info(f"  - Status: {claimed.status.value}")
        return True
    except Exception as exc:
        logger.error(f"✗ Queue manager test failed: {exc}")
        import traceback
        traceback.print_exc()
        return False
 def main():
    """Run the config, database and queue-manager tests and log a summary.

    Returns:
        0 if every test returned a truthy result, otherwise 1.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("TranscriptorIO Backend Test Suite")
    logger.info(banner)

    # The dict literal runs the tests in this order and keeps it for the report.
    results = {
        "Config": test_config(),
        "Database": test_database(),
        "Queue Manager": test_queue_manager(),
    }

    logger.info("\n" + banner)
    logger.info("Test Results:")
    logger.info(banner)
    for name, ok in results.items():
        verdict = "✓ PASSED" if ok else "✗ FAILED"
        logger.info(f"{name}: {verdict}")
    logger.info(banner)

    if not all(results.values()):
        logger.error("❌ Some tests failed")
        return 1
    logger.info("🎉 All tests passed!")
    return 0
 # Script entry point: run the suite and use main()'s return value (0 or 1)
 # as the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
--- a/transcriptarr.py
+++ b/transcriptarr.py