chore: cleanup legacy files and update gitignore

- Remove obsolete Docker files (will be recreated later)
- Remove legacy launcher.py and transcriptarr.py
- Remove subgen.xml configuration
- Remove test_backend.py (tests will be restructured)
- Remove language_code.py from root (moved to backend/core/)
- Update .gitignore for Python project structure
This commit is contained in:
2026-01-16 15:10:14 +01:00
parent 8acbe84b15
commit 9655686a50
9 changed files with 9 additions and 2498 deletions

10
.gitignore vendored
View File

@@ -9,4 +9,12 @@
#ignore our settings
.env
models/
transcriptarr.db
# Python cache
__pycache__/
**/__pycache__/
*.pyc
*.pyo

View File

@@ -1,45 +0,0 @@
# Two-stage GPU image: both stages start from the same NVIDIA CUDA 12.3.2 /
# cuDNN 9 runtime base (Ubuntu 22.04). The builder installs the Python
# dependencies; the runtime stage copies in only the scripts and the
# installed packages.
# Stage 1: Builder
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
WORKDIR /subgen
# Build-time only: keeps apt from prompting for input during package installation
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
python3-pip \
ffmpeg \
git \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Stage 2: Runtime
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
WORKDIR /subgen
# Copy necessary files from the builder stage
COPY --from=builder /subgen/launcher.py .
COPY --from=builder /subgen/subgen.py .
COPY --from=builder /subgen/language_code.py .
# NOTE(review): the path below hard-codes python3.10; it must match the
# python3 version that the base image's apt repository installs.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
python3 \
curl \
&& rm -rf /var/lib/apt/lists/*
# Unbuffered stdout/stderr so log lines appear immediately in container logs
ENV PYTHONUNBUFFERED=1
# Set command to run the application
CMD ["python3", "launcher.py"]

View File

@@ -1,34 +0,0 @@
# Two-stage CPU-only image based on python:3.11-slim-bullseye. Python packages
# are installed under the /install prefix in the builder and then copied into
# /usr/local of the runtime stage.
# === Stage 1: Build dependencies and install packages ===
FROM python:3.11-slim-bullseye AS builder
WORKDIR /subgen
# Install required build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
git \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Copy and install dependencies
COPY requirements.txt .
# torch/torchaudio are installed first, with the PyTorch CPU wheel index added
# as an extra index; the same extra index is used for requirements.txt.
RUN pip install --no-cache-dir --prefix=/install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cpu && pip install --no-cache-dir --prefix=/install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
# === Stage 2: Create a minimal runtime image ===
FROM python:3.11-slim-bullseye AS runtime
WORKDIR /subgen
# Install only required runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy only necessary files from builder stage
COPY --from=builder /install /usr/local
# Copy source code
COPY launcher.py subgen.py language_code.py /subgen/
# Start the launcher script as the container's main process
CMD ["python3", "launcher.py"]

View File

@@ -1,40 +0,0 @@
#docker-compose.yml
version: '2'
services:
subgen:
container_name: subgen
tty: true
image: mccloud/subgen
environment:
- "WHISPER_MODEL=medium"
- "WHISPER_THREADS=4"
- "PROCADDEDMEDIA=True"
- "PROCMEDIAONPLAY=False"
- "NAMESUBLANG=aa"
- "SKIPIFINTERNALSUBLANG=eng"
- "PLEXTOKEN=plextoken"
- "PLEXSERVER=http://plexserver:32400"
- "JELLYFINTOKEN=token here"
- "JELLYFINSERVER=http://jellyfin:8096"
- "WEBHOOKPORT=9000"
- "CONCURRENT_TRANSCRIPTIONS=2"
- "WORD_LEVEL_HIGHLIGHT=False"
- "DEBUG=True"
- "USE_PATH_MAPPING=False"
- "PATH_MAPPING_FROM=/tv"
- "PATH_MAPPING_TO=/Volumes/TV"
- "TRANSCRIBE_DEVICE=cpu"
- "CLEAR_VRAM_ON_COMPLETE=True"
- "MODEL_PATH=./models"
- "UPDATE=False"
- "APPEND=False"
- "USE_MODEL_PROMPT=False"
- "CUSTOM_MODEL_PROMPT="
- "LRC_FOR_AUDIO_FILES=True"
- "CUSTOM_REGROUP=cm_sl=84_sl=42++++++1"
volumes:
- "${TV}:/tv"
- "${MOVIES}:/movies"
- "${APPDATA}/subgen/models:/subgen/models"
ports:
- "9000:9000"

View File

@@ -1,198 +0,0 @@
from enum import Enum
class LanguageCode(Enum):
    """Enumeration of languages with their ISO 639 codes and display names.

    Each member's value is a 5-tuple
    ``(iso_639_1, iso_639_2_t, iso_639_2_b, name_en, name_native)`` which
    ``__init__`` unpacks into attributes of the same names. The special member
    ``NONE`` has ``None`` in every field, represents "no language", and is the
    only falsy member.
    """

    # ISO 639-1, ISO 639-2/T, ISO 639-2/B, English Name, Native Name
    AFAR = ("aa", "aar", "aar", "Afar", "Afar")
    AFRIKAANS = ("af", "afr", "afr", "Afrikaans", "Afrikaans")
    AMHARIC = ("am", "amh", "amh", "Amharic", "አማርኛ")
    ARABIC = ("ar", "ara", "ara", "Arabic", "العربية")
    ASSAMESE = ("as", "asm", "asm", "Assamese", "অসমীয়া")
    AZERBAIJANI = ("az", "aze", "aze", "Azerbaijani", "Azərbaycanca")
    BASHKIR = ("ba", "bak", "bak", "Bashkir", "Башҡортса")
    BELARUSIAN = ("be", "bel", "bel", "Belarusian", "Беларуская")
    BULGARIAN = ("bg", "bul", "bul", "Bulgarian", "Български")
    BENGALI = ("bn", "ben", "ben", "Bengali", "বাংলা")
    TIBETAN = ("bo", "bod", "tib", "Tibetan", "བོད་ཡིག")
    BRETON = ("br", "bre", "bre", "Breton", "Brezhoneg")
    BOSNIAN = ("bs", "bos", "bos", "Bosnian", "Bosanski")
    CATALAN = ("ca", "cat", "cat", "Catalan", "Català")
    CZECH = ("cs", "ces", "cze", "Czech", "Čeština")
    WELSH = ("cy", "cym", "wel", "Welsh", "Cymraeg")
    DANISH = ("da", "dan", "dan", "Danish", "Dansk")
    GERMAN = ("de", "deu", "ger", "German", "Deutsch")
    GREEK = ("el", "ell", "gre", "Greek", "Ελληνικά")
    ENGLISH = ("en", "eng", "eng", "English", "English")
    SPANISH = ("es", "spa", "spa", "Spanish", "Español")
    ESTONIAN = ("et", "est", "est", "Estonian", "Eesti")
    BASQUE = ("eu", "eus", "baq", "Basque", "Euskara")
    PERSIAN = ("fa", "fas", "per", "Persian", "فارسی")
    FINNISH = ("fi", "fin", "fin", "Finnish", "Suomi")
    FAROESE = ("fo", "fao", "fao", "Faroese", "Føroyskt")
    FRENCH = ("fr", "fra", "fre", "French", "Français")
    GALICIAN = ("gl", "glg", "glg", "Galician", "Galego")
    GUJARATI = ("gu", "guj", "guj", "Gujarati", "ગુજરાતી")
    HAUSA = ("ha", "hau", "hau", "Hausa", "Hausa")
    HAWAIIAN = ("haw", "haw", "haw", "Hawaiian", "ʻŌlelo Hawaiʻi")
    HEBREW = ("he", "heb", "heb", "Hebrew", "עברית")
    HINDI = ("hi", "hin", "hin", "Hindi", "हिन्दी")
    CROATIAN = ("hr", "hrv", "hrv", "Croatian", "Hrvatski")
    HAITIAN_CREOLE = ("ht", "hat", "hat", "Haitian Creole", "Kreyòl Ayisyen")
    HUNGARIAN = ("hu", "hun", "hun", "Hungarian", "Magyar")
    ARMENIAN = ("hy", "hye", "arm", "Armenian", "Հայերեն")
    INDONESIAN = ("id", "ind", "ind", "Indonesian", "Bahasa Indonesia")
    ICELANDIC = ("is", "isl", "ice", "Icelandic", "Íslenska")
    ITALIAN = ("it", "ita", "ita", "Italian", "Italiano")
    JAPANESE = ("ja", "jpn", "jpn", "Japanese", "日本語")
    JAVANESE = ("jw", "jav", "jav", "Javanese", "ꦧꦱꦗꦮ")
    GEORGIAN = ("ka", "kat", "geo", "Georgian", "ქართული")
    KAZAKH = ("kk", "kaz", "kaz", "Kazakh", "Қазақша")
    KHMER = ("km", "khm", "khm", "Khmer", "ភាសាខ្មែរ")
    KANNADA = ("kn", "kan", "kan", "Kannada", "ಕನ್ನಡ")
    KOREAN = ("ko", "kor", "kor", "Korean", "한국어")
    LATIN = ("la", "lat", "lat", "Latin", "Latina")
    LUXEMBOURGISH = ("lb", "ltz", "ltz", "Luxembourgish", "Lëtzebuergesch")
    LINGALA = ("ln", "lin", "lin", "Lingala", "Lingála")
    LAO = ("lo", "lao", "lao", "Lao", "ພາສາລາວ")
    LITHUANIAN = ("lt", "lit", "lit", "Lithuanian", "Lietuvių")
    LATVIAN = ("lv", "lav", "lav", "Latvian", "Latviešu")
    MALAGASY = ("mg", "mlg", "mlg", "Malagasy", "Malagasy")
    MAORI = ("mi", "mri", "mao", "Maori", "Te Reo Māori")
    MACEDONIAN = ("mk", "mkd", "mac", "Macedonian", "Македонски")
    MALAYALAM = ("ml", "mal", "mal", "Malayalam", "മലയാളം")
    MONGOLIAN = ("mn", "mon", "mon", "Mongolian", "Монгол")
    MARATHI = ("mr", "mar", "mar", "Marathi", "मराठी")
    MALAY = ("ms", "msa", "may", "Malay", "Bahasa Melayu")
    MALTESE = ("mt", "mlt", "mlt", "Maltese", "Malti")
    BURMESE = ("my", "mya", "bur", "Burmese", "မြန်မာစာ")
    NEPALI = ("ne", "nep", "nep", "Nepali", "नेपाली")
    DUTCH = ("nl", "nld", "dut", "Dutch", "Nederlands")
    NORWEGIAN_NYNORSK = ("nn", "nno", "nno", "Norwegian Nynorsk", "Nynorsk")
    NORWEGIAN = ("no", "nor", "nor", "Norwegian", "Norsk")
    OCCITAN = ("oc", "oci", "oci", "Occitan", "Occitan")
    PUNJABI = ("pa", "pan", "pan", "Punjabi", "ਪੰਜਾਬੀ")
    POLISH = ("pl", "pol", "pol", "Polish", "Polski")
    PASHTO = ("ps", "pus", "pus", "Pashto", "پښتو")
    PORTUGUESE = ("pt", "por", "por", "Portuguese", "Português")
    ROMANIAN = ("ro", "ron", "rum", "Romanian", "Română")
    RUSSIAN = ("ru", "rus", "rus", "Russian", "Русский")
    SANSKRIT = ("sa", "san", "san", "Sanskrit", "संस्कृतम्")
    SINDHI = ("sd", "snd", "snd", "Sindhi", "سنڌي")
    SINHALA = ("si", "sin", "sin", "Sinhala", "සිංහල")
    SLOVAK = ("sk", "slk", "slo", "Slovak", "Slovenčina")
    SLOVENE = ("sl", "slv", "slv", "Slovene", "Slovenščina")
    SHONA = ("sn", "sna", "sna", "Shona", "ChiShona")
    SOMALI = ("so", "som", "som", "Somali", "Soomaaliga")
    ALBANIAN = ("sq", "sqi", "alb", "Albanian", "Shqip")
    SERBIAN = ("sr", "srp", "srp", "Serbian", "Српски")
    SUNDANESE = ("su", "sun", "sun", "Sundanese", "Basa Sunda")
    SWEDISH = ("sv", "swe", "swe", "Swedish", "Svenska")
    SWAHILI = ("sw", "swa", "swa", "Swahili", "Kiswahili")
    TAMIL = ("ta", "tam", "tam", "Tamil", "தமிழ்")
    TELUGU = ("te", "tel", "tel", "Telugu", "తెలుగు")
    TAJIK = ("tg", "tgk", "tgk", "Tajik", "Тоҷикӣ")
    THAI = ("th", "tha", "tha", "Thai", "ไทย")
    TURKMEN = ("tk", "tuk", "tuk", "Turkmen", "Türkmençe")
    TAGALOG = ("tl", "tgl", "tgl", "Tagalog", "Tagalog")
    TURKISH = ("tr", "tur", "tur", "Turkish", "Türkçe")
    TATAR = ("tt", "tat", "tat", "Tatar", "Татарча")
    UKRAINIAN = ("uk", "ukr", "ukr", "Ukrainian", "Українська")
    URDU = ("ur", "urd", "urd", "Urdu", "اردو")
    UZBEK = ("uz", "uzb", "uzb", "Uzbek", "Oʻzbek")
    VIETNAMESE = ("vi", "vie", "vie", "Vietnamese", "Tiếng Việt")
    YIDDISH = ("yi", "yid", "yid", "Yiddish", "ייִדיש")
    YORUBA = ("yo", "yor", "yor", "Yoruba", "Yorùbá")
    CHINESE = ("zh", "zho", "chi", "Chinese", "中文")
    CANTONESE = ("yue", "yue", "yue", "Cantonese", "粵語")
    NONE = (None, None, None, None, None)  # For no language
    # und for Undetermined aka unknown language https://www.loc.gov/standards/iso639-2/faq.html#25

    def __init__(self, iso_639_1, iso_639_2_t, iso_639_2_b, name_en, name_native):
        """Unpack the member's value tuple into named attributes."""
        self.iso_639_1 = iso_639_1
        self.iso_639_2_t = iso_639_2_t
        self.iso_639_2_b = iso_639_2_b
        self.name_en = name_en
        self.name_native = name_native

    @staticmethod
    def from_iso_639_1(code):
        """Return the member whose ISO 639-1 code equals ``code`` exactly, else ``NONE``."""
        for lang in LanguageCode:
            if lang.iso_639_1 == code:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_iso_639_2(code):
        """Return the member whose ISO 639-2/T or 639-2/B code equals ``code``, else ``NONE``."""
        for lang in LanguageCode:
            if lang.iso_639_2_t == code or lang.iso_639_2_b == code:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_name(name: str):
        """Convert a language name (either English or native) to LanguageCode enum.

        The comparison is case-insensitive. Returns ``LanguageCode.NONE`` when
        ``name`` is ``None`` or matches no language.
        """
        if name is None:
            return LanguageCode.NONE
        wanted = name.lower()
        for lang in LanguageCode:
            # NONE carries None names; calling .lower() on them would raise.
            if lang is LanguageCode.NONE:
                continue
            if lang.name_en.lower() == wanted or lang.name_native.lower() == wanted:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_string(value: str):
        """
        Convert a string to a LanguageCode instance. Matches on ISO codes, English name, or native name.

        Surrounding whitespace is ignored and matching is case-insensitive.
        Returns ``LanguageCode.NONE`` for ``None`` or an unrecognised string.
        """
        if value is None:
            return LanguageCode.NONE
        value = value.strip().lower()
        for lang in LanguageCode:
            if lang is LanguageCode.NONE:
                continue
            elif (
                value == lang.iso_639_1
                or value == lang.iso_639_2_t
                or value == lang.iso_639_2_b
                or value == lang.name_en.lower()
                or value == lang.name_native.lower()
            ):
                return lang
        return LanguageCode.NONE

    # is valid language
    @staticmethod
    def is_valid_language(language: str):
        """Return True when ``language`` resolves to a member other than ``NONE``."""
        return LanguageCode.from_string(language) is not LanguageCode.NONE

    def to_iso_639_1(self):
        """Return the ISO 639-1 code (``None`` for ``NONE``)."""
        return self.iso_639_1

    def to_iso_639_2_t(self):
        """Return the ISO 639-2/T code (``None`` for ``NONE``)."""
        return self.iso_639_2_t

    def to_iso_639_2_b(self):
        """Return the ISO 639-2/B code (``None`` for ``NONE``)."""
        return self.iso_639_2_b

    def to_name(self, in_english=True):
        """Return the English name, or the native name when ``in_english`` is False."""
        return self.name_en if in_english else self.name_native

    def __str__(self):
        """Return the English name, or ``"Unknown"`` for ``NONE``."""
        if self.name_en is None:
            return "Unknown"
        return self.name_en

    def __bool__(self):
        """A member is truthy unless it has no ISO 639-1 code (i.e. ``NONE``)."""
        return self.iso_639_1 is not None

    def __eq__(self, other):
        """
        Compare the LanguageCode instance to another object.
        Explicitly handle comparison to None.

        A string is resolved with ``from_string`` first, so
        ``LanguageCode.ENGLISH == "en"`` is True. An unrecognised string
        resolves to ``NONE`` and therefore equals only ``LanguageCode.NONE``.
        """
        if other is None:
            # If compared to None, return False unless self is None
            return self.iso_639_1 is None
        if isinstance(other, str):  # Allow comparison with a string
            # Compare members, not the raw value tuple against a member.
            return self is LanguageCode.from_string(other)
        if isinstance(other, LanguageCode):
            # Normal comparison for LanguageCode instances
            return self.iso_639_1 == other.iso_639_1
        # Otherwise, defer to the default equality
        return NotImplemented

    def __hash__(self):
        """Hash by member name so members stay usable in sets and as dict keys.

        Defining ``__eq__`` without ``__hash__`` would otherwise make the
        members unhashable.
        """
        return hash(self._name_)

View File

@@ -1,182 +0,0 @@
import os
import sys
import urllib.request
import subprocess
import argparse
def convert_to_bool(in_bool):
    """Interpret ``in_bool`` as a boolean flag.

    The argument is stringified and lower-cased; the result is True only for
    'true', 'on', '1', 'y' or 'yes'. Anything else (including None) is False.
    """
    truthy_values = ('true', 'on', '1', 'y', 'yes')
    normalized = str(in_bool).lower()
    return normalized in truthy_values
def install_packages_from_requirements(requirements_file):
    """Install/upgrade the packages listed in ``requirements_file``.

    Tries ``pip3`` first and falls back to ``pip``. A command counts as failed
    when it exits non-zero *or* cannot be launched at all (for example the
    executable is not on PATH), so a missing ``pip3`` no longer aborts the
    fallback with an uncaught exception. Prints the outcome; returns None.
    """
    for pip_cmd in ('pip3', 'pip'):
        try:
            subprocess.run([pip_cmd, 'install', '-r', requirements_file, '--upgrade'], check=True)
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit or executable not runnable: try the next command.
            continue
        print(f"Packages installed successfully using {pip_cmd}.")
        return
    print("Failed to install packages using both pip3 and pip.")
def download_from_github(url, output_file):
    """Download ``url`` and write the response body to ``output_file``.

    The whole body is read before the output file is opened, so a failed or
    interrupted download does not truncate an existing file. Errors are
    reported on stdout and never raised to the caller. Returns None.
    """
    # Imported explicitly instead of relying on urllib.request importing it.
    from urllib.error import HTTPError, URLError

    try:
        with urllib.request.urlopen(url) as response:
            data = response.read()
        with open(output_file, 'wb') as out_file:
            out_file.write(data)
        print(f"File downloaded successfully to {output_file}")
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(f"Failed to download file from {url}. HTTP Error Code: {e.code}")
    except URLError as e:
        print(f"URL Error: {e.reason}")
    except Exception as e:
        print(f"An error occurred: {e}")
def prompt_and_save_bazarr_env_variables():
    """Interactively ask for common settings and save them to ``.env``.

    Each setting is prompted with its default shown in brackets; pressing
    Enter keeps the default. All answers are collected first and ``.env`` (in
    the current working directory) is written only afterwards, so aborting
    the prompts does not leave a truncated or partial file behind.
    """
    instructions = (
        "You will be prompted for several configuration values.\n"
        "If you wish to use the default value for any of them, simply press Enter without typing anything.\n"
        "The default values are shown in brackets [] next to the prompts.\n"
        "Items can be the value of true, on, 1, y, yes, false, off, 0, n, no, or an appropriate text response.\n"
    )
    print(instructions)
    # Maps variable name -> (short description, prompt text, default value).
    env_vars = {
        'WHISPER_MODEL': ('Whisper Model', 'Enter the Whisper model you want to run: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, distil-large-v2, distil-medium.en, distil-small.en', 'medium'),
        'WEBHOOKPORT': ('Webhook Port', 'Default listening port for transcriptarr.py', '9000'),
        'TRANSCRIBE_DEVICE': ('Transcribe Device', 'Set as cpu or gpu', 'gpu'),
        # Defaulting to False here for the prompt, user can change
        'DEBUG': ('Debug', 'Enable debug logging (true/false)', 'False'),
        'CLEAR_VRAM_ON_COMPLETE': ('Clear VRAM', 'Attempt to clear VRAM when complete (Windows users may need to set this to False)', 'False'),
        'APPEND': ('Append', 'Append \'Transcribed by whisper\' to generated subtitle (true/false)', 'False'),
    }
    answers = {}
    for var, (_description, prompt, default) in env_vars.items():
        # An empty answer falls back to the default.
        answers[var] = input(f"{prompt} [{default}]: ") or default
    with open('.env', 'w') as file:
        file.writelines(f"{var}={value}\n" for var, value in answers.items())
    print("Environment variables have been saved to .env")
def load_env_variables(env_filename='.env'):
    """Load ``KEY=VALUE`` pairs from ``env_filename`` into ``os.environ``.

    Blank lines, lines starting with '#', and lines without '=' are skipped.
    Each line is split on the first '='. Values read from the file overwrite
    variables that are already set. A missing file is reported on stdout and
    otherwise ignored.
    """
    try:
        with open(env_filename, 'r') as env_file:
            for raw_line in env_file:
                entry = raw_line.strip()
                if not entry or entry.startswith('#') or '=' not in entry:
                    continue
                # This simple loader deliberately overwrites existing values;
                # CLI arguments are applied later by the caller.
                key, _, val = entry.partition('=')
                os.environ[key] = val
        print(f"Environment variables have been loaded from {env_filename}")
    except FileNotFoundError:
        print(f"{env_filename} file not found. Consider running with --setup-bazarr or creating it manually.")
def main():
    """Entry point of the launcher.

    In order, it:
      1. picks the interpreter command name from ``sys.executable`` and
         changes the working directory to this script's directory;
      2. parses the command-line flags;
      3. optionally downloads a fresh launcher and re-executes it;
      4. loads ``.env`` (after prompting for it when --setup-bazarr is given)
         and applies the --debug / --append overrides;
      5. optionally installs requirements and downloads the main script and
         the language-code module;
      6. runs the main script in a subprocess unless --exit-early is set.
    """
    # NOTE(review): the chosen command name is resolved via PATH when the
    # subprocess is started, so it may not be the interpreter running this
    # script (e.g. inside a virtualenv) - confirm whether sys.executable
    # should be used instead.
    if 'python3' in sys.executable:
        python_cmd = 'python3'
    elif 'python' in sys.executable:
        python_cmd = 'python'
    else:
        print("Script started with an unknown command")
        sys.exit(1)
    if sys.version_info[0] < 3:
        print(f"This script requires Python 3 or higher, you are running {sys.version}")
        sys.exit(1)
    # Relative paths below ('.env', requirements.txt, downloaded scripts)
    # are resolved against the directory containing this script.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    parser = argparse.ArgumentParser(prog="python launcher.py", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Changed: action='store_true' means it's False by default, True if flag is present
    parser.add_argument('-d', '--debug', action='store_true', help="Enable console debugging (overrides .env and external ENV)")
    parser.add_argument('-i', '--install', action='store_true', help="Install/update all necessary packages")
    # Changed: action='store_true'
    parser.add_argument('-a', '--append', action='store_true', help="Append 'Transcribed by whisper' (overrides .env and external ENV)")
    parser.add_argument('-u', '--update', action='store_true', help="Update Subgen")
    parser.add_argument('-x', '--exit-early', action='store_true', help="Exit without running transcriptarr.py")
    parser.add_argument('-s', '--setup-bazarr', action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs")
    parser.add_argument('-b', '--branch', type=str, default='main', help='Specify the branch to download from')
    parser.add_argument('-l', '--launcher-update', action='store_true', help="Update launcher.py and re-launch")
    args = parser.parse_args()
    # A non-default --branch wins; otherwise fall back to the BRANCH
    # environment variable. This is read before .env is loaded below, so a
    # BRANCH entry in .env has no effect here.
    branch_name = args.branch if args.branch != 'main' else os.getenv('BRANCH', 'main')
    # Scripts from a non-main branch are stored as "<name>-<branch>.py".
    script_name_suffix = f"-{branch_name}.py" if branch_name != "main" else ".py"
    subgen_script_to_run = f"subgen{script_name_suffix}"
    language_code_script_to_download = f"language_code{script_name_suffix}"
    if args.launcher_update or convert_to_bool(os.getenv('LAUNCHER_UPDATE')):
        print(f"Updating launcher.py from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/launcher.py", f'launcher{script_name_suffix}')
        # Drop the update flag so the relaunched launcher does not update again.
        # NOTE(review): a LAUNCHER_UPDATE environment variable is still
        # inherited by the relaunched process - confirm this cannot loop.
        excluded_args = ['--launcher-update', '-l']
        new_args = [arg for arg in sys.argv[1:] if arg not in excluded_args]
        print(f"Relaunching updated launcher: launcher{script_name_suffix}")
        os.execl(sys.executable, sys.executable, f"launcher{script_name_suffix}", *new_args)
        # The script will not continue past os.execl
    # --- Environment Variable Handling ---
    # 1. Load from .env file first. This sets a baseline.
    # External environment variables (set before launcher.py) will already be in os.environ
    # and won't be overwritten by load_env_variables IF load_env_variables checked for existence.
    # For simplicity, this version of load_env_variables *will* overwrite.
    # If you need to preserve external env vars over .env, load_env_variables needs adjustment.
    if args.setup_bazarr:
        prompt_and_save_bazarr_env_variables()
        # After saving, load them immediately for this run
        load_env_variables()
    else:
        # Load if not setting up, assuming .env might exist
        load_env_variables()
    # 2. Override with command-line arguments (highest priority for these specific flags)
    if args.debug:  # If -d or --debug was passed
        os.environ['DEBUG'] = 'True'
        print("Launcher CLI: DEBUG set to True")
    elif 'DEBUG' not in os.environ:  # If not set by CLI and not by .env or external
        os.environ['DEBUG'] = 'False'  # Default to False if nothing else specified it
        print("Launcher: DEBUG defaulted to False (no prior setting)")
    if args.append:  # If -a or --append was passed
        os.environ['APPEND'] = 'True'
        print("Launcher CLI: APPEND set to True")
    elif 'APPEND' not in os.environ:  # If not set by CLI and not by .env or external
        os.environ['APPEND'] = 'False'  # Default to False if nothing else specified it
        #print("Launcher: APPEND defaulted to False (no prior setting)")
    # --- End Environment Variable Handling ---
    # requirements.txt is always fetched from the main branch, regardless of
    # the selected branch.
    requirements_url = "https://raw.githubusercontent.com/McCloudS/subgen/main/requirements.txt"
    requirements_file = "requirements.txt"
    if args.install:
        download_from_github(requirements_url, requirements_file)
        install_packages_from_requirements(requirements_file)
    # Download when the script is missing locally, or when an update was
    # requested via --update or the UPDATE environment variable.
    if not os.path.exists(subgen_script_to_run) or args.update or convert_to_bool(os.getenv('UPDATE')):
        print(f"Downloading {subgen_script_to_run} from GitHub branch {branch_name}...")
        # The remote transcriptarr.py is saved under the local subgen script name.
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/transcriptarr.py", subgen_script_to_run)
        print(f"Downloading {language_code_script_to_download} from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/language_code.py", language_code_script_to_download)
    else:
        print(f"{subgen_script_to_run} exists and UPDATE is set to False, skipping download.")
    if not args.exit_early:
        #print(f"DEBUG environment variable for transcriptarr.py: {os.getenv('DEBUG')}")
        #print(f"APPEND environment variable for transcriptarr.py: {os.getenv('APPEND')}")
        print(f'Launching {subgen_script_to_run}')
        try:
            # '-u' runs the child interpreter with unbuffered output.
            subprocess.run([python_cmd, '-u', subgen_script_to_run], check=True)
        except FileNotFoundError:
            print(f"Error: Could not find {subgen_script_to_run}. Make sure it was downloaded correctly.")
        except subprocess.CalledProcessError as e:
            print(f"Error running {subgen_script_to_run}: {e}")
    else:
        print("Not running transcriptarr.py: -x or --exit-early set")
# Run the launcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -1,56 +0,0 @@
<?xml version="1.0"?>
<Container version="2">
<Name>subgen</Name>
<ExtraParams>--gpus all</ExtraParams>
<Beta>false</Beta>
<Category>CATEGORY:</Category>
<Repository>mccloud/subgen</Repository>
<Registry>https://github.com/McCloudS/subgen</Registry>
<DonateText>If you appreciate my work, then please consider donating</DonateText>
<DonateLink>https://www.paypal.com/donate/?hosted_button_id=SU4QQP6LH5PF6</DonateLink>
<DonateImg>https://www.paypal.com/en_US/i/btn/btn_donate_SM.gif</DonateImg>
<Network>bridge</Network>
<Privileged>false</Privileged>
<Support>https://github.com/McCloudS/subgen/issues</Support>
<Shell>bash</Shell>
<GitHub>https://github.com/McCloudS/subgen</GitHub>
<ReadMe>https://github.com/McCloudS/subgen/blob/main/README.md</ReadMe>
<Project>https://github.com/McCloudS/subgen</Project>
<Overview>subgen will transcribe your personal media on a Plex, Emby, or Jellyfin server to create subtitles (.srt) from audio/video files, it can also be used as a Whisper Provider in Bazarr</Overview>
<WebUI>http://[IP]:[PORT:9000]/docs</WebUI>
<TemplateURL>https://github.com/McCloudS/subgen/blob/main/subgen.xml</TemplateURL>
<Icon>https://raw.githubusercontent.com/McCloudS/subgen/main/icon.png</Icon>
<Date>2024-03-23</Date>
<Changes></Changes>
<Config Name="Port: Webhook Port" Target="9000" Default="9000" Mode="tcp" Description="This is the port for the webhook" Type="Port" Display="always" Required="true" Mask="false"/>
<Config Name="Path: /subgen" Target="/subgen" Default="/mnt/user/appdata/subgen" Mode="rw" Description="This is the container path to your configuration files." Type="Path" Display="always" Required="true" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_DEVICE" Target="TRANSCRIBE_DEVICE" Default="gpu" Description="Can transcribe via gpu (Cuda only) or cpu. Takes option of 'cpu', 'gpu', 'cuda'." Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: WHISPER_MODEL" Target="WHISPER_MODEL" Default="medium" Description="Can be:'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1','large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-medium.en', 'distil-small.en'" Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: CONCURRENT_TRANSCRIPTIONS" Target="CONCURRENT_TRANSCRIPTIONS" Default="2" Description="Number of files it will transcribe in parallel" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WHISPER_THREADS" Target="WHISPER_THREADS" Default="4" Description="number of threads to use during computation" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: MODEL_PATH" Target="MODEL_PATH" Default="./models" Description="This is where the WHISPER_MODEL will be stored. This defaults to placing it where you execute the script in the folder 'models'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PROCADDEDMEDIA" Target="PROCADDEDMEDIA" Default="True" Description="will gen subtitles for all media added regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PROCMEDIAONPLAY" Target="PROCMEDIAONPLAY" Default="True" Description="will gen subtitles for all played media regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: NAMESUBLANG" Target="NAMESUBLANG" Default="aa" Description="allows you to pick what it will name the subtitle. Instead of using EN, I'm using AA, so it doesn't mix with existing external EN subs, and AA will populate higher on the list in Plex." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: SKIPIFINTERNALSUBLANG" Target="SKIPIFINTERNALSUBLANG" Default="eng" Description="Will not generate a subtitle if the file has an internal sub matching the 3 letter code of this variable (See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WORD_LEVEL_HIGHLIGHT" Target="WORD_LEVEL_HIGHLIGHT" Default="False" Description="Highlights each word as it's spoken in the subtitle. See example video @ https://github.com/jianfch/stable-ts" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PLEXSERVER" Target="PLEXSERVER" Default="http://plex:32400" Description="This needs to be set to your local plex server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PLEXTOKEN" Target="PLEXTOKEN" Default="token here" Description="This needs to be set to your plex token found by https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: JELLYFINSERVER" Target="JELLYFINSERVER" Default="http://jellyfin:8096" Description="Set to your Jellyfin server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: JELLYFINTOKEN" Target="JELLYFINTOKEN" Default="token here" Description="Generate a token inside the Jellyfin interface" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WEBHOOKPORT" Target="WEBHOOKPORT" Default="9000" Description="Change this if you need a different port for your webhook" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_FOLDERS" Target="TRANSCRIBE_FOLDERS" Default="" Description="Takes a pipe '|' separated list (For example: /tv|/movies|/familyvideos) and iterates through and adds those files to be queued for subtitle generation if they don't have internal subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_OR_TRANSLATE" Target="TRANSCRIBE_OR_TRANSLATE" Default="transcribe" Description="Takes either 'transcribe' or 'translate'. Transcribe will transcribe the audio in the same language as the input. Translate will transcribe and translate into English." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: COMPUTE_TYPE" Target="COMPUTE_TYPE" Default="auto" Description="Set compute-type using the following information: https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: DEBUG" Target="DEBUG" Default="True" Description="Provides some debug data that can be helpful to troubleshoot path mapping and other issues." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: FORCE_DETECTED_LANGUAGE_TO" Target="FORCE_DETECTED_LANGUAGE_TO" Default="" Description="This is to force the model to a language instead of the detected one, takes a 2 letter language code. For example, your audio is French but keeps detecting as English, you would set it to 'fr'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CLEAR_VRAM_ON_COMPLETE" Target="CLEAR_VRAM_ON_COMPLETE" Default="False" Description="This will delete the model and do garbage collection when queue is empty. Good if you need to use the VRAM for something else." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: UPDATE" Target="UPDATE" Default="True" Description="Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates." Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: APPEND" Target="APPEND" Default="False" Description="Will add the following at the end of a subtitle: 'Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: MONITOR" Target="MONITOR" Default="False" Description="Will monitor TRANSCRIBE_FOLDERS for real-time changes to see if we need to generate subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: USE_MODEL_PROMPT" Target="USE_MODEL_PROMPT" Default="False" Description="When set to True, will use the default prompt stored in greetings_translations 'Hello, welcome to my lecture.' to try and force the use of punctuation in transcriptions that don't." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CUSTOM_MODEL_PROMPT" Target="CUSTOM_MODEL_PROMPT" Default="" Description="If USE_MODEL_PROMPT is True, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples)." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: LRC_FOR_AUDIO_FILES" Target="LRC_FOR_AUDIO_FILES" Default="True" Description="Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CUSTOM_REGROUP" Target="CUSTOM_REGROUP" Default="cm_sl=84_sl=42++++++1" Description="Attempts to regroup some of the segments to make a cleaner looking subtitle. See Issue #68 for discussion. Set to blank if you want to use Stable-TS default regroups algorithm of cm_sp=,* /_sg=.5_mg=.3+3_sp=.* /。/?/'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
</Container>

View File

@@ -1,163 +0,0 @@
#!/usr/bin/env python3
"""Test script for TranscriptorIO backend components."""
import sys
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_config():
    """Check that the backend settings can be imported and read.

    Logs a few settings values and returns True; any exception raised while
    importing or reading them is logged and results in False.
    """
    logger.info("Testing configuration...")
    try:
        from backend.config import settings as cfg

        logger.info("✓ Config loaded successfully")
        logger.info(f" - Mode: {cfg.transcriptarr_mode}")
        logger.info(f" - Database: {cfg.database_type.value}")
        logger.info(f" - Whisper Model: {cfg.whisper_model}")
        logger.info(f" - Device: {cfg.transcribe_device}")
    except Exception as err:
        logger.error(f"✗ Config test failed: {err}")
        return False
    return True
def test_database():
    """Test database connection and table creation.

    Drops existing tables on a best-effort basis, recreates them, runs the
    health check and logs the database stats. Returns True on success. A
    failed health check after successful table creation is logged as an error
    but still counts as a pass. Any other exception is logged with a
    traceback and yields False.
    """
    logger.info("\nTesting database...")
    try:
        from backend.core.database import database
        # NOTE(review): Base is not used directly; presumably imported so the
        # models are loaded before the tables are created - confirm.
        from backend.core.models import Base
        # Clean database for fresh test
        try:
            database.drop_tables()
            logger.info(" - Dropped existing tables for clean test")
        except Exception as drop_error:
            # Best effort only: keep going, but report why the drop failed
            # instead of silently swallowing everything (a bare except would
            # also hide KeyboardInterrupt/SystemExit).
            logger.warning(f" - Could not drop existing tables (continuing): {drop_error}")
        database.create_tables()
        logger.info("✓ Database initialized with fresh tables")
        # Test connection with health check
        if database.health_check():
            logger.info("✓ Database connection OK")
        else:
            logger.error("✗ Database health check failed (but tables were created)")
            # Don't fail the test if health check fails but tables exist
            return True
        # Get stats
        stats = database.get_stats()
        logger.info(f" - Type: {stats['type']}")
        logger.info(f" - URL: {stats['url']}")
        return True
    except Exception as e:
        logger.error(f"✗ Database test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_queue_manager():
    """Exercise the queue manager: add a job, read stats, re-add, fetch next.

    Returns True when a job can be created and then handed out to a worker.
    Returns False when either step fails or any exception is raised. A
    duplicate job being accepted only produces a warning.
    """
    logger.info("\nTesting queue manager...")
    try:
        from backend.core.queue_manager import queue_manager
        from backend.core.models import QualityPreset

        # Keyword arguments shared by the first job and the duplicate attempt.
        job_spec = dict(
            file_path="/test/anime.mkv",
            file_name="anime.mkv",
            source_lang="ja",
            target_lang="es",
            quality_preset=QualityPreset.FAST,
        )

        # Add a test job
        job = queue_manager.add_job(**job_spec, priority=5)
        if not job:
            logger.error("✗ Failed to create job")
            return False
        logger.info(f"✓ Job created: {job.id}")
        logger.info(f" - File: {job.file_name}")
        logger.info(f" - Status: {job.status.value}")
        logger.info(f" - Priority: {job.priority}")

        # Get queue stats
        stats = queue_manager.get_queue_stats()
        logger.info("✓ Queue stats:")
        logger.info(f" - Total: {stats['total']}")
        logger.info(f" - Queued: {stats['queued']}")
        logger.info(f" - Processing: {stats['processing']}")
        logger.info(f" - Completed: {stats['completed']}")

        # Try to add duplicate
        duplicate = queue_manager.add_job(**job_spec)
        if duplicate is not None:
            logger.warning("⚠ Duplicate job was created (should have been rejected)")
        else:
            logger.info("✓ Duplicate detection working")

        # Get next job
        next_job = queue_manager.get_next_job("test-worker-1")
        if not next_job:
            logger.error("✗ Failed to get next job")
            return False
        logger.info(f"✓ Got next job: {next_job.id} (assigned to test-worker-1)")
        logger.info(f" - Status: {next_job.status.value}")
        return True
    except Exception as err:
        logger.error(f"✗ Queue manager test failed: {err}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all tests.

    Executes the config, database and queue-manager checks in that order,
    logs a per-test summary, and returns 0 when every check passed or 1
    otherwise (intended as the process exit status).
    """
    separator = "=" * 60
    logger.info(separator)
    logger.info("TranscriptorIO Backend Test Suite")
    logger.info(separator)
    results = {
        "Config": test_config(),
        "Database": test_database(),
        "Queue Manager": test_queue_manager(),
    }
    logger.info("\n" + separator)
    logger.info("Test Results:")
    logger.info(separator)
    for test_name, passed in results.items():
        status = "✗ FAILED" if not passed else "✓ PASSED"
        logger.info(f"{test_name}: {status}")
    logger.info(separator)
    if not all(results.values()):
        logger.error("❌ Some tests failed")
        return 1
    logger.info("🎉 All tests passed!")
    return 0
# When run as a script, use main()'s return value (0 or 1) as the exit status.
if __name__ == "__main__":
    sys.exit(main())

File diff suppressed because it is too large Load Diff