chore: cleanup legacy files and update gitignore
- Remove obsolete Docker files (will be recreated later) - Remove legacy launcher.py and transcriptarr.py - Remove subgen.xml configuration - Remove test_backend.py (tests will be restructured) - Remove language_code.py from root (moved to backend/core/) - Update .gitignore for Python project structure
This commit is contained in:
10
.gitignore
vendored
10
.gitignore
vendored
@@ -9,4 +9,12 @@
|
||||
#ignore our settings
|
||||
.env
|
||||
|
||||
models/
|
||||
models/
|
||||
|
||||
transcriptarr.db
|
||||
|
||||
# Python cache
|
||||
__pycache__/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
45
Dockerfile
45
Dockerfile
@@ -1,45 +0,0 @@
|
||||
# Stage 1: Builder
|
||||
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
|
||||
|
||||
WORKDIR /subgen
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 \
|
||||
python3-pip \
|
||||
ffmpeg \
|
||||
git \
|
||||
tzdata \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Stage 2: Runtime
|
||||
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
|
||||
|
||||
WORKDIR /subgen
|
||||
|
||||
# Copy necessary files from the builder stage
|
||||
COPY --from=builder /subgen/launcher.py .
|
||||
COPY --from=builder /subgen/subgen.py .
|
||||
COPY --from=builder /subgen/language_code.py .
|
||||
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
python3 \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Set command to run the application
|
||||
CMD ["python3", "launcher.py"]
|
||||
@@ -1,34 +0,0 @@
|
||||
# === Stage 1: Build dependencies and install packages ===
|
||||
FROM python:3.11-slim-bullseye AS builder
|
||||
|
||||
WORKDIR /subgen
|
||||
|
||||
# Install required build dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
git \
|
||||
tzdata \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy and install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --prefix=/install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cpu && pip install --no-cache-dir --prefix=/install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# === Stage 2: Create a minimal runtime image ===
|
||||
FROM python:3.11-slim-bullseye AS runtime
|
||||
|
||||
WORKDIR /subgen
|
||||
|
||||
# Install only required runtime dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy only necessary files from builder stage
|
||||
COPY --from=builder /install /usr/local
|
||||
|
||||
# Copy source code
|
||||
COPY launcher.py subgen.py language_code.py /subgen/
|
||||
|
||||
CMD ["python3", "launcher.py"]
|
||||
@@ -1,40 +0,0 @@
|
||||
#docker-compose.yml
|
||||
version: '2'
|
||||
services:
|
||||
subgen:
|
||||
container_name: subgen
|
||||
tty: true
|
||||
image: mccloud/subgen
|
||||
environment:
|
||||
- "WHISPER_MODEL=medium"
|
||||
- "WHISPER_THREADS=4"
|
||||
- "PROCADDEDMEDIA=True"
|
||||
- "PROCMEDIAONPLAY=False"
|
||||
- "NAMESUBLANG=aa"
|
||||
- "SKIPIFINTERNALSUBLANG=eng"
|
||||
- "PLEXTOKEN=plextoken"
|
||||
- "PLEXSERVER=http://plexserver:32400"
|
||||
- "JELLYFINTOKEN=token here"
|
||||
- "JELLYFINSERVER=http://jellyfin:8096"
|
||||
- "WEBHOOKPORT=9000"
|
||||
- "CONCURRENT_TRANSCRIPTIONS=2"
|
||||
- "WORD_LEVEL_HIGHLIGHT=False"
|
||||
- "DEBUG=True"
|
||||
- "USE_PATH_MAPPING=False"
|
||||
- "PATH_MAPPING_FROM=/tv"
|
||||
- "PATH_MAPPING_TO=/Volumes/TV"
|
||||
- "TRANSCRIBE_DEVICE=cpu"
|
||||
- "CLEAR_VRAM_ON_COMPLETE=True"
|
||||
- "MODEL_PATH=./models"
|
||||
- "UPDATE=False"
|
||||
- "APPEND=False"
|
||||
- "USE_MODEL_PROMPT=False"
|
||||
- "CUSTOM_MODEL_PROMPT="
|
||||
- "LRC_FOR_AUDIO_FILES=True"
|
||||
- "CUSTOM_REGROUP=cm_sl=84_sl=42++++++1"
|
||||
volumes:
|
||||
- "${TV}:/tv"
|
||||
- "${MOVIES}:/movies"
|
||||
- "${APPDATA}/subgen/models:/subgen/models"
|
||||
ports:
|
||||
- "9000:9000"
|
||||
198
language_code.py
198
language_code.py
@@ -1,198 +0,0 @@
|
||||
from enum import Enum
|
||||
|
||||
class LanguageCode(Enum):
    """Languages known to the application, with their ISO 639 codes and names.

    Every member's value is a 5-tuple:
    ``(ISO 639-1, ISO 639-2/T, ISO 639-2/B, English name, native name)``.
    A few members (e.g. ``HAWAIIAN``, ``CANTONESE``) carry a three-letter
    code in the first slot.

    ``NONE`` holds ``None`` in every slot. It is falsy, prints as
    ``"Unknown"``, and is what the lookup helpers return when nothing matches.
    """

    # ISO 639-1, ISO 639-2/T, ISO 639-2/B, English Name, Native Name
    AFAR = ("aa", "aar", "aar", "Afar", "Afar")
    AFRIKAANS = ("af", "afr", "afr", "Afrikaans", "Afrikaans")
    AMHARIC = ("am", "amh", "amh", "Amharic", "አማርኛ")
    ARABIC = ("ar", "ara", "ara", "Arabic", "العربية")
    ASSAMESE = ("as", "asm", "asm", "Assamese", "অসমীয়া")
    AZERBAIJANI = ("az", "aze", "aze", "Azerbaijani", "Azərbaycanca")
    BASHKIR = ("ba", "bak", "bak", "Bashkir", "Башҡортса")
    BELARUSIAN = ("be", "bel", "bel", "Belarusian", "Беларуская")
    BULGARIAN = ("bg", "bul", "bul", "Bulgarian", "Български")
    BENGALI = ("bn", "ben", "ben", "Bengali", "বাংলা")
    TIBETAN = ("bo", "bod", "tib", "Tibetan", "བོད་ཡིག")
    BRETON = ("br", "bre", "bre", "Breton", "Brezhoneg")
    BOSNIAN = ("bs", "bos", "bos", "Bosnian", "Bosanski")
    CATALAN = ("ca", "cat", "cat", "Catalan", "Català")
    CZECH = ("cs", "ces", "cze", "Czech", "Čeština")
    WELSH = ("cy", "cym", "wel", "Welsh", "Cymraeg")
    DANISH = ("da", "dan", "dan", "Danish", "Dansk")
    GERMAN = ("de", "deu", "ger", "German", "Deutsch")
    GREEK = ("el", "ell", "gre", "Greek", "Ελληνικά")
    ENGLISH = ("en", "eng", "eng", "English", "English")
    SPANISH = ("es", "spa", "spa", "Spanish", "Español")
    ESTONIAN = ("et", "est", "est", "Estonian", "Eesti")
    BASQUE = ("eu", "eus", "baq", "Basque", "Euskara")
    PERSIAN = ("fa", "fas", "per", "Persian", "فارسی")
    FINNISH = ("fi", "fin", "fin", "Finnish", "Suomi")
    FAROESE = ("fo", "fao", "fao", "Faroese", "Føroyskt")
    FRENCH = ("fr", "fra", "fre", "French", "Français")
    GALICIAN = ("gl", "glg", "glg", "Galician", "Galego")
    GUJARATI = ("gu", "guj", "guj", "Gujarati", "ગુજરાતી")
    HAUSA = ("ha", "hau", "hau", "Hausa", "Hausa")
    HAWAIIAN = ("haw", "haw", "haw", "Hawaiian", "ʻŌlelo Hawaiʻi")
    HEBREW = ("he", "heb", "heb", "Hebrew", "עברית")
    HINDI = ("hi", "hin", "hin", "Hindi", "हिन्दी")
    CROATIAN = ("hr", "hrv", "hrv", "Croatian", "Hrvatski")
    HAITIAN_CREOLE = ("ht", "hat", "hat", "Haitian Creole", "Kreyòl Ayisyen")
    HUNGARIAN = ("hu", "hun", "hun", "Hungarian", "Magyar")
    ARMENIAN = ("hy", "hye", "arm", "Armenian", "Հայերեն")
    INDONESIAN = ("id", "ind", "ind", "Indonesian", "Bahasa Indonesia")
    ICELANDIC = ("is", "isl", "ice", "Icelandic", "Íslenska")
    ITALIAN = ("it", "ita", "ita", "Italian", "Italiano")
    JAPANESE = ("ja", "jpn", "jpn", "Japanese", "日本語")
    JAVANESE = ("jw", "jav", "jav", "Javanese", "ꦧꦱꦗꦮ")
    GEORGIAN = ("ka", "kat", "geo", "Georgian", "ქართული")
    KAZAKH = ("kk", "kaz", "kaz", "Kazakh", "Қазақша")
    KHMER = ("km", "khm", "khm", "Khmer", "ភាសាខ្មែរ")
    KANNADA = ("kn", "kan", "kan", "Kannada", "ಕನ್ನಡ")
    KOREAN = ("ko", "kor", "kor", "Korean", "한국어")
    LATIN = ("la", "lat", "lat", "Latin", "Latina")
    LUXEMBOURGISH = ("lb", "ltz", "ltz", "Luxembourgish", "Lëtzebuergesch")
    LINGALA = ("ln", "lin", "lin", "Lingala", "Lingála")
    LAO = ("lo", "lao", "lao", "Lao", "ພາສາລາວ")
    LITHUANIAN = ("lt", "lit", "lit", "Lithuanian", "Lietuvių")
    LATVIAN = ("lv", "lav", "lav", "Latvian", "Latviešu")
    MALAGASY = ("mg", "mlg", "mlg", "Malagasy", "Malagasy")
    MAORI = ("mi", "mri", "mao", "Maori", "Te Reo Māori")
    MACEDONIAN = ("mk", "mkd", "mac", "Macedonian", "Македонски")
    MALAYALAM = ("ml", "mal", "mal", "Malayalam", "മലയാളം")
    MONGOLIAN = ("mn", "mon", "mon", "Mongolian", "Монгол")
    MARATHI = ("mr", "mar", "mar", "Marathi", "मराठी")
    MALAY = ("ms", "msa", "may", "Malay", "Bahasa Melayu")
    MALTESE = ("mt", "mlt", "mlt", "Maltese", "Malti")
    BURMESE = ("my", "mya", "bur", "Burmese", "မြန်မာစာ")
    NEPALI = ("ne", "nep", "nep", "Nepali", "नेपाली")
    DUTCH = ("nl", "nld", "dut", "Dutch", "Nederlands")
    NORWEGIAN_NYNORSK = ("nn", "nno", "nno", "Norwegian Nynorsk", "Nynorsk")
    NORWEGIAN = ("no", "nor", "nor", "Norwegian", "Norsk")
    OCCITAN = ("oc", "oci", "oci", "Occitan", "Occitan")
    PUNJABI = ("pa", "pan", "pan", "Punjabi", "ਪੰਜਾਬੀ")
    POLISH = ("pl", "pol", "pol", "Polish", "Polski")
    PASHTO = ("ps", "pus", "pus", "Pashto", "پښتو")
    PORTUGUESE = ("pt", "por", "por", "Portuguese", "Português")
    ROMANIAN = ("ro", "ron", "rum", "Romanian", "Română")
    RUSSIAN = ("ru", "rus", "rus", "Russian", "Русский")
    SANSKRIT = ("sa", "san", "san", "Sanskrit", "संस्कृतम्")
    SINDHI = ("sd", "snd", "snd", "Sindhi", "سنڌي")
    SINHALA = ("si", "sin", "sin", "Sinhala", "සිංහල")
    SLOVAK = ("sk", "slk", "slo", "Slovak", "Slovenčina")
    SLOVENE = ("sl", "slv", "slv", "Slovene", "Slovenščina")
    SHONA = ("sn", "sna", "sna", "Shona", "ChiShona")
    SOMALI = ("so", "som", "som", "Somali", "Soomaaliga")
    ALBANIAN = ("sq", "sqi", "alb", "Albanian", "Shqip")
    SERBIAN = ("sr", "srp", "srp", "Serbian", "Српски")
    SUNDANESE = ("su", "sun", "sun", "Sundanese", "Basa Sunda")
    SWEDISH = ("sv", "swe", "swe", "Swedish", "Svenska")
    SWAHILI = ("sw", "swa", "swa", "Swahili", "Kiswahili")
    TAMIL = ("ta", "tam", "tam", "Tamil", "தமிழ்")
    TELUGU = ("te", "tel", "tel", "Telugu", "తెలుగు")
    TAJIK = ("tg", "tgk", "tgk", "Tajik", "Тоҷикӣ")
    THAI = ("th", "tha", "tha", "Thai", "ไทย")
    TURKMEN = ("tk", "tuk", "tuk", "Turkmen", "Türkmençe")
    TAGALOG = ("tl", "tgl", "tgl", "Tagalog", "Tagalog")
    TURKISH = ("tr", "tur", "tur", "Turkish", "Türkçe")
    TATAR = ("tt", "tat", "tat", "Tatar", "Татарча")
    UKRAINIAN = ("uk", "ukr", "ukr", "Ukrainian", "Українська")
    URDU = ("ur", "urd", "urd", "Urdu", "اردو")
    UZBEK = ("uz", "uzb", "uzb", "Uzbek", "Oʻzbek")
    VIETNAMESE = ("vi", "vie", "vie", "Vietnamese", "Tiếng Việt")
    YIDDISH = ("yi", "yid", "yid", "Yiddish", "ייִדיש")
    YORUBA = ("yo", "yor", "yor", "Yoruba", "Yorùbá")
    CHINESE = ("zh", "zho", "chi", "Chinese", "中文")
    CANTONESE = ("yue", "yue", "yue", "Cantonese", "粵語")
    NONE = (None, None, None, None, None)  # For no language
    # und for Undetermined aka unknown language https://www.loc.gov/standards/iso639-2/faq.html#25

    def __init__(self, iso_639_1, iso_639_2_t, iso_639_2_b, name_en, name_native):
        # Enum unpacks each member's tuple value into these five arguments.
        self.iso_639_1 = iso_639_1
        self.iso_639_2_t = iso_639_2_t
        self.iso_639_2_b = iso_639_2_b
        self.name_en = name_en
        self.name_native = name_native

    @staticmethod
    def from_iso_639_1(code):
        """Return the member whose first-slot code equals ``code``, else ``NONE``."""
        for lang in LanguageCode:
            if lang.iso_639_1 == code:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_iso_639_2(code):
        """Return the member whose ISO 639-2 /T or /B code equals ``code``, else ``NONE``."""
        for lang in LanguageCode:
            if lang.iso_639_2_t == code or lang.iso_639_2_b == code:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_name(name: str):
        """Convert a language name (either English or native) to LanguageCode enum.

        Matching is case-insensitive. Returns ``LanguageCode.NONE`` when
        ``name`` is ``None`` or matches no member.
        """
        if name is None:
            return LanguageCode.NONE
        wanted = name.lower()
        for lang in LanguageCode:
            # NONE has no names to lower-case; it is only ever the fallback.
            if lang is LanguageCode.NONE:
                continue
            if wanted in (lang.name_en.lower(), lang.name_native.lower()):
                return lang
        return LanguageCode.NONE

    @staticmethod
    def from_string(value: str):
        """
        Convert a string to a LanguageCode instance. Matches on ISO codes, English name, or native name.

        The input is stripped and lower-cased before comparison. ``None`` and
        unmatched strings yield ``LanguageCode.NONE``.
        """
        if value is None:
            return LanguageCode.NONE
        value = value.strip().lower()
        for lang in LanguageCode:
            if lang is LanguageCode.NONE:
                continue
            candidates = (
                lang.iso_639_1,
                lang.iso_639_2_t,
                lang.iso_639_2_b,
                lang.name_en.lower(),
                lang.name_native.lower(),
            )
            if value in candidates:
                return lang
        return LanguageCode.NONE

    @staticmethod
    def is_valid_language(language: str):
        """Return True when ``from_string`` resolves ``language`` to a real member."""
        return LanguageCode.from_string(language) is not LanguageCode.NONE

    def to_iso_639_1(self):
        """Return the first-slot (ISO 639-1) code."""
        return self.iso_639_1

    def to_iso_639_2_t(self):
        """Return the ISO 639-2/T code."""
        return self.iso_639_2_t

    def to_iso_639_2_b(self):
        """Return the ISO 639-2/B code."""
        return self.iso_639_2_b

    def to_name(self, in_english=True):
        """Return the English name, or the native name when ``in_english`` is false."""
        return self.name_en if in_english else self.name_native

    def __str__(self):
        if self.name_en is None:
            return "Unknown"
        return self.name_en

    def __bool__(self):
        # Only NONE has no code, so only NONE is falsy.
        return self.iso_639_1 is not None

    def __eq__(self, other):
        """
        Compare the LanguageCode instance to another object.

        - ``None`` equals only the member without a code (``NONE``).
        - A string is resolved with ``from_string`` and compared by identity,
          so ``LanguageCode.ENGLISH == "en"`` is True. An unrecognised string
          resolves to ``NONE`` and therefore equals ``NONE``.
        - Two members are equal when their first-slot codes match.
        """
        if other is None:
            return self.iso_639_1 is None
        if isinstance(other, str):  # Allow comparison with a string
            return self is LanguageCode.from_string(other)
        if isinstance(other, LanguageCode):
            return self.iso_639_1 == other.iso_639_1
        # Otherwise, defer to the default equality
        return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make members unhashable; hash on the
        # same field member-to-member equality uses.
        return hash(self.iso_639_1)
|
||||
182
launcher.py
182
launcher.py
@@ -1,182 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
def convert_to_bool(in_bool):
    """Interpret ``in_bool`` as a flag.

    The value is stringified and lower-cased (no whitespace trimming); it is
    true only when the result is one of: true, on, 1, y, yes.
    """
    truthy_spellings = ('true', 'on', '1', 'y', 'yes')
    normalized = str(in_bool).lower()
    return normalized in truthy_spellings
|
||||
|
||||
def install_packages_from_requirements(requirements_file):
    """Install/upgrade the packages listed in ``requirements_file``.

    Tries ``pip3`` first and falls back to ``pip``. A command counts as failed
    when it exits non-zero *or* when the executable cannot be found, so a
    system without ``pip3`` on PATH still reaches the ``pip`` attempt.
    Prints the outcome; returns nothing and does not raise on failure.
    """
    for pip_cmd in ('pip3', 'pip'):
        try:
            subprocess.run([pip_cmd, 'install', '-r', requirements_file, '--upgrade'], check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            # Non-zero exit or missing executable: try the next candidate.
            continue
        print(f"Packages installed successfully using {pip_cmd}.")
        return
    print("Failed to install packages using both pip3 and pip.")
|
||||
|
||||
def download_from_github(url, output_file):
    """Download ``url`` and save the response body to ``output_file``.

    The whole body is read before the destination is opened, so a failed
    request or read never truncates an existing file. Errors are reported on
    stdout and swallowed; the function returns nothing either way.
    """
    try:
        with urllib.request.urlopen(url) as response:
            data = response.read()
        # Only touch the destination once the download has fully succeeded.
        with open(output_file, 'wb') as out_file:
            out_file.write(data)
        print(f"File downloaded successfully to {output_file}")
    except urllib.error.HTTPError as e:
        print(f"Failed to download file from {url}. HTTP Error Code: {e.code}")
    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}")
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
def prompt_and_save_bazarr_env_variables():
    """Interactively ask for the common settings and write them to ``.env``.

    Each entry maps a variable name to ``(label, prompt text, default)``; an
    empty answer selects the default. All answers are collected before
    ``.env`` is opened, so aborting the prompts (Ctrl-C / EOF) leaves any
    existing ``.env`` untouched instead of truncated.
    """
    instructions = (
        "You will be prompted for several configuration values.\n"
        "If you wish to use the default value for any of them, simply press Enter without typing anything.\n"
        "The default values are shown in brackets [] next to the prompts.\n"
        "Items can be the value of true, on, 1, y, yes, false, off, 0, n, no, or an appropriate text response.\n"
    )
    print(instructions)
    env_vars = {
        'WHISPER_MODEL': ('Whisper Model', 'Enter the Whisper model you want to run: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, distil-large-v2, distil-medium.en, distil-small.en', 'medium'),
        'WEBHOOKPORT': ('Webhook Port', 'Default listening port for transcriptarr.py', '9000'),
        'TRANSCRIBE_DEVICE': ('Transcribe Device', 'Set as cpu or gpu', 'gpu'),
        # Defaulting to False here for the prompt, user can change
        'DEBUG': ('Debug', 'Enable debug logging (true/false)', 'False'),
        'CLEAR_VRAM_ON_COMPLETE': ('Clear VRAM', 'Attempt to clear VRAM when complete (Windows users may need to set this to False)', 'False'),
        'APPEND': ('Append', 'Append \'Transcribed by whisper\' to generated subtitle (true/false)', 'False'),
    }

    # Gather every answer first; the label (first tuple item) is not shown.
    answers = {}
    for var, (_label, prompt, default) in env_vars.items():
        answers[var] = input(f"{prompt} [{default}]: ") or default

    with open('.env', 'w') as file:
        for var, value in answers.items():
            file.write(f"{var}={value}\n")
    print("Environment variables have been saved to .env")
|
||||
|
||||
def load_env_variables(env_filename='.env'):
    """Load ``KEY=value`` lines from ``env_filename`` into ``os.environ``.

    Lines are stripped; blank lines, lines starting with ``#`` and lines
    without ``=`` are ignored. Each remaining line is split on its first ``=``
    and assigned unconditionally, so values already present in the
    environment are overwritten. A missing file is reported, not raised.
    """
    try:
        with open(env_filename, 'r') as env_file:
            for raw_line in env_file:
                entry = raw_line.strip()
                if not entry or entry.startswith('#'):
                    continue
                key, separator, setting = entry.partition('=')
                if not separator:
                    continue
                os.environ[key] = setting
        print(f"Environment variables have been loaded from {env_filename}")
    except FileNotFoundError:
        print(f"{env_filename} file not found. Consider running with --setup-bazarr or creating it manually.")
|
||||
|
||||
def main():
    """Entry point: parse options, prepare the environment, fetch scripts, run the app.

    Steps, in order:
      1. Work out the interpreter command name and change into this file's directory.
      2. Parse command-line flags.
      3. Optionally self-update the launcher and re-exec it.
      4. Load ``.env`` (after optional interactive setup), then apply the
         ``--debug`` / ``--append`` flags on top.
      5. Optionally install requirements and download the application scripts.
      6. Run the application script unless ``--exit-early`` was given.
    """
    # The command name used to start the application is derived from how this
    # launcher itself was started.
    if 'python3' in sys.executable:
        python_cmd = 'python3'
    elif 'python' in sys.executable:
        python_cmd = 'python'
    else:
        print("Script started with an unknown command")
        sys.exit(1)
    if sys.version_info[0] < 3:
        print(f"This script requires Python 3 or higher, you are running {sys.version}")
        sys.exit(1)

    # All relative paths below (.env, requirements.txt, downloaded scripts)
    # are resolved next to this file.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser(prog="python launcher.py", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # store_true flags are False unless present on the command line.
    parser.add_argument('-d', '--debug', action='store_true', help="Enable console debugging (overrides .env and external ENV)")
    parser.add_argument('-i', '--install', action='store_true', help="Install/update all necessary packages")
    parser.add_argument('-a', '--append', action='store_true', help="Append 'Transcribed by whisper' (overrides .env and external ENV)")
    parser.add_argument('-u', '--update', action='store_true', help="Update Subgen")
    parser.add_argument('-x', '--exit-early', action='store_true', help="Exit without running transcriptarr.py")
    parser.add_argument('-s', '--setup-bazarr', action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs")
    parser.add_argument('-b', '--branch', type=str, default='main', help='Specify the branch to download from')
    parser.add_argument('-l', '--launcher-update', action='store_true', help="Update launcher.py and re-launch")

    args = parser.parse_args()

    # A non-default --branch wins; otherwise fall back to the BRANCH env var.
    branch_name = args.branch if args.branch != 'main' else os.getenv('BRANCH', 'main')
    script_name_suffix = f"-{branch_name}.py" if branch_name != "main" else ".py"
    subgen_script_to_run = f"subgen{script_name_suffix}"
    language_code_script_to_download = f"language_code{script_name_suffix}"

    if args.launcher_update or convert_to_bool(os.getenv('LAUNCHER_UPDATE')):
        print(f"Updating launcher.py from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/launcher.py", f'launcher{script_name_suffix}')
        excluded_args = ['--launcher-update', '-l']
        new_args = [arg for arg in sys.argv[1:] if arg not in excluded_args]
        # The replacement process inherits os.environ. Without this reset a
        # LAUNCHER_UPDATE set in the environment would make the relaunched
        # launcher update and relaunch again, forever.
        os.environ['LAUNCHER_UPDATE'] = 'False'
        print(f"Relaunching updated launcher: launcher{script_name_suffix}")
        os.execl(sys.executable, sys.executable, f"launcher{script_name_suffix}", *new_args)
        # The script will not continue past os.execl

    # --- Environment Variable Handling ---
    # 1. Load from the .env file. The loader assigns unconditionally, so .env
    #    values replace variables that were already set externally.
    if args.setup_bazarr:
        prompt_and_save_bazarr_env_variables()
    # Loaded in both cases: right after setup, or from a pre-existing .env.
    load_env_variables()

    # 2. Command-line flags have the highest priority for DEBUG and APPEND.
    if args.debug:
        os.environ['DEBUG'] = 'True'
        print("Launcher CLI: DEBUG set to True")
    elif 'DEBUG' not in os.environ:
        # Nothing (CLI, .env, external) specified it: default to False.
        os.environ['DEBUG'] = 'False'
        print("Launcher: DEBUG defaulted to False (no prior setting)")

    if args.append:
        os.environ['APPEND'] = 'True'
        print("Launcher CLI: APPEND set to True")
    elif 'APPEND' not in os.environ:
        os.environ['APPEND'] = 'False'
    # --- End Environment Variable Handling ---

    # Requirements are always taken from the main branch.
    requirements_url = "https://raw.githubusercontent.com/McCloudS/subgen/main/requirements.txt"
    requirements_file = "requirements.txt"

    if args.install:
        download_from_github(requirements_url, requirements_file)
        install_packages_from_requirements(requirements_file)

    if not os.path.exists(subgen_script_to_run) or args.update or convert_to_bool(os.getenv('UPDATE')):
        print(f"Downloading {subgen_script_to_run} from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/transcriptarr.py", subgen_script_to_run)
        print(f"Downloading {language_code_script_to_download} from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/language_code.py", language_code_script_to_download)
    else:
        print(f"{subgen_script_to_run} exists and UPDATE is set to False, skipping download.")

    if not args.exit_early:
        print(f'Launching {subgen_script_to_run}')
        try:
            # -u: run the child with unbuffered stdout/stderr.
            subprocess.run([python_cmd, '-u', subgen_script_to_run], check=True)
        except FileNotFoundError:
            print(f"Error: Could not find {subgen_script_to_run}. Make sure it was downloaded correctly.")
        except subprocess.CalledProcessError as e:
            print(f"Error running {subgen_script_to_run}: {e}")
    else:
        print("Not running transcriptarr.py: -x or --exit-early set")


if __name__ == "__main__":
    main()
|
||||
56
subgen.xml
56
subgen.xml
@@ -1,56 +0,0 @@
|
||||
<?xml version="1.0"?>
|
||||
<Container version="2">
|
||||
<Name>subgen</Name>
|
||||
<ExtraParams>--gpus all</ExtraParams>
|
||||
<Beta>false</Beta>
|
||||
<Category>CATEGORY:</Category>
|
||||
<Repository>mccloud/subgen</Repository>
|
||||
<Registry>https://github.com/McCloudS/subgen</Registry>
|
||||
<DonateText>If you appreciate my work, then please consider donating</DonateText>
|
||||
<DonateLink>https://www.paypal.com/donate/?hosted_button_id=SU4QQP6LH5PF6</DonateLink>
|
||||
<DonateImg>https://www.paypal.com/en_US/i/btn/btn_donate_SM.gif</DonateImg>
|
||||
<Network>bridge</Network>
|
||||
<Privileged>false</Privileged>
|
||||
<Support>https://github.com/McCloudS/subgen/issues</Support>
|
||||
<Shell>bash</Shell>
|
||||
<GitHub>https://github.com/McCloudS/subgen</GitHub>
|
||||
<ReadMe>https://github.com/McCloudS/subgen/blob/main/README.md</ReadMe>
|
||||
<Project>https://github.com/McCloudS/subgen</Project>
|
||||
<Overview>subgen will transcribe your personal media on a Plex, Emby, or Jellyfin server to create subtitles (.srt) from audio/video files, it can also be used as a Whisper Provider in Bazarr</Overview>
|
||||
<WebUI>http://[IP]:[PORT:9000]/docs</WebUI>
|
||||
<TemplateURL>https://github.com/McCloudS/subgen/blob/main/subgen.xml</TemplateURL>
|
||||
<Icon>https://raw.githubusercontent.com/McCloudS/subgen/main/icon.png</Icon>
|
||||
<Date>2024-03-23</Date>
|
||||
<Changes></Changes>
|
||||
<Config Name="Port: Webhook Port" Target="9000" Default="9000" Mode="tcp" Description="This is the port for the webhook" Type="Port" Display="always" Required="true" Mask="false"/>
|
||||
<Config Name="Path: /subgen" Target="/subgen" Default="/mnt/user/appdata/subgen" Mode="rw" Description="This is the container path to your configuration files." Type="Path" Display="always" Required="true" Mask="false"/>
|
||||
<Config Name="Variable: TRANSCRIBE_DEVICE" Target="TRANSCRIBE_DEVICE" Default="gpu" Description="Can transcribe via gpu (Cuda only) or cpu. Takes option of 'cpu', 'gpu', 'cuda'." Type="Variable" Display="always" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: WHISPER_MODEL" Target="WHISPER_MODEL" Default="medium" Description="Can be:'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1','large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-medium.en', 'distil-small.en'" Type="Variable" Display="always" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: CONCURRENT_TRANSCRIPTIONS" Target="CONCURRENT_TRANSCRIPTIONS" Default="2" Description="Number of files it will transcribe in parallel" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: WHISPER_THREADS" Target="WHISPER_THREADS" Default="4" Description="number of threads to use during computation" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: MODEL_PATH" Target="MODEL_PATH" Default="./models" Description="This is where the WHISPER_MODEL will be stored. This defaults to placing it where you execute the script in the folder 'models'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: PROCADDEDMEDIA" Target="PROCADDEDMEDIA" Default="True" Description="will gen subtitles for all media added regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: PROCMEDIAONPLAY" Target="PROCMEDIAONPLAY" Default="True" Description="will gen subtitles for all played media regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: NAMESUBLANG" Target="NAMESUBLANG" Default="aa" Description="allows you to pick what it will name the subtitle. Instead of using EN, I'm using AA, so it doesn't mix with existing external EN subs, and AA will populate higher on the list in Plex." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: SKIPIFINTERNALSUBLANG" Target="SKIPIFINTERNALSUBLANG" Default="eng" Description="Will not generate a subtitle if the file has an internal sub matching the 3 letter code of this variable (See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: WORD_LEVEL_HIGHLIGHT" Target="WORD_LEVEL_HIGHLIGHT" Default="False" Description="Highlights each word as it's spoken in the subtitle. See example video @ https://github.com/jianfch/stable-ts" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: PLEXSERVER" Target="PLEXSERVER" Default="http://plex:32400" Description="This needs to be set to your local plex server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: PLEXTOKEN" Target="PLEXTOKEN" Default="token here" Description="This needs to be set to your plex token found by https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: JELLYFINSERVER" Target="JELLYFINSERVER" Default="http://jellyfin:8096" Description="Set to your Jellyfin server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: JELLYFINTOKEN" Target="JELLYFINTOKEN" Default="token here" Description="Generate a token inside the Jellyfin interface" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: WEBHOOKPORT" Target="WEBHOOKPORT" Default="9000" Description="Change this if you need a different port for your webhook" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: TRANSCRIBE_FOLDERS" Target="TRANSCRIBE_FOLDERS" Default="" Description="Takes a pipe '|' separated list (For example: /tv|/movies|/familyvideos) and iterates through and adds those files to be queued for subtitle generation if they don't have internal subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: TRANSCRIBE_OR_TRANSLATE" Target="TRANSCRIBE_OR_TRANSLATE" Default="transcribe" Description="Takes either 'transcribe' or 'translate'. Transcribe will transcribe the audio in the same language as the input. Translate will transcribe and translate into English." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: COMPUTE_TYPE" Target="COMPUTE_TYPE" Default="auto" Description="Set compute-type using the following information: https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: DEBUG" Target="DEBUG" Default="True" Description="Provides some debug data that can be helpful to troubleshoot path mapping and other issues." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: FORCE_DETECTED_LANGUAGE_TO" Target="FORCE_DETECTED_LANGUAGE_TO" Default="" Description="This is to force the model to a language instead of the detected one, takes a 2 letter language code. For example, your audio is French but keeps detecting as English, you would set it to 'fr'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: CLEAR_VRAM_ON_COMPLETE" Target="CLEAR_VRAM_ON_COMPLETE" Default="False" Description="This will delete the model and do garbage collection when queue is empty. Good if you need to use the VRAM for something else." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: UPDATE" Target="UPDATE" Default="True" Description="Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates." Type="Variable" Display="always" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: APPEND" Target="APPEND" Default="False" Description="Will add the following at the end of a subtitle: 'Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: MONITOR" Target="MONITOR" Default="False" Description="Will monitor TRANSCRIBE_FOLDERS for real-time changes to see if we need to generate subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: USE_MODEL_PROMPT" Target="USE_MODEL_PROMPT" Default="False" Description="When set to True, will use the default prompt stored in greetings_translations 'Hello, welcome to my lecture.' to try and force the use of punctuation in transcriptions that don't." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: CUSTOM_MODEL_PROMPT" Target="CUSTOM_MODEL_PROMPT" Default="" Description="If USE_MODEL_PROMPT is True, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples)." Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: LRC_FOR_AUDIO_FILES" Target="LRC_FOR_AUDIO_FILES" Default="True" Description="Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
<Config Name="Variable: CUSTOM_REGROUP" Target="CUSTOM_REGROUP" Default="cm_sl=84_sl=42++++++1" Description="Attempts to regroup some of the segments to make a cleaner looking subtitle. See Issue #68 for discussion. Set to blank if you want to use Stable-TS default regroups algorithm of cm_sp=,* /,_sg=.5_mg=.3+3_sp=.* /。/?/?'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
|
||||
|
||||
</Container>
|
||||
163
test_backend.py
163
test_backend.py
@@ -1,163 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test script for TranscriptorIO backend components."""
|
||||
import sys
|
||||
import logging
|
||||
|
||||
# Configure the root logger once at import time so that every check below
# reports at INFO level and above, then grab a logger named after this module.
logging.basicConfig(
    level=logging.INFO,
)
logger = logging.getLogger(
    __name__,
)
|
||||
|
||||
|
||||
def test_config():
    """Verify that the backend settings object can be imported and read.

    Logs the mode, database type, Whisper model and transcription device
    taken from ``backend.config.settings``.

    Returns:
        bool: True when the import and every attribute read succeeded,
        False when any of them raised (the error is logged).
    """
    logger.info("Testing configuration...")
    try:
        # Imported inside the try so that an import failure is reported as a
        # failed check instead of aborting the whole script.
        from backend.config import settings

        logger.info("✓ Config loaded successfully")

        # Each value is read lazily, one per log line, so a failing attribute
        # still leaves the preceding lines in the log.
        reported_fields = (
            ("Mode", lambda: settings.transcriptarr_mode),
            ("Database", lambda: settings.database_type.value),
            ("Whisper Model", lambda: settings.whisper_model),
            ("Device", lambda: settings.transcribe_device),
        )
        for label, read_value in reported_fields:
            logger.info(f" - {label}: {read_value()}")
    except Exception as exc:
        logger.error(f"✗ Config test failed: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
def test_database():
    """Verify that the database can be reset, initialised and queried.

    Steps, in order: drop any existing tables (best effort), create the
    tables, run the health check, then log the ``type`` and ``url`` entries
    returned by ``database.get_stats()``.

    Returns:
        bool: True when the tables were created and the stats were read.
        Also True when the tables were created but the health check failed;
        that case is logged as an error but deliberately not treated as a
        failure. False when any other step raised; the error and its
        traceback are reported.
    """
    logger.info("\nTesting database...")
    try:
        from backend.core.database import database
        # NOTE(review): Base is not referenced below; presumably the import
        # is kept so the models are loaded before create_tables() - confirm.
        from backend.core.models import Base

        # Clean database for fresh test. This stays best-effort, but only
        # ordinary errors are tolerated (a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit) and the reason is no longer hidden.
        try:
            database.drop_tables()
            logger.info(" - Dropped existing tables for clean test")
        except Exception as drop_error:
            logger.warning(
                f" - Could not drop existing tables (continuing): {drop_error}"
            )

        database.create_tables()
        logger.info("✓ Database initialized with fresh tables")

        # Test connection with health check
        if database.health_check():
            logger.info("✓ Database connection OK")
        else:
            logger.error("✗ Database health check failed (but tables were created)")
            # Don't fail the test if health check fails but tables exist
            return True

        # Get stats
        stats = database.get_stats()
        logger.info(f" - Type: {stats['type']}")
        logger.info(f" - URL: {stats['url']}")

        return True
    except Exception as e:
        logger.error(f"✗ Database test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
def test_queue_manager():
    """Exercise the queue manager: add a job, read stats, re-add, dequeue.

    A job is added with priority 5, the queue statistics are logged, the same
    job is submitted again (a non-``None`` result is only logged as a
    warning), and finally the next job is requested for the worker id
    ``"test-worker-1"``.

    Returns:
        bool: True when the first job was created and a next job was
        obtained. False when either of those came back falsy or when any
        step raised; exceptions are logged together with their traceback.
    """
    logger.info("\nTesting queue manager...")
    try:
        from backend.core.queue_manager import queue_manager
        from backend.core.models import QualityPreset

        # Arguments shared by the initial submission and the duplicate one.
        job_kwargs = {
            "file_path": "/test/anime.mkv",
            "file_name": "anime.mkv",
            "source_lang": "ja",
            "target_lang": "es",
            "quality_preset": QualityPreset.FAST,
        }

        # Add a test job
        job = queue_manager.add_job(**job_kwargs, priority=5)
        if not job:
            logger.error("✗ Failed to create job")
            return False
        logger.info(f"✓ Job created: {job.id}")
        logger.info(f" - File: {job.file_name}")
        logger.info(f" - Status: {job.status.value}")
        logger.info(f" - Priority: {job.priority}")

        # Get queue stats
        stats = queue_manager.get_queue_stats()
        logger.info("✓ Queue stats:")
        for label, key in (
            ("Total", "total"),
            ("Queued", "queued"),
            ("Processing", "processing"),
            ("Completed", "completed"),
        ):
            logger.info(f" - {label}: {stats[key]}")

        # Try to add duplicate (same arguments, no explicit priority)
        duplicate = queue_manager.add_job(**job_kwargs)
        if duplicate is not None:
            logger.warning("⚠ Duplicate job was created (should have been rejected)")
        else:
            logger.info("✓ Duplicate detection working")

        # Get next job
        next_job = queue_manager.get_next_job("test-worker-1")
        if not next_job:
            logger.error("✗ Failed to get next job")
            return False
        logger.info(f"✓ Got next job: {next_job.id} (assigned to test-worker-1)")
        logger.info(f" - Status: {next_job.status.value}")
    except Exception as exc:
        logger.error(f"✗ Queue manager test failed: {exc}")
        import traceback

        traceback.print_exc()
        return False
    return True
|
||||
|
||||
|
||||
def main():
    """Run every backend check and log a pass/fail summary.

    The checks run in a fixed order: config, database, queue manager.

    Returns:
        int: 0 when every check returned a truthy result, 1 otherwise.
        Intended to be used as the process exit status.
    """
    rule = "=" * 60

    logger.info(rule)
    logger.info("TranscriptorIO Backend Test Suite")
    logger.info(rule)

    # Dict literal values are evaluated top to bottom, which fixes the order
    # in which the checks execute.
    results = {
        "Config": test_config(),
        "Database": test_database(),
        "Queue Manager": test_queue_manager(),
    }

    logger.info("\n" + rule)
    logger.info("Test Results:")
    logger.info(rule)

    for test_name, passed in results.items():
        verdict = "✓ PASSED" if passed else "✗ FAILED"
        logger.info(f"{test_name}: {verdict}")

    logger.info(rule)

    if not all(results.values()):
        logger.error("❌ Some tests failed")
        return 1

    logger.info("🎉 All tests passed!")
    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
1779
transcriptarr.py
1779
transcriptarr.py
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user