Compare commits

...

10 Commits

Author SHA1 Message Date
4efdce8983 feat(frontend): add Vue 3 web application
Some checks failed
Build_Subgen_Dockerfile_CPU / docker (push) Failing after 27s
Build_Subgen_Dockerfile_GPU / docker (push) Has been cancelled
- Add Vue 3 + TypeScript + Pinia setup
- Add 6 complete views: Dashboard, Queue, Scanner, Rules, Workers, Settings
- Add Pinia stores for state management
- Add API service with Axios client
- Add dark theme with Tdarr-inspired styling
- Add setup wizard component
- Add path browser for filesystem navigation
2026-01-16 16:59:15 +01:00
a14d13c9d0 feat(cli): add CLI interface and setup wizard
- Add CLI with server, db, worker, scan, setup commands
- Add interactive setup wizard for first-run configuration
- Add FastAPI application with lifespan management
- Update requirements.txt with all dependencies
2026-01-16 16:58:20 +01:00
6272efbcd5 feat(api): add REST API with 45+ endpoints
- Add workers API for pool management
- Add jobs API for queue operations
- Add scan-rules API for CRUD operations
- Add scanner API for control and status
- Add settings API for configuration management
- Add system API for resource monitoring
- Add filesystem API for path browsing
- Add setup wizard API endpoint
2026-01-16 16:57:59 +01:00
c019e96cfa feat(workers): add multiprocessing worker pool system
- Add Worker class with CPU/GPU support
- Add WorkerPool for orchestrating multiple workers
- Support dynamic add/remove workers at runtime
- Add health monitoring with graceful shutdown
2026-01-16 16:56:42 +01:00
cbf5ef9623 feat(transcription): add Whisper transcriber and audio utilities
- Add WhisperTranscriber wrapper for stable-ts/faster-whisper
- Add audio utilities for ffmpeg/ffprobe operations
- Add translator for two-stage translation workflow
- Support CPU/GPU with graceful degradation
2026-01-16 16:55:02 +01:00
d28c4caa6a feat(scanning): add library scanner with rules engine
- Add ScanRule model with configurable conditions
- Add FileAnalyzer for ffprobe-based media analysis
- Add LibraryScanner with manual, scheduled and watcher modes
- Add LanguageDetector for audio language detection
- Support rule-based filtering with priority evaluation
2026-01-16 16:54:41 +01:00
58c565cd96 feat(core): add database, models, queue and settings system
- Add SQLAlchemy database setup with session management
- Add Job model with status, priority and progress tracking
- Add QueueManager with priority queue and deduplication
- Add SystemSettings model for database-backed configuration
- Add SettingsService with caching and defaults
- Add SystemMonitor for CPU/RAM/GPU resource monitoring
- Add LanguageCode utilities (moved from root)
2026-01-16 15:11:30 +01:00
9594c0b8ab refactor(config): simplify to database-backed settings
- Reduce .env.example to only DATABASE_URL
- Simplify backend/config.py to only read database connection
- All other settings now managed via database and Web UI
2026-01-16 15:11:02 +01:00
8373d8765f docs: add comprehensive project documentation
- Replace original Subgen README with TranscriptorIO documentation
- Add docs/API.md with 45+ REST endpoint documentation
- Add docs/ARCHITECTURE.md with backend component details
- Add docs/FRONTEND.md with Vue 3 frontend structure
- Add docs/CONFIGURATION.md with settings system documentation
- Remove outdated backend/README.md
2026-01-16 15:10:41 +01:00
9655686a50 chore: cleanup legacy files and update gitignore
- Remove obsolete Docker files (will be recreated later)
- Remove legacy launcher.py and transcriptarr.py
- Remove subgen.xml configuration
- Remove test_backend.py (tests will be restructured)
- Remove language_code.py from root (moved to backend/core/)
- Update .gitignore for Python project structure
2026-01-16 15:10:14 +01:00
79 changed files with 23038 additions and 3071 deletions

View File

@@ -1,90 +1,20 @@
# ============================================
# TranscriptorIO Configuration
# ============================================
#
# IMPORTANT: Most configuration is now stored in the database
# and managed through the Web UI Settings page.
#
# Only DATABASE_URL is required in this file.
# Run the server and complete the Setup Wizard for initial configuration.
#
# === Application Mode ===
# Options: standalone, provider, or standalone,provider (hybrid mode)
TRANSCRIPTARR_MODE=standalone
# === Database Configuration ===
# SQLite (default - no additional driver needed)
# === Database Configuration (REQUIRED) ===
# SQLite (default - good for single-user, no additional driver needed)
DATABASE_URL=sqlite:///./transcriptarr.db
# PostgreSQL example (requires psycopg2-binary)
# PostgreSQL (recommended for production, requires psycopg2-binary)
# DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr
# MariaDB/MySQL example (requires pymysql)
# MariaDB/MySQL (requires pymysql)
# DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr
# === Worker Configuration ===
CONCURRENT_TRANSCRIPTIONS=2
WHISPER_THREADS=4
TRANSCRIBE_DEVICE=cpu
CLEAR_VRAM_ON_COMPLETE=True
# === Whisper Model Configuration ===
# Options: tiny, base, small, medium, large-v3, large-v3-turbo, etc.
WHISPER_MODEL=medium
MODEL_PATH=./models
COMPUTE_TYPE=auto
# === Standalone Mode Configuration ===
# Pipe-separated paths to scan
LIBRARY_PATHS=/media/anime|/media/movies
AUTO_SCAN_ENABLED=False
SCAN_INTERVAL_MINUTES=30
# Filter rules for standalone mode
REQUIRED_AUDIO_LANGUAGE=ja
REQUIRED_MISSING_SUBTITLE=spa
SKIP_IF_SUBTITLE_EXISTS=True
# === Provider Mode Configuration ===
BAZARR_URL=http://bazarr:6767
BAZARR_API_KEY=your_api_key_here
PROVIDER_TIMEOUT_SECONDS=600
PROVIDER_CALLBACK_ENABLED=True
PROVIDER_POLLING_INTERVAL=30
# === API Configuration ===
WEBHOOK_PORT=9000
API_HOST=0.0.0.0
DEBUG=True
# === Transcription Settings ===
# Options: transcribe, translate
TRANSCRIBE_OR_TRANSLATE=transcribe
SUBTITLE_LANGUAGE_NAME=
# Options: ISO_639_1, ISO_639_2_T, ISO_639_2_B, NAME, NATIVE
SUBTITLE_LANGUAGE_NAMING_TYPE=ISO_639_2_B
WORD_LEVEL_HIGHLIGHT=False
CUSTOM_REGROUP=cm_sl=84_sl=42++++++1
# === Skip Configuration ===
SKIP_IF_EXTERNAL_SUBTITLES_EXIST=False
SKIP_IF_TARGET_SUBTITLES_EXIST=True
SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE=eng
# Pipe-separated language codes
SKIP_SUBTITLE_LANGUAGES=
SKIP_IF_AUDIO_LANGUAGES=
SKIP_UNKNOWN_LANGUAGE=False
SKIP_ONLY_SUBGEN_SUBTITLES=False
# === Advanced Settings ===
FORCE_DETECTED_LANGUAGE_TO=
DETECT_LANGUAGE_LENGTH=30
DETECT_LANGUAGE_OFFSET=0
SHOULD_WHISPER_DETECT_AUDIO_LANGUAGE=False
# Pipe-separated list in order of preference
PREFERRED_AUDIO_LANGUAGES=eng
# === Path Mapping ===
USE_PATH_MAPPING=False
PATH_MAPPING_FROM=/tv
PATH_MAPPING_TO=/Volumes/TV
# === Legacy SubGen Compatibility ===
SHOW_IN_SUBNAME_SUBGEN=True
SHOW_IN_SUBNAME_MODEL=True
APPEND=False
LRC_FOR_AUDIO_FILES=True

10
.gitignore vendored
View File

@@ -9,4 +9,12 @@
#ignore our settings
.env
models/
models/
transcriptarr.db
# Python cache
__pycache__/
**/__pycache__/
*.pyc
*.pyo

View File

@@ -1,45 +0,0 @@
# Stage 1: Builder
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
WORKDIR /subgen
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
python3-pip \
ffmpeg \
git \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Stage 2: Runtime
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
WORKDIR /subgen
# Copy necessary files from the builder stage
COPY --from=builder /subgen/launcher.py .
COPY --from=builder /subgen/subgen.py .
COPY --from=builder /subgen/language_code.py .
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
python3 \
curl \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONUNBUFFERED=1
# Set command to run the application
CMD ["python3", "launcher.py"]

View File

@@ -1,34 +0,0 @@
# === Stage 1: Build dependencies and install packages ===
FROM python:3.11-slim-bullseye AS builder
WORKDIR /subgen
# Install required build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
git \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Copy and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir --prefix=/install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cpu && pip install --no-cache-dir --prefix=/install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
# === Stage 2: Create a minimal runtime image ===
FROM python:3.11-slim-bullseye AS runtime
WORKDIR /subgen
# Install only required runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy only necessary files from builder stage
COPY --from=builder /install /usr/local
# Copy source code
COPY launcher.py subgen.py language_code.py /subgen/
CMD ["python3", "launcher.py"]

483
README.md
View File

@@ -1,282 +1,265 @@
[![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.com/donate/?hosted_button_id=SU4QQP6LH5PF6)
<img src="https://raw.githubusercontent.com/McCloudS/subgen/main/icon.png" width="200">
# 🎬 TranscriptorIO
<details>
<summary>Updates:</summary>
**AI-powered subtitle transcription service with REST API and Web UI**
26 Aug 2025: Renamed environment variables to make them slightly easier to understand. Currently maintains backwards compatibility. See https://github.com/McCloudS/subgen/pull/229
[![Python](https://img.shields.io/badge/Python-3.12+-blue.svg)](https://www.python.org/)
[![FastAPI](https://img.shields.io/badge/FastAPI-0.100+-green.svg)](https://fastapi.tiangolo.com/)
[![Vue.js](https://img.shields.io/badge/Vue.js-3.x-brightgreen.svg)](https://vuejs.org/)
[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
12 Aug 2025: Added distil-large-v3.5
TranscriptorIO is an AI-powered subtitle transcription service based on [Subgen](https://github.com/McCloudS/subgen), featuring a modern FastAPI backend with 45+ REST endpoints, a Vue 3 web interface, and a distributed worker pool architecture.
7 Feb: Fixed (V)RAM clearing, added PLEX_QUEUE_SEASON, other extraneous fixes or refactoring.
---
23 Dec: Added PLEX_QUEUE_NEXT_EPISODE and PLEX_QUEUE_SERIES. Will automatically start generating subtitles for the next episode in your series, or queue the whole series.
## ✨ Features
4 Dec: Added more ENV settings: DETECT_LANGUAGE_OFFSET, PREFERRED_AUDIO_LANGUAGES, SKIP_IF_AUDIO_TRACK_IS, ONLY_SKIP_IF_SUBGEN_SUBTITLE, SKIP_UNKNOWN_LANGUAGE, SKIP_IF_LANGUAGE_IS_NOT_SET_BUT_SUBTITLES_EXIST, SHOULD_WHISPER_DETECT_AUDIO_LANGUAGE
### 🎯 Core Features
- **Whisper Transcription** - Support for faster-whisper and stable-ts
- **Translation** - Two-stage translation: Whisper to English, then Google Translate to target language
- **CPU/GPU Workers** - Scalable worker pool with CUDA support
- **Persistent Queue** - Priority-based queue manager with SQLite/PostgreSQL
- **Library Scanner** - Automatic scanning with configurable rules
- **REST API** - 45+ endpoints with FastAPI
- **Web UI** - Complete Vue 3 dashboard with 6 views
- **Setup Wizard** - Interactive first-run configuration
- **Real-time Monitoring** - File watcher, scheduled scans, and system resources
30 Nov 2024: Significant refactoring and handling by Muisje. Added language code class for more robustness and flexibility and ability to separate audio tracks to make sure you get the one you want. New ENV Variables: SUBTITLE_LANGUAGE_NAMING_TYPE, SKIP_IF_AUDIO_TRACK_IS, PREFERRED_AUDIO_LANGUAGE, SKIP_IF_TO_TRANSCRIBE_SUB_ALREADY_EXIST
### 🔧 Technical Features
- **Multiprocessing**: Workers isolated in separate processes
- **Priority Queuing**: Queue with priorities and deduplication
- **Graceful Degradation**: Works without optional dependencies (Whisper, GPU)
- **Thread-Safe**: Row locking and context managers
- **Auto-retry**: Automatic retry of failed jobs
- **Health Monitoring**: Detailed statistics and health checks
- **Database-backed Settings**: All configuration stored in database, editable via Web UI
There will be some minor hiccups, so please identify them as we work through this major overhaul.
---
22 Nov 2024: Updated to support large-v3-turbo
## 🚀 Quick Start
30 Sept 2024: Removed webui
### 1. Install dependencies
5 Sept 2024: Fixed Emby response to a test message/notification. Clarified Emby/Plex/Jellyfin instructions for paths.
```bash
# Basic dependencies
pip install -r requirements.txt
14 Aug 2024: Cleaned up usage of kwargs across the board a bit. Added ability for /asr to encode or not, so you don't need to worry about what files/formats you upload.
3 Aug 2024: Added SUBGEN_KWARGS environment variable which allows you to override the model.transcribe with most options you'd like from whisper, faster-whisper, or stable-ts. This won't be exposed via the webui, it's best to set directly.
21 Apr 2024: Fixed queuing with thanks to https://github.com/xhzhu0628 @ https://github.com/McCloudS/subgen/pull/85. Bazarr intentionally doesn't follow `CONCURRENT_TRANSCRIPTIONS` because it needs a time sensitive response.
31 Mar 2024: Removed `/subsync` endpoint and general refactoring. Open an issue if you were using it!
24 Mar 2024: ~~Added a 'webui' to configure environment variables. You can use this instead of manually editing the script or using Environment Variables in your OS or Docker (if you want). The config will prioritize OS Env Variables, then the .env file, then the defaults. You can access it at `http://subgen:9000/`~~
23 Mar 2024: Added `CUSTOM_REGROUP` to try to 'clean up' subtitles a bit.
22 Mar 2024: Added LRC capability via see: `'LRC_FOR_AUDIO_FILES' | True | Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff' |`
21 Mar 2024: Added a 'wizard' into the launcher that will help standalone users get common Bazarr variables configured. See below in Launcher section. Removed 'Transformers' as an option. While I usually don't like to remove features, I don't think anyone is using this and the results are wildly unpredictable and often cause out of memory errors. Added two new environment variables called `USE_MODEL_PROMPT` and `CUSTOM_MODEL_PROMPT`. If `USE_MODEL_PROMPT` is `True` it will use `CUSTOM_MODEL_PROMPT` if set, otherwise will default to using the pre-configured language pairings, such as: `"en": "Hello, welcome to my lecture.",
"zh": "你好,欢迎来到我的讲座。"` These pre-configured translations are geared towards fixing some audio that may not have punctuation. We can prompt it to try to force the use of punctuation during transcription.
19 Mar 2024: Added a `MONITOR` environment variable. Will 'watch' or 'monitor' your `TRANSCRIBE_FOLDERS` for changes and run on them. Useful if you just want to paste files into a folder and get subtitles.
6 Mar 2024: Added a `/subsync` endpoint that can attempt to align/synchronize subtitles to a file. Takes audio_file, subtitle_file, language (2 letter code), and outputs an srt.
5 Mar 2024: Cleaned up logging. Added timestamps option (if Debug = True, timestamps will print in logs).
4 Mar 2024: Updated Dockerfile CUDA to 12.2.2 (From CTranslate2). Added endpoint `/status` to return Subgen version. Can also use distil models now! See variables below!
29 Feb 2024: Changed default port to align with whisper-asr and deconflict other consumers of the previous port.
11 Feb 2024: Added a 'launcher.py' file for Docker to prevent huge image downloads. Now set UPDATE to True if you want to pull the latest version, otherwise it will default to what was in the image on build. Docker builds will still be auto-built on any commit. If you don't want to use the auto-update function, no action is needed on your part and continue to update docker images as before. Fixed bug where detect-language could return an empty result. Reduced useless debug output that was spamming logs and defaulted DEBUG to True. Added APPEND, which will add f"Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}" at the end of a subtitle.
10 Feb 2024: Added some features from JaiZed's branch such as skipping if SDH subtitles are detected, functions updated to also be able to transcribe audio files, allow individual files to be manually transcribed, and a better implementation of forceLanguage. Added `/batch` endpoint (Thanks JaiZed). Allows you to navigate in a browser to http://subgen_ip:9000/docs and call the batch endpoint which can take a file or a folder to manually transcribe files. Added CLEAR_VRAM_ON_COMPLETE, HF_TRANSFORMERS, HF_BATCH_SIZE. Hugging Face Transformers boast '9x increase', but my limited testing shows it's comparable to faster-whisper or slightly slower. I also have an older 8gb GPU. Simplest way to persist HF Transformer models is to set "HF_HUB_CACHE" and set it to "/subgen/models" for Docker (assuming you have the matching volume).
8 Feb 2024: Added FORCE_DETECTED_LANGUAGE_TO to force a wrongly detected language. Fixed asr to actually use the language passed to it.
5 Feb 2024: General housekeeping, minor tweaks on the TRANSCRIBE_FOLDERS function.
28 Jan 2024: Fixed issue with ffmpeg python module not importing correctly. Removed separate GPU/CPU containers. Also removed the script from installing packages, which should help with odd updates I can't control (from other packages/modules). The image is a couple gigabytes larger, but allows easier maintenance.
19 Dec 2023: Added the ability for Plex and Jellyfin to automatically update metadata so the subtitles shows up properly on playback. (See https://github.com/McCloudS/subgen/pull/33 from Rikiar73574)
31 Oct 2023: Added Bazarr support via Whisper provider.
25 Oct 2023: Added Emby (IE http://192.168.1.111:9000/emby) support and TRANSCRIBE_FOLDERS, which will recurse through the provided folders and generate subtitles. It's geared towards attempting to transcribe existing media without using a webhook.
23 Oct 2023: There are now two docker images, ones for CPU (it's smaller): mccloud/subgen:latest, mccloud/subgen:cpu, the other is for cuda/GPU: mccloud/subgen:cuda. I also added Jellyfin support and considerable cleanup in the script. I also renamed the webhooks, so they will require new configuration/updates on your end. Instead of /webhook they are now /plex, /tautulli, and /jellyfin.
22 Oct 2023: The script should have backwards compatibility with previous environment settings, but just to be sure, look at the new options below. If you don't want to manually edit your environment variables, just edit the script manually. While I have added GPU support, I haven't tested it yet.
19 Oct 2023: And we're back! Uses faster-whisper and stable-ts. Shouldn't break anything from previous settings, but adds a couple new options that aren't documented at this point in time. As of now, this is not a docker image on dockerhub. The potential intent is to move this eventually to a pure python script, primarily to simplify my efforts. Quick and dirty to meet dependencies: pip or `pip3 install flask requests stable-ts faster-whisper`
This potentially has the ability to use CUDA/Nvidia GPU's, but I don't have one set up yet. Tesla T4 is in the mail!
2 Feb 2023: Added Tautulli webhooks back in. Didn't realize Plex webhooks was PlexPass only. See below for instructions to add it back in.
31 Jan 2023 : Rewrote the script substantially to remove Tautulli and fix some variable handling. For some reason my implementation requires the container to be in host mode. My Plex was giving "401 Unauthorized" when attempting to query from docker subnets during API calls. (**Fixed now, it can be in bridge**)
</details>
# What is this?
This will transcribe your personal media on a Plex, Emby, or Jellyfin server to create subtitles (.srt) from audio/video files with the following languages: https://github.com/McCloudS/subgen#audio-languages-supported-via-openai and transcribe or translate them into English. It can also be used as a Whisper provider in Bazarr (See below instructions). It technically has support to transcribe from a foreign language to itself (IE Japanese > Japanese, see [TRANSCRIBE_OR_TRANSLATE](https://github.com/McCloudS/subgen#variables)). It is currently reliant on webhooks from Jellyfin, Emby, Plex, or Tautulli. This uses stable-ts and faster-whisper which can use both Nvidia GPUs and CPUs.
# Why?
Honestly, I built this for me, but saw the utility in other people maybe using it. This works well for my use case. Since having children, I'm either deaf or wanting to have everything quiet. We watch EVERYTHING with subtitles now, and I feel like I can't even understand the show without them. I use Bazarr to auto-download, and gap fill with Plex's built-in capability. This is for everything else. Some shows just won't have subtitles available for some reason or another, or in some cases on my H265 media, they are wildly out of sync.
# What can it do?
* Create .srt subtitles when a media file is added or played which triggers off of Jellyfin, Plex, or Tautulli webhooks. It can also be called via the Whisper provider inside Bazarr.
# How do I set it up?
## Install/Setup
### Standalone/Without Docker
Install python3 (Whisper supports Python 3.9-3.11), ffmpeg, and download launcher.py from this repository. Then run it: `python3 launcher.py -u -i -s`. You need to have matching paths relative to your Plex server/folders, or use USE_PATH_MAPPING. Paths are not needed if you are only using Bazarr. You will need the appropriate NVIDIA drivers installed minimum of CUDA Toolkit 12.3 (12.3.2 is known working): https://developer.nvidia.com/cuda-toolkit-archive
Note: If you have previously had Subgen running in standalone, you may need to run `pip install --upgrade --force-reinstall faster-whisper git+https://github.com/jianfch/stable-ts.git` to force the install of the newer stable-ts package.
#### Using Launcher
launcher.py can launch subgen for you and automate the setup and can take the following options:
![image](https://github.com/McCloudS/subgen/assets/64094529/081f95b2-7a09-498f-a39e-5ea66e0bc7e1)
Using `-s` for Bazarr setup:
![image](https://github.com/McCloudS/subgen/assets/64094529/ade1b886-3b99-4f80-95ac-bb28608259bb)
### Docker
The dockerfile is in the repo along with an example docker-compose file, and is also posted on dockerhub (mccloud/subgen).
If using Subgen without Bazarr, you MUST mount your media volumes in subgen the same way Plex (or your media server) sees them. For example, if Plex uses "/Share/media/TV:/tv" you must have that identical volume in subgen.
`"${APPDATA}/subgen/models:/subgen/models"` is just for storage of the language models. This isn't necessary, but you will have to redownload the models on any new image pulls if you don't use it.
`"${APPDATA}/subgen/subgen.py:/subgen/subgen.py"` If you want to control the version of subgen.py by yourself. Launcher.py can still be used to download a newer version.
If you want to use a GPU, you need to map it accordingly.
#### Unraid
While Unraid doesn't have an app or template for quick install, with minor manual work, you can install it. See [https://github.com/McCloudS/subgen/discussions/137](https://github.com/McCloudS/subgen/discussions/137) for pictures and steps.
## Bazarr
You only need to configure the Whisper Provider as shown below: <br>
![bazarr_configuration](https://wiki.bazarr.media/Additional-Configuration/images/whisper_config.png) <br>
The Docker Endpoint is the ip address and port of your subgen container (IE http://192.168.1.111:9000) See https://wiki.bazarr.media/Additional-Configuration/Whisper-Provider/ for more info. **127.0.0.1 WILL NOT WORK IF YOU ARE RUNNING BAZARR IN A DOCKER CONTAINER!** I recommend not enabling using the Bazarr provider with other webhooks in Subgen, or you will likely be generating duplicate subtitles. If you are using Bazarr, path mapping isn't necessary, as Bazarr sends the file over http.
**The defaults of Subgen will allow it to run in Bazarr with zero configuration. However, you will probably want to change, at a minimum, `TRANSCRIBE_DEVICE` and `WHISPER_MODEL`.**
## Plex
Create a webhook in Plex that will call back to your subgen address, IE: http://192.168.1.111:9000/plex see: https://support.plex.tv/articles/115002267687-webhooks/ You will also need to generate the token to use it. Remember, Plex and Subgen need to be able to see the exact same files at the exact same paths, otherwise you need `USE_PATH_MAPPING`.
## Emby
All you need to do is create a webhook in Emby pointing to your subgen IE: `http://192.168.1.154:9000/emby`, set `Request content type` to `multipart/form-data` and configure your desired events (Usually, `New Media Added`, `Start`, and `Unpause`). See https://github.com/McCloudS/subgen/discussions/115#discussioncomment-10569277 for screenshot examples.
Emby was really nice and provides good information in their responses, so we don't need to add an API token or server url to query for more information.
Remember, Emby and Subgen need to be able to see the exact same files at the exact same paths, otherwise you need `USE_PATH_MAPPING`.
## Tautulli
Create the webhooks in Tautulli with the following settings:
Webhook URL: http://yourdockerip:9000/tautulli
Webhook Method: Post
Triggers: Whatever you want, but you'll likely want "Playback Start" and "Recently Added"
Data: Under Playback Start, JSON Header will be:
```json
{ "source":"Tautulli" }
# Transcription dependencies (optional - required for actual transcription)
pip install stable-ts faster-whisper
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install "av>=10.0.0"
```
Data:
```json
{
"event":"played",
"file":"{file}",
"filename":"{filename}",
"mediatype":"{media_type}"
}
### 2. First run (Setup Wizard)
```bash
# The setup wizard runs automatically on first start
python backend/cli.py server
# Or run setup wizard manually
python backend/cli.py setup
```
Similarly, under Recently Added, Header is:
```json
{ "source":"Tautulli" }
The setup wizard will guide you through:
- **Standalone mode**: Configure library paths, scan rules, and workers
- **Bazarr mode**: Configure as Bazarr subtitle provider (in development)
### 3. Start the server
```bash
# Development (with auto-reload)
python backend/cli.py server --reload
# Production
python backend/cli.py server --host 0.0.0.0 --port 8000 --workers 4
```
Data:
```json
{
"event":"added",
"file":"{file}",
"filename":"{filename}",
"mediatype":"{media_type}"
}
### 4. Access the application
| URL | Description |
|-----|-------------|
| http://localhost:8000 | Web UI (Dashboard) |
| http://localhost:8000/docs | Swagger API Documentation |
| http://localhost:8000/redoc | ReDoc API Documentation |
| http://localhost:8000/health | Health Check Endpoint |
---
## 📋 CLI Commands
```bash
# Server
python backend/cli.py server [options]
--host HOST Host (default: 0.0.0.0)
--port PORT Port (default: 8000)
--reload Auto-reload for development
--workers N Number of uvicorn workers (default: 1)
--log-level LEVEL Log level (default: info)
# Setup wizard
python backend/cli.py setup # Run setup wizard
# Database
python backend/cli.py db init # Initialize database
python backend/cli.py db reset # Reset (WARNING: deletes all data!)
# Standalone worker
python backend/cli.py worker --type cpu
python backend/cli.py worker --type gpu --device-id 0
# Manual scan
python backend/cli.py scan /path/to/media [--no-recursive]
```
## Jellyfin
First, you need to install the Jellyfin webhooks plugin. Then you need to click "Add Generic Destination", name it anything you want, webhook url is your subgen info (IE http://192.168.1.154:9000/jellyfin). Next, check Item Added, Playback Start, and Send All Properties. Last, "Add Request Header" and add the Key: `Content-Type` Value: `application/json`<br><br>Click Save and you should be all set!
---
Remember, Jellyfin and Subgen need to be able to see the exact same files at the exact same paths, otherwise you need `USE_PATH_MAPPING`.
## 🏗️ Architecture
## Variables
```
┌─────────────────────────────────────────────────────────┐
│ FastAPI Server │
│ ┌─────────────────────────────────────────────────┐ │
│ │ REST API (45+ endpoints) │ │
│ │ /api/workers | /api/jobs | /api/settings │ │
│ │ /api/scanner | /api/system | /api/setup │ │
│ └─────────────────────────────────────────────────┘ │
└──────────────────┬──────────────────────────────────────┘
┌──────────────┼──────────────┬──────────────────┐
│ │ │ │
▼ ▼ ▼ ▼
┌────────┐ ┌──────────┐ ┌─────────┐ ┌──────────┐
│ Worker │ │ Queue │ │ Scanner │ │ Database │
│ Pool │◄──┤ Manager │◄──┤ Engine │ │ SQLite/ │
│ CPU/GPU│ │ Priority │ │ Rules + │ │ Postgres │
└────────┘ │ Queue │ │ Watcher │ └──────────┘
└──────────┘ └─────────┘
```
You can define the port via environment variables, but the endpoints are static.
### Data Flow
The following environment variables are available in Docker. They will default to the values listed below.
| Variable | Default Value | Description |
|---------------------------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| TRANSCRIBE_DEVICE | 'cpu' | Can transcribe via gpu (Cuda only) or cpu. Takes option of "cpu", "gpu", "cuda". |
| WHISPER_MODEL | 'medium' | Can be:'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1','large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-large-v3', 'distil-large-v3.5', 'distil-medium.en', 'distil-small.en', 'large-v3-turbo' |
| CONCURRENT_TRANSCRIPTIONS | 2 | Number of files it will transcribe in parallel |
| WHISPER_THREADS | 4 | number of threads to use during computation |
| MODEL_PATH | './models' | This is where the WHISPER_MODEL will be stored. This defaults to placing it where you execute the script in the folder 'models' |
| PROCESS_ADDED_MEDIA | True | will gen subtitles for all media added regardless of existing external/embedded subtitles (based off of SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE) |
| PROCESS_MEDIA_ON_PLAY | True | will gen subtitles for all played media regardless of existing external/embedded subtitles (based off of SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE) |
| SUBTITLE_LANGUAGE_NAME | 'aa' | allows you to pick what it will name the subtitle. Instead of using EN, I'm using AA, so it doesn't mix with existing external EN subs, and AA will populate higher on the list in Plex. This will override the Whisper detected language for a file name. |
| SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE | 'eng' | Will not generate a subtitle if the file has an internal sub matching the 3 letter code of this variable (See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) |
| WORD_LEVEL_HIGHLIGHT | False | Highlights each words as it's spoken in the subtitle. See example video @ https://github.com/jianfch/stable-ts |
| PLEX_SERVER | 'http://plex:32400' | This needs to be set to your local plex server address/port |
| PLEX_TOKEN | 'token here' | This needs to be set to your plex token found by https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/ |
| JELLYFIN_SERVER | 'http://jellyfin:8096' | Set to your Jellyfin server address/port |
| JELLYFIN_TOKEN | 'token here' | Generate a token inside the Jellyfin interface |
| WEBHOOK_PORT | 9000 | Change this if you need a different port for your webhook |
| USE_PATH_MAPPING | False | Similar to sonarr and radarr path mapping, this will attempt to replace paths on file systems that don't have identical paths. Currently only support for one path replacement. Examples below. |
| PATH_MAPPING_FROM | '/tv' | This is the path of my media relative to my Plex server |
| PATH_MAPPING_TO | '/Volumes/TV' | This is the path of that same folder relative to my Mac Mini that will run the script |
| TRANSCRIBE_FOLDERS | '' | Takes a pipe '\|' separated list (For example: /tv\|/movies\|/familyvideos) and iterates through and adds those files to be queued for subtitle generation if they don't have internal subtitles |
| TRANSCRIBE_OR_TRANSLATE | 'transcribe' | Takes either 'transcribe' or 'translate'. Transcribe will transcribe the audio in the same language as the input. Translate will transcribe and translate into English. |
| COMPUTE_TYPE | 'auto' | Set compute-type using the following information: https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md |
| DEBUG | True | Provides some debug data that can be helpful to troubleshoot path mapping and other issues. Fun fact, if this is set to true, any modifications to the script will auto-reload it (if it isn't actively transcoding). Useful to make small tweaks without re-downloading the whole file. |
| FORCE_DETECTED_LANGUAGE_TO | '' | This is to force the model to a language instead of the detected one, takes a 2 letter language code. For example, your audio is French but keeps detecting as English, you would set it to 'fr' |
| CLEAR_VRAM_ON_COMPLETE | True | This will delete the model and do garbage collection when queue is empty. Good if you need to use the VRAM for something else. |
| UPDATE | False | Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates. |
| APPEND | False | Will add the following at the end of a subtitle: "Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}" |
| MONITOR | False | Will monitor `TRANSCRIBE_FOLDERS` for real-time changes to see if we need to generate subtitles |
| USE_MODEL_PROMPT | False | When set to `True`, will use the default prompt stored in greetings_translations "Hello, welcome to my lecture." to try and force the use of punctuation in transcriptions that don't. Automatic `CUSTOM_MODEL_PROMPT` will only work with ASR, but can still be set manually like so: `USE_MODEL_PROMPT=True and CUSTOM_MODEL_PROMPT=Hello, welcome to my lecture.` |
| CUSTOM_MODEL_PROMPT | '' | If `USE_MODEL_PROMPT` is `True`, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples). |
| LRC_FOR_AUDIO_FILES | True | Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff' |
| CUSTOM_REGROUP | 'cm_sl=84_sl=42++++++1' | Attempts to regroup some of the segments to make a cleaner looking subtitle. See https://github.com/McCloudS/subgen/issues/68 for discussion. Set to blank if you want to use Stable-TS default regroups algorithm of `cm_sp=,* /_sg=.5_mg=.3+3_sp=.* /。/?/` |
| DETECT_LANGUAGE_LENGTH | 30 | Detect language on the first x seconds of the audio. |
| SKIP_IF_EXTERNAL_SUBTITLES_EXIST | False | Skip subtitle generation if an external subtitle with the same language code as NAMESUBLANG is present. Used for the case of not regenerating subtitles if I already have `Movie (2002).NAMESUBLANG.srt` from a non-subgen source. |
| SUBGEN_KWARGS | '{}' | Takes a kwargs python dictionary of options you would like to add/override. For advanced users. An example would be `{'vad': True, 'prompt_reset_on_temperature': 0.35}` |
| SKIP_SUBTITLE_LANGUAGES | '' | Takes a pipe separated `\|` list of 3 letter language codes to not generate subtitles for example 'eng\|deu'|
| SUBTITLE_LANGUAGE_NAMING_TYPE | 'ISO_639_2_B' | The type of naming format desired, such as 'ISO_639_1', 'ISO_639_2_T', 'ISO_639_2_B', 'NAME', or 'NATIVE', for example: ("es", "spa", "spa", "Spanish", "Español") |
| SKIP_SUBTITLE_LANGUAGES | '' | Takes a pipe separated `\|` list of 3 letter language codes to skip if the file has audio in that language. This could be used to skip generating subtitles for a language you don't want, like, I speak English, don't generate English subtitles (for example: 'eng\|deu')|
| PREFERRED_AUDIO_LANGUAGE | 'eng' | If there are multiple audio tracks in a file, it will prefer this setting |
| SKIP_IF_TARGET_SUBTITLES_EXIST | True | Skips generation of subtitle if a file matches our desired language already. |
| DETECT_LANGUAGE_OFFSET | 0 | Allows you to shift when to run detect_language, geared towards avoiding introductions or songs. |
| PREFERRED_AUDIO_LANGUAGES | 'eng' | Pipe separated list |
| SKIP_IF_AUDIO_TRACK_IS | '' | Takes a pipe separated list of ISO 639-2 languages. Skips generation of subtitle if the file has the audio file listed. |
| SKIP_ONLY_SUBGEN_SUBTITLES | False | Skips generation of subtitles if the file has "subgen" somewhere in the name |
| SKIP_UNKNOWN_LANGUAGE | False | Skips generation if the file has an unknown language |
| SKIP_IF_NO_LANGUAGE_BUT_SUBTITLES_EXIST | False | Skips generation if file doesn't have an audio stream marked with a language |
| SHOULD_WHISPER_DETECT_AUDIO_LANGUAGE | False | Should Whisper try to detect the language if there is no audio language specified via force language |
| PLEX_QUEUE_NEXT_EPISODE | False | Will queue the next Plex series episode for subtitle generation if subgen is triggered. |
| PLEX_QUEUE_SEASON | False | Will queue the rest of the Plex season for subtitle generation if subgen is triggered. |
| PLEX_QUEUE_SERIES | False | Will queue the whole Plex series for subtitle generation if subgen is triggered. |
| SHOW_IN_SUBNAME_SUBGEN | True | Adds subgen to the subtitle file name. |
| SHOW_IN_SUBNAME_MODEL | True | Adds Whisper model name to the subtitle file name. |
1. **LibraryScanner** detects files (manual/scheduled/watcher)
2. **FileAnalyzer** analyzes with ffprobe (audio tracks, subtitles)
3. **Rules Engine** evaluates against configurable ScanRules
4. **QueueManager** adds job to persistent queue (with deduplication)
5. **Worker** processes with WhisperTranscriber
6. **Output**: Generates `.eng.srt` (transcription) or `.{lang}.srt` (translation)
### Images:
`mccloud/subgen:latest` is GPU or CPU <br>
`mccloud/subgen:cpu` is for CPU only (slightly smaller image)
<br><br>
---
# What are the limitations/problems?
## 🖥️ Web UI
* I made it and know nothing about formal deployment for python coding.
* It's using trained AI models to transcribe, so it WILL mess up
The Web UI includes 6 complete views:
# What's next?
| View | Description |
|------|-------------|
| **Dashboard** | System overview, resource monitoring (CPU/RAM/GPU), recent jobs |
| **Queue** | Job management with filters, pagination, retry/cancel actions |
| **Scanner** | Scanner control, scheduler configuration, manual scan trigger |
| **Rules** | Scan rules CRUD with create/edit modal |
| **Workers** | Worker pool management, add/remove workers dynamically |
| **Settings** | Database-backed settings organized by category |
Fix documentation and make it prettier!
# Audio Languages Supported (via OpenAI)
---
Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian, Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh.
## 🎛️ Configuration
# Known Issues
### Database-backed Settings
At this time, if you have high CPU usage when not actively transcribing on the CPU only docker, try the GPU one.
All configuration is stored in the database and manageable via:
- **Setup Wizard** (first run)
- **Settings page** in Web UI
- **Settings API** (`/api/settings`)
# Additional reading:
### Settings Categories
* https://github.com/openai/whisper (Original OpenAI project)
* https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes (2 letter subtitle codes)
| Category | Settings |
|----------|----------|
| **General** | Operation mode, library paths, log level |
| **Workers** | CPU/GPU worker counts, auto-start, health check interval |
| **Transcription** | Whisper model, compute type, skip existing files |
| **Scanner** | Enable/disable, schedule interval, file watcher |
| **Bazarr** | Provider mode (in development) |
# Credits:
* Whisper.cpp (https://github.com/ggerganov/whisper.cpp) for original implementation
* Google
* ffmpeg
* https://github.com/jianfch/stable-ts
* https://github.com/guillaumekln/faster-whisper
* Whisper ASR Webservice (https://github.com/ahmetoner/whisper-asr-webservice) for how to implement Bazarr webhooks.
### Environment Variables
Only `DATABASE_URL` is required in `.env`:
```bash
# SQLite (default)
DATABASE_URL=sqlite:///./transcriptarr.db
# PostgreSQL (production)
DATABASE_URL=postgresql://user:pass@localhost/transcriptarr
```
---
## 📚 Documentation
| Document | Description |
|----------|-------------|
| [docs/API.md](docs/API.md) | Complete REST API documentation (45+ endpoints) |
| [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Backend architecture and components |
| [docs/FRONTEND.md](docs/FRONTEND.md) | Frontend structure and components |
| [docs/CONFIGURATION.md](docs/CONFIGURATION.md) | Configuration system and settings |
---
## 🐳 Docker
```bash
# CPU only
docker build -t transcriptorio:cpu -f Dockerfile.cpu .
# GPU (NVIDIA CUDA)
docker build -t transcriptorio:gpu -f Dockerfile .
# Run
docker run -d \
-p 8000:8000 \
-v /path/to/media:/media \
-v /path/to/data:/app/data \
--gpus all \
transcriptorio:gpu
```
---
## 📊 Project Status
| Component | Status | Progress |
|-----------|--------|----------|
| Core Backend | ✅ Complete | 100% |
| REST API (45+ endpoints) | ✅ Complete | 100% |
| Worker System | ✅ Complete | 100% |
| Library Scanner | ✅ Complete | 100% |
| Web UI (6 views) | ✅ Complete | 100% |
| Settings System | ✅ Complete | 100% |
| Setup Wizard | ✅ Complete | 100% |
| Bazarr Provider | ⏳ In Development | 30% |
| Testing Suite | ⏳ Pending | 0% |
| Docker | ⏳ Pending | 0% |
---
## 🤝 Contributing
Contributions are welcome!
---
## 📝 Credits
Based on [Subgen](https://github.com/McCloudS/subgen) by McCloudS.
Architecture redesigned with:
- FastAPI for REST APIs
- SQLAlchemy for persistence
- Multiprocessing for workers
- Whisper (stable-ts / faster-whisper) for transcription
- Vue 3 + Pinia for frontend
---
## 📄 License
MIT License - See [LICENSE](LICENSE) for details.

611
backend/API.md Normal file
View File

@@ -0,0 +1,611 @@
# TranscriptorIO REST API
Documentación completa de las APIs REST del backend de TranscriptorIO.
## 🚀 Inicio Rápido
### Ejecutar el servidor
```bash
# Usando el CLI
python backend/cli.py server --host 0.0.0.0 --port 8000
# Con auto-reload (desarrollo)
python backend/cli.py server --reload
# Con múltiples workers (producción)
python backend/cli.py server --workers 4
```
### Documentación interactiva
Una vez iniciado el servidor, accede a:
- **Swagger UI**: http://localhost:8000/docs
- **ReDoc**: http://localhost:8000/redoc
## 📋 Endpoints
### System Status
#### `GET /`
Información básica de la API.
**Response:**
```json
{
"name": "TranscriptorIO API",
"version": "1.0.0",
"status": "running"
}
```
#### `GET /health`
Health check para monitoring.
**Response:**
```json
{
"status": "healthy",
"database": "connected",
"workers": 2,
"queue_size": 5
}
```
#### `GET /api/status`
Estado completo del sistema.
**Response:**
```json
{
"system": {
"status": "running",
"uptime_seconds": 3600.5
},
"workers": {
"total_workers": 2,
"cpu_workers": 1,
"gpu_workers": 1,
"idle_workers": 1,
"busy_workers": 1,
"total_jobs_completed": 42,
"total_jobs_failed": 2
},
"queue": {
"total": 100,
"queued": 5,
"processing": 2,
"completed": 90,
"failed": 3
},
"scanner": {
"scheduler_running": true,
"next_scan_time": "2026-01-13T02:00:00",
"watcher_running": true
}
}
```
---
## 👷 Workers API (`/api/workers`)
### `GET /api/workers`
Lista todos los workers activos.
**Response:**
```json
[
{
"worker_id": "worker-cpu-0",
"worker_type": "cpu",
"device_id": null,
"status": "busy",
"current_job_id": "abc123",
"jobs_completed": 10,
"jobs_failed": 0,
"uptime_seconds": 3600.5,
"current_job_progress": 45.2,
"current_job_eta": 120
}
]
```
### `GET /api/workers/stats`
Estadísticas del pool de workers.
**Response:**
```json
{
"total_workers": 2,
"cpu_workers": 1,
"gpu_workers": 1,
"idle_workers": 1,
"busy_workers": 1,
"stopped_workers": 0,
"error_workers": 0,
"total_jobs_completed": 42,
"total_jobs_failed": 2,
"uptime_seconds": 3600.5,
"is_running": true
}
```
### `GET /api/workers/{worker_id}`
Obtener estado de un worker específico.
**Response:** Same as individual worker in list
### `POST /api/workers`
Añadir un nuevo worker al pool.
**Request:**
```json
{
"worker_type": "gpu",
"device_id": 0
}
```
**Response:** Worker status object
### `DELETE /api/workers/{worker_id}`
Remover un worker del pool.
**Query Params:**
- `timeout` (float, default=30.0): Timeout en segundos
**Response:**
```json
{
"message": "Worker worker-cpu-0 removed successfully"
}
```
### `POST /api/workers/pool/start`
Iniciar el pool de workers.
**Query Params:**
- `cpu_workers` (int, default=0)
- `gpu_workers` (int, default=0)
**Response:**
```json
{
"message": "Worker pool started: 1 CPU workers, 1 GPU workers"
}
```
### `POST /api/workers/pool/stop`
Detener el pool de workers.
**Query Params:**
- `timeout` (float, default=30.0)
**Response:**
```json
{
"message": "Worker pool stopped successfully"
}
```
---
## 📋 Jobs API (`/api/jobs`)
### `GET /api/jobs`
Lista de trabajos con paginación.
**Query Params:**
- `status_filter` (optional): queued, processing, completed, failed, cancelled
- `page` (int, default=1): Número de página
- `page_size` (int, default=50): Items por página
**Response:**
```json
{
"jobs": [
{
"id": "abc123",
"file_path": "/media/anime/episode.mkv",
"file_name": "episode.mkv",
"status": "completed",
"priority": 10,
"source_lang": "ja",
"target_lang": "es",
"quality_preset": "fast",
"transcribe_or_translate": "transcribe",
"progress": 100.0,
"current_stage": "finalizing",
"eta_seconds": null,
"created_at": "2026-01-12T10:00:00",
"started_at": "2026-01-12T10:00:05",
"completed_at": "2026-01-12T10:05:30",
"output_path": "/media/anime/episode.es.srt",
"segments_count": 245,
"error": null,
"retry_count": 0,
"worker_id": "worker-gpu-0",
"vram_used_mb": 4096,
"processing_time_seconds": 325.5,
"model_used": "large-v3",
"device_used": "cuda:0"
}
],
"total": 100,
"page": 1,
"page_size": 50
}
```
### `GET /api/jobs/stats`
Estadísticas de la cola.
**Response:**
```json
{
"total_jobs": 100,
"queued": 5,
"processing": 2,
"completed": 90,
"failed": 3,
"cancelled": 0
}
```
### `GET /api/jobs/{job_id}`
Obtener un trabajo específico.
**Response:** Job object (same as in list)
### `POST /api/jobs`
Crear un nuevo trabajo de transcripción.
**Request:**
```json
{
"file_path": "/media/anime/Attack on Titan S04E01.mkv",
"file_name": "Attack on Titan S04E01.mkv",
"source_lang": "ja",
"target_lang": "es",
"quality_preset": "fast",
"transcribe_or_translate": "transcribe",
"priority": 10,
"is_manual_request": true
}
```
**Response:** Created job object
### `POST /api/jobs/{job_id}/retry`
Reintentar un trabajo fallido.
**Response:** Updated job object
### `DELETE /api/jobs/{job_id}`
Cancelar un trabajo.
**Response:**
```json
{
"message": "Job abc123 cancelled successfully"
}
```
### `POST /api/jobs/queue/clear`
Limpiar trabajos completados.
**Response:**
```json
{
"message": "Cleared 42 completed jobs"
}
```
---
## 📏 Scan Rules API (`/api/scan-rules`)
### `GET /api/scan-rules`
Lista todas las reglas de escaneo.
**Query Params:**
- `enabled_only` (bool, default=false): Solo reglas habilitadas
**Response:**
```json
[
{
"id": 1,
"name": "Japanese anime without Spanish subs",
"enabled": true,
"priority": 10,
"conditions": {
"audio_language_is": "ja",
"audio_language_not": null,
"audio_track_count_min": null,
"has_embedded_subtitle_lang": null,
"missing_embedded_subtitle_lang": "es",
"missing_external_subtitle_lang": "es",
"file_extension": ".mkv,.mp4"
},
"action": {
"action_type": "transcribe",
"target_language": "es",
"quality_preset": "fast",
"job_priority": 5
},
"created_at": "2026-01-12T10:00:00",
"updated_at": null
}
]
```
### `GET /api/scan-rules/{rule_id}`
Obtener una regla específica.
**Response:** Rule object (same as in list)
### `POST /api/scan-rules`
Crear una nueva regla de escaneo.
**Request:**
```json
{
"name": "Japanese anime without Spanish subs",
"enabled": true,
"priority": 10,
"conditions": {
"audio_language_is": "ja",
"missing_embedded_subtitle_lang": "es",
"missing_external_subtitle_lang": "es",
"file_extension": ".mkv,.mp4"
},
"action": {
"action_type": "transcribe",
"target_language": "es",
"quality_preset": "fast",
"job_priority": 5
}
}
```
**Response:** Created rule object
### `PUT /api/scan-rules/{rule_id}`
Actualizar una regla.
**Request:** Same as POST (all fields optional)
**Response:** Updated rule object
### `DELETE /api/scan-rules/{rule_id}`
Eliminar una regla.
**Response:**
```json
{
"message": "Scan rule 1 deleted successfully"
}
```
### `POST /api/scan-rules/{rule_id}/toggle`
Activar/desactivar una regla.
**Response:** Updated rule object
---
## 🔍 Scanner API (`/api/scanner`)
### `GET /api/scanner/status`
Estado del scanner.
**Response:**
```json
{
"scheduler_enabled": true,
"scheduler_running": true,
"next_scan_time": "2026-01-13T02:00:00",
"watcher_enabled": true,
"watcher_running": true,
"watched_paths": ["/media/anime", "/media/movies"],
"last_scan_time": "2026-01-12T02:00:00",
"total_scans": 1523
}
```
### `POST /api/scanner/scan`
Ejecutar escaneo manual.
**Request:**
```json
{
"paths": ["/media/anime", "/media/movies"],
"recursive": true
}
```
**Response:**
```json
{
"scanned_files": 150,
"matched_files": 25,
"jobs_created": 25,
"skipped_files": 125,
"paths_scanned": ["/media/anime", "/media/movies"]
}
```
### `POST /api/scanner/scheduler/start`
Iniciar escaneo programado.
**Request:**
```json
{
"enabled": true,
"cron_expression": "0 2 * * *",
"paths": ["/media/anime"],
"recursive": true
}
```
**Response:**
```json
{
"message": "Scheduler started successfully"
}
```
### `POST /api/scanner/scheduler/stop`
Detener escaneo programado.
**Response:**
```json
{
"message": "Scheduler stopped successfully"
}
```
### `POST /api/scanner/watcher/start`
Iniciar observador de archivos.
**Request:**
```json
{
"enabled": true,
"paths": ["/media/anime"],
"recursive": true
}
```
**Response:**
```json
{
"message": "File watcher started successfully"
}
```
### `POST /api/scanner/watcher/stop`
Detener observador de archivos.
**Response:**
```json
{
"message": "File watcher stopped successfully"
}
```
### `POST /api/scanner/analyze`
Analizar un archivo específico.
**Query Params:**
- `file_path` (required): Ruta al archivo
**Response:**
```json
{
"file_path": "/media/anime/episode.mkv",
"audio_tracks": [
{
"index": 0,
"codec": "aac",
"language": "ja",
"channels": 2
}
],
"embedded_subtitles": [],
"external_subtitles": [
{
"path": "/media/anime/episode.en.srt",
"language": "en"
}
],
"duration_seconds": 1440.5,
"is_video": true
}
```
---
## 🔐 Autenticación
> **TODO**: Implementar autenticación con JWT tokens
---
## 📊 Códigos de Error
- `200 OK`: Éxito
- `201 Created`: Recurso creado
- `400 Bad Request`: Parámetros inválidos
- `404 Not Found`: Recurso no encontrado
- `409 Conflict`: Conflicto (ej: duplicado)
- `500 Internal Server Error`: Error del servidor
---
## 🧪 Testing
### cURL Examples
```bash
# Get system status
curl http://localhost:8000/api/status
# Create a job
curl -X POST http://localhost:8000/api/jobs \
-H "Content-Type: application/json" \
-d '{
"file_path": "/media/anime/episode.mkv",
"file_name": "episode.mkv",
"target_lang": "es",
"quality_preset": "fast"
}'
# Add a GPU worker
curl -X POST http://localhost:8000/api/workers \
-H "Content-Type: application/json" \
-d '{
"worker_type": "gpu",
"device_id": 0
}'
```
### Python Example
```python
import requests
# Base URL
BASE_URL = "http://localhost:8000"
# Create a job
response = requests.post(
f"{BASE_URL}/api/jobs",
json={
"file_path": "/media/anime/episode.mkv",
"file_name": "episode.mkv",
"target_lang": "es",
"quality_preset": "fast"
}
)
job = response.json()
print(f"Job created: {job['id']}")
# Check job status
response = requests.get(f"{BASE_URL}/api/jobs/{job['id']}")
status = response.json()
print(f"Job status: {status['status']} - {status['progress']}%")
```
---
## 📝 Notas
- Todas las fechas están en formato ISO 8601 UTC
- Los idiomas usan códigos ISO 639-1 (2 letras: ja, en, es, fr, etc.)
- La paginación usa índices base-1 (primera página = 1)
- Los workers se identifican por ID único generado automáticamente

View File

@@ -1,185 +0,0 @@
# TranscriptorIO Backend
This is the redesigned backend for TranscriptorIO, a complete fork of SubGen with modern asynchronous architecture.
## 🎯 Goal
Replace SubGen's synchronous non-persistent system with a modern Tdarr-inspired architecture:
- ✅ Persistent queue (SQLite/PostgreSQL/MariaDB)
- ✅ Asynchronous processing
- ✅ Job prioritization
- ✅ Complete state visibility
- ✅ No Bazarr timeouts
## 📁 Structure
```
backend/
├── core/
│ ├── database.py # Multi-backend database management
│ ├── models.py # SQLAlchemy models (Job, etc.)
│ ├── queue_manager.py # Asynchronous persistent queue
│ └── __init__.py
├── api/ # (coming soon) FastAPI endpoints
├── config.py # Centralized configuration with Pydantic
└── README.md # This file
```
## 🚀 Setup
### 1. Install dependencies
```bash
pip install -r requirements.txt
```
### 2. Configure .env
Copy `.env.example` to `.env` and adjust as needed:
```bash
cp .env.example .env
```
#### Database Options
**SQLite (default)**:
```env
DATABASE_URL=sqlite:///./transcriptarr.db
```
**PostgreSQL**:
```bash
pip install psycopg2-binary
```
```env
DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr
```
**MariaDB/MySQL**:
```bash
pip install pymysql
```
```env
DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr
```
### 3. Choose operation mode
**Standalone Mode** (automatically scans your library):
```env
TRANSCRIPTARR_MODE=standalone
LIBRARY_PATHS=/media/anime|/media/movies
AUTO_SCAN_ENABLED=True
SCAN_INTERVAL_MINUTES=30
```
**Provider Mode** (receives jobs from Bazarr):
```env
TRANSCRIPTARR_MODE=provider
BAZARR_URL=http://bazarr:6767
BAZARR_API_KEY=your_api_key
```
**Hybrid Mode** (both simultaneously):
```env
TRANSCRIPTARR_MODE=standalone,provider
```
## 🧪 Testing
Run the test script to verify everything works:
```bash
python test_backend.py
```
This will verify:
- ✓ Configuration loading
- ✓ Database connection
- ✓ Table creation
- ✓ Queue operations (add, get, deduplicate)
## 📊 Implemented Components
### config.py
- Centralized configuration with Pydantic
- Automatic environment variable validation
- Multi-backend database support
- Operation mode configuration
### database.py
- Connection management with SQLAlchemy
- Support for SQLite, PostgreSQL, MariaDB
- Backend-specific optimizations
- SQLite: WAL mode, optimized cache
- PostgreSQL: connection pooling, pre-ping
- MariaDB: utf8mb4 charset, pooling
- Health checks and statistics
### models.py
- Complete `Job` model with:
- States: queued, processing, completed, failed, cancelled
- Stages: pending, detecting_language, transcribing, translating, etc.
- Quality presets: fast, balanced, best
- Progress tracking (0-100%)
- Complete timestamps
- Retry logic
- Worker assignment
- Optimized indexes for common queries
### queue_manager.py
- Thread-safe persistent queue
- Job prioritization
- Duplicate detection
- Automatic retry for failed jobs
- Real-time statistics
- Automatic cleanup of old jobs
## 🔄 Comparison with SubGen
| Feature | SubGen | TranscriptorIO |
|---------|--------|----------------|
| Queue | In-memory (lost on restart) | **Persistent in DB** |
| Processing | Synchronous (blocks threads) | **Asynchronous** |
| Prioritization | No | **Yes (configurable)** |
| Visibility | No progress/ETA | **Progress + real-time ETA** |
| Deduplication | Basic (memory only) | **Persistent + intelligent** |
| Retries | No | **Automatic with limit** |
| Database | No | **SQLite/PostgreSQL/MariaDB** |
| Bazarr Timeouts | Yes (>5min = 24h throttle) | **No (async)** |
## 📝 Next Steps
1. **Worker Pool** - Asynchronous worker system
2. **REST API** - FastAPI endpoints for management
3. **WebSocket** - Real-time updates
4. **Transcriber** - Whisper wrapper with progress callbacks
5. **Bazarr Provider** - Improved async provider
6. **Standalone Scanner** - Automatic library scanning
## 🐛 Troubleshooting
### Error: "No module named 'backend'"
Make sure to run scripts from the project root:
```bash
cd /home/dasemu/Hacking/Transcriptarr
python test_backend.py
```
### Error: Database locked (SQLite)
SQLite is configured with WAL mode for better concurrency. If you still have issues, consider using PostgreSQL for production.
### Error: pydantic.errors.ConfigError
Verify that all required variables are in your `.env`:
```bash
cp .env.example .env
# Edit .env with your values
```
## 📚 Documentation
See `CLAUDE.md` for complete architecture and project roadmap.

View File

@@ -1 +1,17 @@
"""TranscriptorIO API Module."""
"""API module for TranscriptorIO backend."""
from backend.api.workers import router as workers_router
from backend.api.jobs import router as jobs_router
from backend.api.scan_rules import router as scan_rules_router
from backend.api.scanner import router as scanner_router
from backend.api.settings import router as settings_router
from backend.api.setup_wizard import router as setup_router
# Public API of the package: the router objects imported above, re-exported
# under their aliased names.
# NOTE(review): backend/api/filesystem.py also defines a router, but it is not
# imported or listed here — confirm whether it is registered elsewhere.
__all__ = [
    "workers_router",
    "jobs_router",
    "scan_rules_router",
    "scanner_router",
    "settings_router",
    "setup_router",
]

113
backend/api/filesystem.py Normal file
View File

@@ -0,0 +1,113 @@
"""Filesystem browsing API for path selection."""
import logging
import os
from typing import List, Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared in this module is mounted under /api/filesystem.
router = APIRouter(prefix="/api/filesystem", tags=["filesystem"])
class DirectoryItem(BaseModel):
    """A single filesystem entry returned by the browsing endpoints."""

    # Display name of the entry.
    name: str
    # Path of the entry as reported to the client.
    path: str
    # True when the entry is a directory.
    is_directory: bool
    # True when the server process has read access to the entry.
    is_readable: bool
class DirectoryListingResponse(BaseModel):
    """Payload describing the contents of one browsed directory."""

    # Absolute path of the directory that was listed.
    current_path: str
    # Parent directory, or None when there is no parent to navigate to.
    parent_path: Optional[str] = None
    # Entries found inside current_path.
    items: List[DirectoryItem]
@router.get("/browse", response_model=DirectoryListingResponse)
async def browse_directory(path: str = "/"):
    """
    Browse a filesystem directory and list its sub-directories.

    Only directories are returned; regular files are skipped. Entries that
    cannot be inspected are logged at debug level and omitted.

    Args:
        path: Directory to list. It is converted to an absolute path first.

    Returns:
        DirectoryListingResponse with the absolute path, its parent
        (None at a filesystem root) and the sub-directories sorted by name.

    Raises:
        404: Path does not exist
        400: Path is not a directory
        403: Permission denied while listing the directory
        500: Any other unexpected error
    """
    try:
        path = os.path.abspath(path)

        if not os.path.exists(path):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Path does not exist: {path}"
            )

        if not os.path.isdir(path):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Path is not a directory: {path}"
            )

        # A filesystem root is its own dirname. Comparing against the literal
        # "/" misses other roots (e.g. "//", which abspath preserves on POSIX,
        # or a Windows drive root) and would report the root as its own parent.
        parent = os.path.dirname(path)
        parent_path = parent if parent != path else None

        items = []
        try:
            entries = os.listdir(path)
            entries.sort()
            for entry in entries:
                entry_path = os.path.join(path, entry)
                try:
                    is_dir = os.path.isdir(entry_path)
                    is_readable = os.access(entry_path, os.R_OK)
                    if is_dir:
                        items.append(DirectoryItem(
                            name=entry,
                            path=entry_path,
                            is_directory=True,
                            is_readable=is_readable
                        ))
                except (PermissionError, OSError) as e:
                    # Best effort: one unreadable entry must not fail the listing.
                    logger.debug(f"Cannot access {entry_path}: {e}")
                    continue
        except PermissionError:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"Permission denied: {path}"
            )

        return DirectoryListingResponse(
            current_path=path,
            parent_path=parent_path,
            items=items
        )
    except HTTPException:
        # Already a well-formed API error; let it propagate unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for the unexpected failure.
        logger.exception(f"Error browsing directory {path}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error browsing directory: {str(e)}"
        ) from e
@router.get("/common-paths", response_model=List[DirectoryItem])
async def get_common_paths():
    """
    Return the well-known starting directories that exist on this host.

    Each candidate is reported with its full path as both name and path,
    together with whether the server process can read it.
    """
    candidates = ("/", "/home", "/media", "/mnt", "/opt", "/srv", "/var", "/tmp")
    found = []

    for candidate in candidates:
        # Skip anything that is missing or is not a directory.
        if not (os.path.exists(candidate) and os.path.isdir(candidate)):
            continue
        try:
            readable = os.access(candidate, os.R_OK)
            found.append(
                DirectoryItem(
                    name=candidate,
                    path=candidate,
                    is_directory=True,
                    is_readable=readable,
                )
            )
        except (PermissionError, OSError):
            continue

    return found

379
backend/api/jobs.py Normal file
View File

@@ -0,0 +1,379 @@
"""Job management API routes."""
import logging
from typing import List, Optional
from fastapi import APIRouter, HTTPException, Query, status
from pydantic import BaseModel, Field
from backend.core.models import JobStatus, QualityPreset
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared in this module is mounted under /api/jobs.
router = APIRouter(prefix="/api/jobs", tags=["jobs"])
# === REQUEST/RESPONSE MODELS ===
class JobCreateRequest(BaseModel):
    """Body accepted by the job-creation endpoint."""

    # Required fields
    file_path: str = Field(..., description="Full path to the media file")
    file_name: str = Field(..., description="Name of the file")
    target_lang: str = Field(..., description="Target subtitle language (ISO 639-2)")

    # Optional fields with defaults
    source_lang: Optional[str] = Field(
        default=None, description="Source language (ISO 639-2)"
    )
    quality_preset: str = Field(
        default="fast", description="Quality preset: fast, balanced, best"
    )
    transcribe_or_translate: str = Field(
        default="transcribe", description="Operation: transcribe or translate"
    )
    priority: int = Field(
        default=0, description="Job priority (higher = processed first)"
    )
    is_manual_request: bool = Field(
        default=True, description="Whether this is a manual request"
    )

    class Config:
        # Example payload shown in the generated API schema.
        json_schema_extra = {
            "example": {
                "file_path": "/media/anime/Attack on Titan S04E01.mkv",
                "file_name": "Attack on Titan S04E01.mkv",
                "source_lang": "jpn",
                "target_lang": "spa",
                "quality_preset": "fast",
                "transcribe_or_translate": "transcribe",
                "priority": 10,
            }
        }
class JobResponse(BaseModel):
    """Serialized view of a job as returned by the jobs API."""

    # --- Identity ---
    id: str
    file_path: str
    file_name: str
    # Defaults to "transcription" for backward compatibility.
    job_type: str = "transcription"

    # --- Scheduling ---
    status: str
    priority: int

    # --- Requested operation ---
    source_lang: Optional[str]
    target_lang: Optional[str]
    quality_preset: Optional[str]
    transcribe_or_translate: str

    # --- Progress ---
    progress: float
    current_stage: Optional[str]
    eta_seconds: Optional[int]

    # --- Timestamps (strings) ---
    created_at: Optional[str]
    started_at: Optional[str]
    completed_at: Optional[str]

    # --- Outcome ---
    output_path: Optional[str]
    segments_count: Optional[int]
    error: Optional[str]
    retry_count: int

    # --- Execution details ---
    worker_id: Optional[str]
    vram_used_mb: Optional[int]
    processing_time_seconds: Optional[float]
    model_used: Optional[str]
    device_used: Optional[str]
class JobListResponse(BaseModel):
    """One page of jobs plus the pagination values that produced it."""

    # Jobs on the requested page.
    jobs: List[JobResponse]
    # Count of jobs reported alongside the page.
    total: int
    # Page number that was requested.
    page: int
    # Page size that was requested.
    page_size: int
class QueueStatsResponse(BaseModel):
    """Job counters for the queue, overall and per status."""

    # Overall number of jobs.
    total_jobs: int
    # Per-status counters.
    queued: int
    processing: int
    completed: int
    failed: int
    cancelled: int
class MessageResponse(BaseModel):
    """Minimal response carrying a single human-readable message."""

    # Text describing the outcome of the operation.
    message: str
# === ROUTES ===
@router.get("/", response_model=JobListResponse)
async def get_jobs(
    status_filter: Optional[str] = Query(None, description="Filter by status"),
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(50, ge=1, le=500, description="Items per page"),
):
    """
    List jobs, optionally restricted to one status, one page at a time.

    Args:
        status_filter: Job status to keep (matched case-insensitively
            against JobStatus); omit to list every job
        page: 1-based page number
        page_size: Maximum number of jobs on the page

    Returns:
        The requested page of jobs together with the total count

    Raises:
        400: status_filter is not a valid job status
    """
    from backend.core.queue_manager import queue_manager

    # Translate the raw query value into the enum the queue manager expects.
    wanted_status = None
    if status_filter:
        try:
            wanted_status = JobStatus(status_filter.lower())
        except ValueError:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid status: {status_filter}"
            )

    # Fetch the page, then the total count for the same filter.
    skip = (page - 1) * page_size
    page_of_jobs = queue_manager.get_all_jobs(
        status_filter=wanted_status,
        limit=page_size,
        offset=skip,
    )
    matching_total = queue_manager.count_jobs(status_filter=wanted_status)

    serialized = []
    for entry in page_of_jobs:
        serialized.append(JobResponse(**entry.to_dict()))

    return JobListResponse(
        jobs=serialized,
        total=matching_total,
        page=page,
        page_size=page_size,
    )
@router.get("/stats", response_model=QueueStatsResponse)
async def get_queue_stats():
    """
    Report queue statistics.

    Returns:
        QueueStatsResponse built from queue_manager.get_queue_stats().
    """
    from backend.core.queue_manager import queue_manager

    return QueueStatsResponse(**queue_manager.get_queue_stats())
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """
    Fetch a single job by its ID.

    Args:
        job_id: Job ID

    Returns:
        Job object

    Raises:
        404: Job not found
    """
    from backend.core.database import database
    from backend.core.models import Job

    with database.get_session() as session:
        found = session.query(Job).filter(Job.id == job_id).first()
        if not found:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Job {job_id} not found"
            )
        # Serialize while the session is still open.
        payload = found.to_dict()

    return JobResponse(**payload)
@router.post("/", response_model=JobResponse, status_code=status.HTTP_201_CREATED)
async def create_job(request: JobCreateRequest):
    """
    Queue a new transcription job.

    Args:
        request: Job creation request

    Returns:
        Created job object

    Raises:
        400: Invalid quality preset
        409: Job already exists for this file
    """
    from backend.core.queue_manager import queue_manager

    # The preset arrives as free text; map it onto the enum or reject it.
    try:
        preset = QualityPreset(request.quality_preset.lower())
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid quality preset: {request.quality_preset}"
        )

    created = queue_manager.add_job(
        file_path=request.file_path,
        file_name=request.file_name,
        source_lang=request.source_lang,
        target_lang=request.target_lang,
        quality_preset=preset,
        transcribe_or_translate=request.transcribe_or_translate,
        priority=request.priority,
        is_manual_request=request.is_manual_request,
    )

    # A falsy result from add_job is reported as a duplicate.
    if not created:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Job already exists for {request.file_name}"
        )

    logger.info(f"Job {created.id} created via API for {request.file_name}")
    return JobResponse(**created.to_dict())
@router.post("/{job_id}/retry", response_model=JobResponse)
async def retry_job(job_id: str):
    """
    Retry a failed job.

    Args:
        job_id: Job ID to retry

    Returns:
        Updated job object

    Raises:
        404: Job not found (before the retry, or if it vanished afterwards)
        400: Job cannot be retried (status is not FAILED)
        500: The queue manager reported that the retry failed
    """
    from backend.core.queue_manager import queue_manager
    from backend.core.database import database
    from backend.core.models import Job, JobStatus

    # Check if job exists and can be retried (within session)
    with database.get_session() as session:
        job = session.query(Job).filter(Job.id == job_id).first()
        if not job:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Job {job_id} not found"
            )
        # Access attributes while session is active
        can_retry = job.status == JobStatus.FAILED
        current_status = job.status.value

    if not can_retry:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job {job_id} cannot be retried (status={current_status})"
        )

    # Reset job to queued
    success = queue_manager.retry_job(job_id)
    if not success:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to retry job {job_id}"
        )

    # Re-read the job so the response reflects its post-retry state.
    with database.get_session() as session:
        job = session.query(Job).filter(Job.id == job_id).first()
        if not job:
            # The job was removed between the retry and this read. Building a
            # JobResponse from an empty dict would only produce an opaque
            # validation error, so report the real condition instead.
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Job {job_id} not found"
            )
        job_dict = job.to_dict()

    logger.info(f"Job {job_id} retried via API")
    return JobResponse(**job_dict)
@router.delete("/{job_id}", response_model=MessageResponse)
async def cancel_job(job_id: str):
    """
    Cancel a job that has not yet reached a terminal state.

    Args:
        job_id: Job ID to cancel

    Returns:
        Success message

    Raises:
        404: Job not found
        400: Job is already completed, failed or cancelled
        500: The queue manager reported that the cancellation failed
    """
    from backend.core.queue_manager import queue_manager
    from backend.core.database import database
    from backend.core.models import Job, JobStatus

    finished_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)

    # Read everything needed from the row while the session is open.
    with database.get_session() as session:
        target = session.query(Job).filter(Job.id == job_id).first()
        if not target:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Job {job_id} not found"
            )
        already_finished = target.status in finished_states
        status_label = target.status.value

    if already_finished:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job {job_id} is already in terminal state: {status_label}"
        )

    if not queue_manager.cancel_job(job_id):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to cancel job {job_id}"
        )

    logger.info(f"Job {job_id} cancelled via API")
    return MessageResponse(message=f"Job {job_id} cancelled successfully")
@router.post("/{job_id}/cancel", response_model=MessageResponse)
async def cancel_job_post(job_id: str):
    """
    Cancel a job (POST alias of the DELETE endpoint).

    Args:
        job_id: Job ID to cancel

    Returns:
        Success message
    """
    # Delegate to the DELETE handler so both routes share one implementation.
    outcome = await cancel_job(job_id)
    return outcome
@router.post("/queue/clear", response_model=MessageResponse)
async def clear_completed_jobs():
    """
    Remove completed jobs from the queue.

    Returns:
        Success message including the count returned by the queue manager
    """
    from backend.core.queue_manager import queue_manager

    cleared = queue_manager.clear_completed_jobs()
    logger.info(f"Cleared {cleared} completed jobs via API")
    return MessageResponse(message=f"Cleared {cleared} completed jobs")

351
backend/api/scan_rules.py Normal file
View File

@@ -0,0 +1,351 @@
"""Scan rules management API routes."""
import logging
from typing import List, Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/scan-rules", tags=["scan-rules"])
# === REQUEST/RESPONSE MODELS ===
class ScanRuleConditions(BaseModel):
    """Optional match conditions of a scan rule; every field defaults to None."""

    # Audio-track conditions.
    audio_language_is: Optional[str] = Field(
        default=None, description="Audio language must be (ISO 639-2)"
    )
    audio_language_not: Optional[str] = Field(
        default=None, description="Audio language must NOT be (comma-separated)"
    )
    audio_track_count_min: Optional[int] = Field(
        default=None, description="Minimum number of audio tracks"
    )

    # Subtitle conditions.
    has_embedded_subtitle_lang: Optional[str] = Field(
        default=None, description="Must have embedded subtitle in language"
    )
    missing_embedded_subtitle_lang: Optional[str] = Field(
        default=None, description="Must NOT have embedded subtitle"
    )
    missing_external_subtitle_lang: Optional[str] = Field(
        default=None, description="Must NOT have external .srt file"
    )

    # File-name condition.
    file_extension: Optional[str] = Field(
        default=None, description="File extensions filter (comma-separated)"
    )
class ScanRuleAction(BaseModel):
    """Action part of a scan rule; only target_language is required."""

    action_type: str = Field(
        default="transcribe", description="Action type: transcribe or translate"
    )
    target_language: str = Field(
        ..., description="Target subtitle language (ISO 639-2)"
    )
    quality_preset: str = Field(
        default="fast", description="Quality preset: fast, balanced, best"
    )
    job_priority: int = Field(default=0, description="Priority for created jobs")
class ScanRuleCreateRequest(BaseModel):
    """Payload for creating a scan rule: metadata, conditions and action."""

    # Rule metadata.
    name: str = Field(..., description="Rule name")
    enabled: bool = Field(default=True, description="Whether rule is enabled")
    priority: int = Field(default=0, description="Rule priority (higher = evaluated first)")

    # Nested condition and action blocks; both are required.
    conditions: ScanRuleConditions
    action: ScanRuleAction

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "name": "Japanese anime without Spanish subs",
                "enabled": True,
                "priority": 10,
                "conditions": {
                    "audio_language_is": "jpn",
                    "missing_embedded_subtitle_lang": "spa",
                    "missing_external_subtitle_lang": "spa",
                    "file_extension": ".mkv,.mp4",
                },
                "action": {
                    "action_type": "transcribe",
                    "target_language": "spa",
                    "quality_preset": "fast",
                    "job_priority": 5,
                },
            }
        }
class ScanRuleUpdateRequest(BaseModel):
    """Payload for updating a scan rule; every top-level field is optional."""

    # Rule metadata.
    name: Optional[str] = Field(default=None, description="Rule name")
    enabled: Optional[bool] = Field(default=None, description="Whether rule is enabled")
    priority: Optional[int] = Field(default=None, description="Rule priority")

    # Nested blocks; None when the client did not send them.
    conditions: Optional[ScanRuleConditions] = None
    action: Optional[ScanRuleAction] = None
class ScanRuleResponse(BaseModel):
    """Scan rule as returned by the API; built from ScanRule.to_dict()."""

    # Identity and metadata.
    id: int
    name: str
    enabled: bool
    priority: int

    # Conditions and action are passed through as plain dicts.
    conditions: dict
    action: dict

    # Timestamps as strings (may be None).
    created_at: Optional[str]
    updated_at: Optional[str]
class MessageResponse(BaseModel):
    """Minimal response body carrying a single human-readable message."""

    # Text describing the outcome of the operation.
    message: str
# === ROUTES ===
@router.get("/", response_model=List[ScanRuleResponse])
async def get_all_rules(enabled_only: bool = False):
    """
    List scan rules, highest priority first.

    Args:
        enabled_only: Only return enabled rules

    Returns:
        List of scan rules (ordered by priority DESC)
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    with database.get_session() as session:
        rule_query = session.query(ScanRule)
        if enabled_only:
            rule_query = rule_query.filter(ScanRule.enabled == True)  # noqa: E712 - SQL expression
        ordered = rule_query.order_by(ScanRule.priority.desc()).all()

        # Serialize inside the session so ORM attributes are still loadable.
        return [ScanRuleResponse(**entry.to_dict()) for entry in ordered]
@router.get("/{rule_id}", response_model=ScanRuleResponse)
async def get_rule(rule_id: int):
    """
    Fetch a single scan rule by its ID.

    Args:
        rule_id: Rule ID

    Returns:
        Scan rule object

    Raises:
        404: Rule not found
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    with database.get_session() as session:
        found = session.query(ScanRule).filter(ScanRule.id == rule_id).first()
        if not found:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Scan rule {rule_id} not found"
            )
        payload = found.to_dict()
        return ScanRuleResponse(**payload)
@router.post("/", response_model=ScanRuleResponse, status_code=status.HTTP_201_CREATED)
async def create_rule(request: ScanRuleCreateRequest):
    """
    Persist a new scan rule.

    Args:
        request: Rule creation request

    Returns:
        Created rule object

    Raises:
        400: Invalid data
        409: Rule with same name already exists
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    cond = request.conditions
    act = request.action

    with database.get_session() as session:
        # Rule names must be unique.
        name_taken = session.query(ScanRule).filter(ScanRule.name == request.name).first()
        if name_taken:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail=f"Scan rule with name '{request.name}' already exists"
            )

        # The nested request blocks are flattened onto the ORM row.
        new_rule = ScanRule(
            name=request.name,
            enabled=request.enabled,
            priority=request.priority,
            # Conditions
            audio_language_is=cond.audio_language_is,
            audio_language_not=cond.audio_language_not,
            audio_track_count_min=cond.audio_track_count_min,
            has_embedded_subtitle_lang=cond.has_embedded_subtitle_lang,
            missing_embedded_subtitle_lang=cond.missing_embedded_subtitle_lang,
            missing_external_subtitle_lang=cond.missing_external_subtitle_lang,
            file_extension=cond.file_extension,
            # Action
            action_type=act.action_type,
            target_language=act.target_language,
            quality_preset=act.quality_preset,
            job_priority=act.job_priority,
        )
        session.add(new_rule)
        session.commit()
        # Reload the row after the commit before reading its attributes.
        session.refresh(new_rule)

        logger.info(f"Scan rule created via API: {new_rule.name} (ID: {new_rule.id})")
        return ScanRuleResponse(**new_rule.to_dict())
def _fields_sent(model) -> set:
    """Return the names of the fields the client explicitly included in *model*."""
    sent = getattr(model, "model_fields_set", None)  # pydantic v2
    if sent is None:
        sent = getattr(model, "__fields_set__", set())  # pydantic v1 fallback
    return set(sent)


@router.put("/{rule_id}", response_model=ScanRuleResponse)
async def update_rule(rule_id: int, request: ScanRuleUpdateRequest):
    """
    Update a scan rule.

    Only values that are present (and not None) in the request are applied.
    For the nested ``action`` block, a field is applied only if the client
    actually sent it: ScanRuleAction declares non-None defaults for
    action_type, quality_preset and job_priority, so without this check a
    partial action payload would silently reset those columns to the defaults.

    Args:
        rule_id: Rule ID to update
        request: Rule update request

    Returns:
        Updated rule object

    Raises:
        404: Rule not found
        409: Name already exists
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    condition_fields = (
        "audio_language_is",
        "audio_language_not",
        "audio_track_count_min",
        "has_embedded_subtitle_lang",
        "missing_embedded_subtitle_lang",
        "missing_external_subtitle_lang",
        "file_extension",
    )
    action_fields = (
        "action_type",
        "target_language",
        "quality_preset",
        "job_priority",
    )

    with database.get_session() as session:
        rule = session.query(ScanRule).filter(ScanRule.id == rule_id).first()
        if not rule:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Scan rule {rule_id} not found"
            )

        # Check for duplicate name (only when the name actually changes)
        if request.name and request.name != rule.name:
            existing = session.query(ScanRule).filter(ScanRule.name == request.name).first()
            if existing:
                raise HTTPException(
                    status_code=status.HTTP_409_CONFLICT,
                    detail=f"Scan rule with name '{request.name}' already exists"
                )

        # Update top-level fields
        if request.name is not None:
            rule.name = request.name
        if request.enabled is not None:
            rule.enabled = request.enabled
        if request.priority is not None:
            rule.priority = request.priority

        # Update conditions: every condition defaults to None, so a non-None
        # value can only have come from the client.
        if request.conditions:
            for field_name in condition_fields:
                new_value = getattr(request.conditions, field_name)
                if new_value is not None:
                    setattr(rule, field_name, new_value)

        # Update action: skip fields that only hold a model default.
        if request.action:
            sent = _fields_sent(request.action)
            for field_name in action_fields:
                new_value = getattr(request.action, field_name)
                if field_name in sent and new_value is not None:
                    setattr(rule, field_name, new_value)

        session.commit()
        session.refresh(rule)

        logger.info(f"Scan rule updated via API: {rule.name} (ID: {rule.id})")
        return ScanRuleResponse(**rule.to_dict())
@router.delete("/{rule_id}", response_model=MessageResponse)
async def delete_rule(rule_id: int):
    """
    Remove a scan rule.

    Args:
        rule_id: Rule ID to delete

    Returns:
        Success message

    Raises:
        404: Rule not found
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    with database.get_session() as session:
        doomed = session.query(ScanRule).filter(ScanRule.id == rule_id).first()
        if not doomed:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Scan rule {rule_id} not found"
            )

        # Capture the name before the row is deleted; it is needed for the log line.
        deleted_name = doomed.name
        session.delete(doomed)
        session.commit()

        logger.info(f"Scan rule deleted via API: {deleted_name} (ID: {rule_id})")
        return MessageResponse(message=f"Scan rule {rule_id} deleted successfully")
@router.post("/{rule_id}/toggle", response_model=ScanRuleResponse)
async def toggle_rule(rule_id: int):
    """
    Flip a scan rule between enabled and disabled.

    Args:
        rule_id: Rule ID to toggle

    Returns:
        Updated rule object

    Raises:
        404: Rule not found
    """
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    with database.get_session() as session:
        target = session.query(ScanRule).filter(ScanRule.id == rule_id).first()
        if not target:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Scan rule {rule_id} not found"
            )

        target.enabled = not target.enabled
        session.commit()
        session.refresh(target)

        state_label = 'enabled' if target.enabled else 'disabled'
        logger.info(f"Scan rule toggled via API: {target.name} -> {state_label}")
        return ScanRuleResponse(**target.to_dict())

312
backend/api/scanner.py Normal file
View File

@@ -0,0 +1,312 @@
"""Library scanner API routes."""
import logging
from typing import List, Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/scanner", tags=["scanner"])
# === REQUEST/RESPONSE MODELS ===
class ScanRequest(BaseModel):
    """Body of a manual scan request: which paths to scan and whether to recurse."""

    paths: List[str] = Field(..., description="Paths to scan")
    recursive: bool = Field(default=True, description="Scan subdirectories")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "paths": ["/media/anime", "/media/movies"],
                "recursive": True,
            }
        }
class ScanResult(BaseModel):
    """Summary of a scan; built from the dict returned by library_scanner.scan_paths()."""

    # File and job counters for the scan.
    scanned_files: int
    matched_files: int
    jobs_created: int
    skipped_files: int

    # Paths covered by the scan.
    paths_scanned: List[str]
class ScheduleConfig(BaseModel):
    """Scheduler settings accepted by the scheduler start endpoint."""

    enabled: bool = Field(..., description="Enable scheduled scanning")
    cron_expression: str = Field(..., description="Cron expression for schedule")
    paths: List[str] = Field(..., description="Paths to scan")
    recursive: bool = Field(default=True, description="Scan subdirectories")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "enabled": True,
                "cron_expression": "0 2 * * *",  # Daily at 2 AM
                "paths": ["/media/anime", "/media/movies"],
                "recursive": True,
            }
        }
class WatcherConfig(BaseModel):
    """File watcher settings accepted by the watcher start endpoint."""

    enabled: bool = Field(..., description="Enable file watcher")
    paths: List[str] = Field(..., description="Paths to watch")
    recursive: bool = Field(default=True, description="Watch subdirectories")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "enabled": True,
                "paths": ["/media/anime", "/media/movies"],
                "recursive": True,
            }
        }
class ScannerStatus(BaseModel):
    """Scanner state; built from the dict returned by library_scanner.get_status()."""

    # Scheduler state.
    scheduler_enabled: bool
    scheduler_running: bool
    next_scan_time: Optional[str]

    # File watcher state.
    watcher_enabled: bool
    watcher_running: bool
    watched_paths: List[str]

    # Scan history.
    last_scan_time: Optional[str]
    total_scans: int
class MessageResponse(BaseModel):
    """Minimal response body carrying a single human-readable message."""

    # Text describing the outcome of the operation.
    message: str
# === ROUTES ===
@router.get("/status", response_model=ScannerStatus)
async def get_scanner_status():
    """
    Report the library scanner's current status.

    Returns:
        ScannerStatus built from library_scanner.get_status()
    """
    from backend.scanning.library_scanner import library_scanner

    return ScannerStatus(**library_scanner.get_status())
@router.post("/scan", response_model=ScanResult)
async def scan_paths(request: Optional[ScanRequest] = None):
    """
    Manually trigger a library scan.

    Args:
        request: Optional scan request with paths. If not provided, uses
            library_paths from settings and scans recursively.

    Returns:
        Scan result summary

    Raises:
        400: No paths were supplied and none are configured
    """
    from backend.scanning.library_scanner import library_scanner
    from backend.core.settings_service import settings_service

    if request is not None:
        paths, recursive = request.paths, request.recursive
    else:
        # The stored setting may be a list or a comma-separated string.
        configured = settings_service.get('library_paths', '')
        if isinstance(configured, list):
            candidates = configured
        elif isinstance(configured, str) and configured:
            candidates = configured.split(',')
        else:
            candidates = []
        # Drop empty entries and surrounding whitespace.
        paths = [entry.strip() for entry in candidates if entry and entry.strip()]
        recursive = True

    if not paths:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No library paths configured. Please configure library_paths in settings."
        )

    logger.info(f"Manual scan triggered via API: {paths}")
    summary = library_scanner.scan_paths(paths=paths, recursive=recursive)
    return ScanResult(**summary)
@router.post("/scheduler/start", response_model=MessageResponse)
async def start_scheduler(config: Optional[ScheduleConfig] = None):
    """
    Start scheduled scanning.

    Args:
        config: Optional scheduler configuration. If not provided, the interval
            is read from the scanner_schedule_interval_minutes setting.
            NOTE: when a config IS provided, its contents are currently not
            interpreted; a fixed 360-minute interval is used.

    Returns:
        Success message

    Raises:
        400: Interval is not positive, or a ValueError was raised while
            resolving the interval or starting the scheduler
    """
    from backend.scanning.library_scanner import library_scanner
    from backend.core.settings_service import settings_service

    try:
        if config is not None:
            # Convert cron to interval (simplified - just use 360 minutes for now)
            interval_minutes = 360
        else:
            # Stored interval is in minutes; a falsy value falls back to 6 hours.
            stored = settings_service.get('scanner_schedule_interval_minutes', 360)
            interval_minutes = int(stored) if stored else 360

        if interval_minutes <= 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid scanner interval. Must be greater than 0 minutes."
            )

        library_scanner.start_scheduler(interval_minutes=interval_minutes)
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )

    logger.info(f"Scheduler started via API with interval: {interval_minutes} minutes")
    return MessageResponse(
        message=f"Scheduler started successfully (every {interval_minutes} minutes)"
    )
@router.post("/scheduler/stop", response_model=MessageResponse)
async def stop_scheduler():
    """
    Stop scheduled scanning.

    Returns:
        Success message
    """
    from backend.scanning.library_scanner import library_scanner

    library_scanner.stop_scheduler()
    logger.info("Scheduler stopped via API")

    return MessageResponse(message="Scheduler stopped successfully")
@router.post("/watcher/start", response_model=MessageResponse)
async def start_watcher(config: Optional[WatcherConfig] = None):
    """
    Start the file watcher.

    Args:
        config: Optional watcher configuration. If not provided, uses
            library_paths from settings and watches recursively.

    Returns:
        Success message

    Raises:
        400: No paths were supplied and none are configured
    """
    from backend.scanning.library_scanner import library_scanner
    from backend.core.settings_service import settings_service

    if config is not None:
        paths, recursive = config.paths, config.recursive
    else:
        # The stored setting may be a list or a comma-separated string.
        configured = settings_service.get('library_paths', '')
        if isinstance(configured, list):
            candidates = configured
        elif isinstance(configured, str) and configured:
            candidates = configured.split(',')
        else:
            candidates = []
        # Drop empty entries and surrounding whitespace.
        paths = [entry.strip() for entry in candidates if entry and entry.strip()]
        recursive = True

    if not paths:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No library paths configured. Please configure library_paths in settings."
        )

    library_scanner.start_file_watcher(paths=paths, recursive=recursive)
    logger.info(f"File watcher started via API: {paths}")
    return MessageResponse(message="File watcher started successfully")
@router.post("/watcher/stop", response_model=MessageResponse)
async def stop_watcher():
    """
    Stop the file watcher.

    Returns:
        Success message
    """
    from backend.scanning.library_scanner import library_scanner

    library_scanner.stop_file_watcher()
    logger.info("File watcher stopped via API")

    return MessageResponse(message="File watcher stopped successfully")
@router.post("/analyze", response_model=dict)
async def analyze_file(file_path: str):
    """
    Analyze a single file.

    Args:
        file_path: Path to file to analyze

    Returns:
        File analysis result (the analyzer result's to_dict())

    Raises:
        404: File not found
        400: Path exists but is not a regular file
        500: The analyzer raised while processing the file
    """
    from backend.scanning.file_analyzer import FileAnalyzer
    import os

    if not os.path.exists(file_path):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File not found: {file_path}"
        )
    if not os.path.isfile(file_path):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Path is not a file: {file_path}"
        )

    analyzer = FileAnalyzer()
    try:
        analysis = analyzer.analyze(file_path)
    except Exception as e:
        # Keep the traceback in the server log and chain the cause, so the
        # underlying failure is not lost behind the generic 500 response.
        logger.exception("Failed to analyze file %s", file_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to analyze file: {str(e)}"
        ) from e

    return analysis.to_dict()

323
backend/api/settings.py Normal file
View File

@@ -0,0 +1,323 @@
"""Settings management API routes."""
import logging
from typing import List, Optional
from fastapi import APIRouter, HTTPException, Query, status
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/settings", tags=["settings"])
# === REQUEST/RESPONSE MODELS ===
class SettingResponse(BaseModel):
    """A stored setting as returned by the API; built from the setting's to_dict()."""

    # Identity.
    id: int
    key: str

    # Value and descriptive metadata (each may be None).
    value: Optional[str]
    description: Optional[str]
    category: Optional[str]
    value_type: Optional[str]

    # Timestamps as strings (may be None).
    created_at: Optional[str]
    updated_at: Optional[str]
class SettingUpdateRequest(BaseModel):
    """Body for updating one setting: the new value, always sent as a string."""

    value: str = Field(..., description="New value (as string)")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "value": "true",
            }
        }
class SettingCreateRequest(BaseModel):
    """Body for creating a setting; only the key is required."""

    key: str = Field(..., description="Setting key")
    value: Optional[str] = Field(default=None, description="Setting value")
    description: Optional[str] = Field(default=None, description="Description")
    category: Optional[str] = Field(default=None, description="Category")
    value_type: Optional[str] = Field(default="string", description="Value type")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "key": "custom_setting",
                "value": "value",
                "description": "Custom setting description",
                "category": "general",
                "value_type": "string",
            }
        }
class BulkUpdateRequest(BaseModel):
    """Body for updating several settings in one call."""

    # Maps setting key -> new value.
    settings: dict = Field(..., description="Dictionary of key-value pairs")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "settings": {
                    "worker_cpu_count": "2",
                    "worker_gpu_count": "1",
                    "scanner_enabled": "true",
                }
            }
        }
class MessageResponse(BaseModel):
    """Minimal response body carrying a single human-readable message."""

    # Text describing the outcome of the operation.
    message: str
# === ROUTES ===
@router.get("/", response_model=List[SettingResponse])
async def get_all_settings(category: Optional[str] = Query(None, description="Filter by category")):
    """
    List settings, optionally restricted to one category.

    Args:
        category: Optional category filter (general, workers, transcription, scanner, bazarr)

    Returns:
        List of settings
    """
    from backend.core.settings_service import settings_service

    # An absent or empty category means "everything".
    rows = (
        settings_service.get_by_category(category)
        if category
        else settings_service.get_all()
    )
    return [SettingResponse(**row.to_dict()) for row in rows]
@router.get("/{key}", response_model=SettingResponse)
async def get_setting(key: str):
    """
    Fetch a single setting by its key.

    Args:
        key: Setting key

    Returns:
        Setting object

    Raises:
        404: Setting not found
    """
    from backend.core.database import database
    from backend.core.settings_model import SystemSettings

    with database.get_session() as session:
        found = session.query(SystemSettings).filter(SystemSettings.key == key).first()
        if not found:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Setting '{key}' not found"
            )
        payload = found.to_dict()
        return SettingResponse(**payload)
@router.put("/{key}", response_model=SettingResponse)
async def update_setting(key: str, request: SettingUpdateRequest):
    """
    Update a setting value.

    For ``worker_gpu_count`` the value must be an integer (an empty value is
    treated as 0); a positive count is forced to '0' when the system monitor
    reports no GPU.

    Args:
        key: Setting key
        request: Update request with new value

    Returns:
        Updated setting object

    Raises:
        400: worker_gpu_count is not an integer
        404: Setting not found when re-read after the update
        500: The settings service reported that the update failed
    """
    from backend.core.settings_service import settings_service
    from backend.core.database import database
    from backend.core.settings_model import SystemSettings
    from backend.core.system_monitor import system_monitor

    value = request.value

    # Validate GPU worker count - force to 0 if no GPU available
    if key == 'worker_gpu_count':
        try:
            gpu_count = int(value) if value else 0
        except ValueError:
            # A malformed number is a client error, not a server crash.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid value for 'worker_gpu_count': {value!r} is not an integer"
            ) from None
        if gpu_count > 0 and system_monitor.gpu_count == 0:
            logger.warning(
                f"Attempted to set worker_gpu_count={gpu_count} but no GPU detected. "
                "Forcing to 0."
            )
            value = '0'

    success = settings_service.set(key, value)
    if not success:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to update setting '{key}'"
        )

    # Return updated setting
    with database.get_session() as session:
        setting = session.query(SystemSettings).filter(SystemSettings.key == key).first()
        if not setting:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Setting '{key}' not found"
            )
        return SettingResponse(**setting.to_dict())
@router.post("/bulk-update", response_model=MessageResponse)
async def bulk_update_settings(request: BulkUpdateRequest):
    """
    Update multiple settings at once.

    If ``worker_gpu_count`` is part of the payload it must be convertible to
    an integer; a positive count is forced to '0' when the system monitor
    reports no GPU.

    Args:
        request: Bulk update request with settings dictionary

    Returns:
        Success message

    Raises:
        400: worker_gpu_count is not an integer
        500: The settings service reported that the update failed
    """
    from backend.core.settings_service import settings_service
    from backend.core.system_monitor import system_monitor

    # Work on a copy so the caller's payload is not mutated.
    settings_to_update = request.settings.copy()

    # Validate GPU worker count - force to 0 if no GPU available
    if 'worker_gpu_count' in settings_to_update:
        raw_gpu_count = settings_to_update['worker_gpu_count']
        try:
            gpu_count = int(raw_gpu_count)
        except (TypeError, ValueError):
            # The dict values are untyped, so int() can fail on both bad
            # strings and non-numeric types; report that as a client error.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid value for 'worker_gpu_count': {raw_gpu_count!r} is not an integer"
            ) from None
        if gpu_count > 0 and system_monitor.gpu_count == 0:
            logger.warning(
                f"Attempted to set worker_gpu_count={gpu_count} but no GPU detected. "
                "Forcing to 0."
            )
            settings_to_update['worker_gpu_count'] = '0'

    success = settings_service.bulk_update(settings_to_update)
    if not success:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to update settings"
        )

    logger.info(f"Bulk updated {len(request.settings)} settings")
    return MessageResponse(message=f"Updated {len(request.settings)} settings successfully")
@router.post("/", response_model=SettingResponse, status_code=status.HTTP_201_CREATED)
async def create_setting(request: SettingCreateRequest):
    """
    Create a new setting.

    Args:
        request: Create request with setting details

    Returns:
        Created setting object

    Raises:
        409: Setting already exists
        500: The settings service reported that the creation failed
    """
    from backend.core.settings_service import settings_service
    from backend.core.database import database
    from backend.core.settings_model import SystemSettings

    # Refuse to overwrite an existing key.
    with database.get_session() as session:
        key_taken = session.query(SystemSettings).filter(SystemSettings.key == request.key).first()
        if key_taken:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail=f"Setting '{request.key}' already exists"
            )

    created_ok = settings_service.set(
        key=request.key,
        value=request.value,
        description=request.description,
        category=request.category,
        value_type=request.value_type,
    )
    if not created_ok:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to create setting"
        )

    # Read the row back so the response includes the stored fields.
    with database.get_session() as session:
        stored = session.query(SystemSettings).filter(SystemSettings.key == request.key).first()
        return SettingResponse(**stored.to_dict())
@router.delete("/{key}", response_model=MessageResponse)
async def delete_setting(key: str):
    """
    Remove a setting.

    Args:
        key: Setting key

    Returns:
        Success message

    Raises:
        404: Setting not found (the settings service returned a falsy result)
    """
    from backend.core.settings_service import settings_service

    if not settings_service.delete(key):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Setting '{key}' not found"
        )

    logger.info(f"Setting deleted: {key}")
    return MessageResponse(message=f"Setting '{key}' deleted successfully")
@router.post("/init-defaults", response_model=MessageResponse)
async def init_default_settings():
    """
    Initialize default settings.

    Creates all default settings if they don't exist.
    Safe to call multiple times (won't overwrite existing).

    Returns:
        Success message

    Raises:
        500: The settings service raised while initializing defaults
    """
    from backend.core.settings_service import settings_service

    try:
        settings_service.init_default_settings()
        outcome = MessageResponse(message="Default settings initialized successfully")
        return outcome
    except Exception as e:
        logger.error(f"Failed to initialize default settings: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to initialize default settings: {str(e)}"
        )

313
backend/api/setup_wizard.py Normal file
View File

@@ -0,0 +1,313 @@
"""Setup wizard API endpoints."""
import logging
import secrets
from typing import List, Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/setup", tags=["setup"])
# === REQUEST/RESPONSE MODELS ===
class SetupStatusResponse(BaseModel):
    """Result of the setup status check."""

    # True when no setup_completed setting has been stored yet.
    is_first_run: bool
    # True when the stored setup_completed setting marks setup as done.
    setup_completed: bool
class WorkerConfig(BaseModel):
    """Initial worker settings chosen in the setup wizard."""

    # Validated to the range 0-10.
    count: int = Field(1, ge=0, le=10, description="Number of workers to start")
    type: str = Field("cpu", description="Worker type: cpu or gpu")
class ScannerConfig(BaseModel):
    """Initial scanner settings chosen in the setup wizard."""

    # Validated to 1-10080 minutes (10080 minutes = 7 days).
    interval_minutes: int = Field(360, ge=1, le=10080, description="Scan interval in minutes")
class StandaloneSetupRequest(BaseModel):
    """Body of the standalone-mode setup request."""

    # Required parts.
    library_paths: List[str] = Field(..., description="Library paths to scan")
    scan_rules: List[dict] = Field(..., description="Initial scan rules")

    # Optional parts; None when the client leaves them out.
    worker_config: Optional[WorkerConfig] = Field(None, description="Worker configuration")
    scanner_config: Optional[ScannerConfig] = Field(None, description="Scanner configuration")

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "library_paths": ["/media/anime", "/media/movies"],
                "scan_rules": [
                    {
                        "name": "Japanese to Spanish",
                        "audio_language_is": "jpn",
                        "missing_external_subtitle_lang": "spa",
                        "target_language": "spa",
                        "action_type": "transcribe",
                    }
                ],
                "worker_config": {
                    "count": 1,
                    "type": "cpu",
                },
                "scanner_config": {
                    "interval_minutes": 360,
                },
            }
        }
class BazarrSlaveSetupRequest(BaseModel):
    """Request body for Bazarr slave mode setup.

    Intentionally has no fields: no additional configuration is needed.
    """
class BazarrConnectionInfo(BaseModel):
    """Connection details handed back after Bazarr slave mode setup."""

    # Operation mode label; defaults to the slave-mode identifier.
    mode: str = "bazarr_slave"
    # Address and port at which this service can be reached.
    host: str
    port: int
    # Generated key for the Bazarr provider.
    api_key: str
    # Base URL built from host and port.
    provider_url: str
class SetupCompleteResponse(BaseModel):
    """Result returned by the setup endpoints."""

    # Whether the requested mode was configured.
    success: bool
    # Human-readable outcome.
    message: str
    # Only populated by the Bazarr slave setup route.
    bazarr_info: Optional[BazarrConnectionInfo] = None
# === ROUTES ===
@router.get("/status", response_model=SetupStatusResponse)
async def get_setup_status():
    """
    Check if this is the first run or setup is completed.

    ``is_first_run`` is True when no "setup_completed" setting exists at
    all. ``setup_completed`` is True when the stored value is truthy in
    either its typed or its string form.

    Returns:
        Setup status
    """
    from backend.core.settings_service import settings_service

    stored = settings_service.get("setup_completed", None)

    # The flag is written as the string "true" with value_type="boolean".
    # The application startup code compares boolean settings against
    # True / "true" / "True" / "1" / 1, so the service may hand back either
    # form. Comparing against "true" alone would report a completed setup
    # as incomplete whenever a real bool (or "True") comes back.
    if isinstance(stored, str):
        completed = stored.strip().lower() in ("true", "1")
    else:
        completed = bool(stored)

    return SetupStatusResponse(
        is_first_run=stored is None,
        setup_completed=completed
    )
@router.post("/standalone", response_model=SetupCompleteResponse)
async def setup_standalone_mode(request: StandaloneSetupRequest):
    """
    Persist the standalone-mode configuration chosen in the setup wizard.

    Writes the operation mode, library paths, scanner and worker auto-start
    settings, creates one enabled scan rule per entry of
    ``request.scan_rules`` and finally marks setup as completed.

    Args:
        request: Standalone setup configuration

    Returns:
        Setup completion status

    Raises:
        HTTPException: 500 if any step fails. This handler does not undo
            settings that were written before the failure.
    """
    from backend.core.settings_service import settings_service
    from backend.core.database import database
    from backend.scanning.models import ScanRule

    try:
        # Omitted sections fall back to the model defaults, which are the
        # same values the wizard otherwise hard-codes: 360 minutes (6 hours)
        # and a single CPU worker.
        scanner_cfg = request.scanner_config or ScannerConfig()
        worker_cfg = request.worker_config or WorkerConfig()

        # (key, value, description, category, value_type), written in order.
        # NOTE(review): the startup code reads "worker_cpu_count" /
        # "worker_gpu_count", not the "worker_auto_start_*" keys stored
        # here - confirm which keys are authoritative.
        pending_settings = [
            ("operation_mode", "standalone",
             "Operation mode", "general", "string"),
            ("library_paths", ",".join(request.library_paths),
             "Library paths to scan", "general", "list"),
            ("scanner_enabled", "true",
             "Enable library scanner", "scanner", "boolean"),
            ("scanner_schedule_interval_minutes", str(scanner_cfg.interval_minutes),
             "Scanner interval in minutes", "scanner", "integer"),
            ("worker_auto_start_count", str(worker_cfg.count),
             "Number of workers to start automatically", "workers", "integer"),
            ("worker_auto_start_type", worker_cfg.type,
             "Type of workers to start (cpu/gpu)", "workers", "string"),
        ]
        for key, value, description, category, value_type in pending_settings:
            settings_service.set(key, value,
                                 description=description,
                                 category=category,
                                 value_type=value_type)

        # Create scan rules; unnamed rules are numbered from 1.
        with database.get_session() as session:
            for position, spec in enumerate(request.scan_rules, start=1):
                session.add(ScanRule(
                    name=spec.get("name", f"Rule {position}"),
                    enabled=True,
                    priority=spec.get("priority", 10),
                    audio_language_is=spec.get("audio_language_is"),
                    audio_language_not=spec.get("audio_language_not"),
                    audio_track_count_min=spec.get("audio_track_count_min"),
                    has_embedded_subtitle_lang=spec.get("has_embedded_subtitle_lang"),
                    missing_embedded_subtitle_lang=spec.get("missing_embedded_subtitle_lang"),
                    missing_external_subtitle_lang=spec.get("missing_external_subtitle_lang"),
                    file_extension=spec.get("file_extension", ".mkv,.mp4,.avi"),
                    action_type=spec.get("action_type", "transcribe"),
                    target_language=spec.get("target_language", "spa"),
                    quality_preset=spec.get("quality_preset", "fast"),
                    job_priority=spec.get("job_priority", 5)
                ))
            session.commit()

        # Mark setup as completed only after everything above succeeded.
        settings_service.set("setup_completed", "true",
                             description="Setup wizard completed",
                             category="general",
                             value_type="boolean")

        logger.info("Standalone mode setup completed successfully")
        return SetupCompleteResponse(
            success=True,
            message="Standalone mode configured successfully"
        )
    except Exception as e:
        logger.error(f"Failed to setup standalone mode: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Setup failed: {str(e)}"
        )
@router.post("/bazarr-slave", response_model=SetupCompleteResponse)
async def setup_bazarr_slave_mode(request: BazarrSlaveSetupRequest):
    """
    Configure Bazarr slave mode and generate API key.

    Stores the operation mode, a freshly generated provider API key, enables
    the Bazarr provider, disables the library scanner and marks setup as
    completed. The response carries the connection details to enter in
    Bazarr.

    Args:
        request: Bazarr slave setup configuration

    Returns:
        Setup completion status with connection info

    Raises:
        HTTPException: 500 if any step fails.
    """
    import os

    from backend.core.settings_service import settings_service

    try:
        # Set operation mode
        settings_service.set("operation_mode", "bazarr_slave",
                             description="Operation mode",
                             category="general",
                             value_type="string")

        # Generate API key
        api_key = secrets.token_urlsafe(32)
        settings_service.set("bazarr_api_key", api_key,
                             description="Bazarr provider API key",
                             category="bazarr",
                             value_type="string")

        # Enable Bazarr provider
        settings_service.set("bazarr_provider_enabled", "true",
                             description="Enable Bazarr provider mode",
                             category="bazarr",
                             value_type="boolean")

        # Disable scanner (not needed in slave mode)
        settings_service.set("scanner_enabled", "false",
                             description="Enable library scanner",
                             category="scanner",
                             value_type="boolean")

        # Mark setup as completed
        settings_service.set("setup_completed", "true",
                             description="Setup wizard completed",
                             category="general",
                             value_type="boolean")

        # The original read API_HOST / API_PORT from an `app_settings` object
        # that this module never imports or defines, so every call raised
        # NameError and returned 500 after the settings above were already
        # written. Read the same names from the environment instead, with
        # the same fallbacks.
        # NOTE(review): if the project has a central config object exposing
        # API_HOST / API_PORT, import it here instead.
        host = os.environ.get("API_HOST", "0.0.0.0")
        try:
            port = int(os.environ.get("API_PORT", 8000))
        except ValueError:
            port = 8000

        # 0.0.0.0 is a bind address, not something a client can connect to.
        reachable_host = "127.0.0.1" if host == "0.0.0.0" else host

        # Create connection info
        bazarr_info = BazarrConnectionInfo(
            mode="bazarr_slave",
            host=reachable_host,
            port=port,
            api_key=api_key,
            provider_url=f"http://{reachable_host}:{port}"
        )

        logger.info("Bazarr slave mode setup completed successfully")
        return SetupCompleteResponse(
            success=True,
            message="Bazarr slave mode configured successfully",
            bazarr_info=bazarr_info
        )
    except Exception as e:
        logger.error(f"Failed to setup Bazarr slave mode: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Setup failed: {str(e)}"
        )
@router.post("/skip")
async def skip_setup():
    """
    Skip the setup wizard (for advanced users).

    Only records the "setup_completed" flag; no other setting is written.

    Returns:
        Success message
    """
    from backend.core.settings_service import settings_service

    flag_metadata = {
        "description": "Setup wizard completed",
        "category": "general",
        "value_type": "boolean",
    }
    settings_service.set("setup_completed", "true", **flag_metadata)

    logger.info("Setup wizard skipped")
    return {"message": "Setup wizard skipped"}

210
backend/api/system.py Normal file
View File

@@ -0,0 +1,210 @@
"""System resources monitoring API."""
import logging
import psutil
from typing import List, Optional
from fastapi import APIRouter
from pydantic import BaseModel
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# All routes declared below are served under the /api/system prefix.
router = APIRouter(prefix="/api/system", tags=["system"])
# === RESPONSE MODELS ===
class CPUInfo(BaseModel):
    """Snapshot of CPU load and topology."""

    # Utilisation as a percentage.
    usage_percent: float
    # Logical and physical core counts.
    count_logical: int
    count_physical: int
    # Current clock frequency in MHz; optional.
    frequency_mhz: Optional[float] = None
class MemoryInfo(BaseModel):
    """Snapshot of system memory usage."""

    # Sizes are expressed in gigabytes.
    total_gb: float
    used_gb: float
    free_gb: float
    # Share of memory in use, as a percentage.
    usage_percent: float
class GPUInfo(BaseModel):
    """Snapshot of a single GPU."""

    # Device index and display name.
    id: int
    name: str
    # Memory figures in megabytes; None when they could not be queried.
    memory_total_mb: Optional[int] = None
    memory_used_mb: Optional[int] = None
    memory_free_mb: Optional[int] = None
    # GPU utilisation as a percentage; None when unavailable.
    utilization_percent: Optional[int] = None
class SystemResourcesResponse(BaseModel):
    """Combined CPU, memory and GPU snapshot."""

    cpu: CPUInfo
    memory: MemoryInfo
    # Empty when no GPU information could be collected.
    gpus: List[GPUInfo]
# === ROUTES ===
@router.get("/resources", response_model=SystemResourcesResponse)
async def get_system_resources():
    """
    Get current system resources (CPU, RAM, GPU).

    GPU details are collected through pynvml on a best-effort basis: if the
    module is missing or any NVML call fails, the GPUs gathered so far
    (possibly none) are returned and the problem is logged at debug level.

    Returns:
        System resources information
    """
    # CPU info. cpu_percent samples over a 0.1 s window.
    cpu_percent = psutil.cpu_percent(interval=0.1)
    cpu_count_logical = psutil.cpu_count(logical=True)
    cpu_count_physical = psutil.cpu_count(logical=False)
    cpu_freq = psutil.cpu_freq()
    cpu_info = CPUInfo(
        usage_percent=cpu_percent,
        count_logical=cpu_count_logical or 0,
        count_physical=cpu_count_physical or 0,
        frequency_mhz=cpu_freq.current if cpu_freq else 0
    )

    # Memory info; "free" is reported from the `available` figure.
    mem = psutil.virtual_memory()
    memory_info = MemoryInfo(
        total_gb=round(mem.total / (1024**3), 2),
        used_gb=round(mem.used / (1024**3), 2),
        free_gb=round(mem.available / (1024**3), 2),
        usage_percent=round(mem.percent, 1)
    )

    # GPU info - try to detect NVIDIA GPUs
    gpus = []
    nvml_ready = False
    try:
        import pynvml
        pynvml.nvmlInit()
        nvml_ready = True
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            name = pynvml.nvmlDeviceGetName(handle)
            memory_info_gpu = pynvml.nvmlDeviceGetMemoryInfo(handle)
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpus.append(GPUInfo(
                id=i,
                # The name may come back as bytes or str.
                name=name if isinstance(name, str) else name.decode('utf-8'),
                memory_total_mb=memory_info_gpu.total // (1024**2),
                memory_used_mb=memory_info_gpu.used // (1024**2),
                memory_free_mb=memory_info_gpu.free // (1024**2),
                utilization_percent=utilization.gpu
            ))
    except Exception as e:
        # No GPUs or pynvml not available
        logger.debug(f"Could not get GPU info: {e}")
    finally:
        # Pair every successful nvmlInit with a shutdown, even when a query
        # above raised; previously the shutdown was skipped in that case.
        if nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug(f"Could not shut down NVML: {e}")

    return SystemResourcesResponse(
        cpu=cpu_info,
        memory=memory_info,
        gpus=gpus
    )
@router.get("/cpu", response_model=CPUInfo)
async def get_cpu_info():
    """Report CPU utilisation, core counts and current frequency."""
    usage = psutil.cpu_percent(interval=0.1)
    logical_cores = psutil.cpu_count(logical=True)
    physical_cores = psutil.cpu_count(logical=False)
    freq = psutil.cpu_freq()

    # Missing counts and a missing frequency reading are reported as 0.
    if freq:
        frequency = freq.current
    else:
        frequency = 0

    return CPUInfo(
        usage_percent=usage,
        count_logical=logical_cores or 0,
        count_physical=physical_cores or 0,
        frequency_mhz=frequency
    )
@router.get("/memory", response_model=MemoryInfo)
async def get_memory_info():
    """Report total, used and available system memory in gigabytes."""
    bytes_per_gb = 1024**3
    vm = psutil.virtual_memory()

    total = round(vm.total / bytes_per_gb, 2)
    used = round(vm.used / bytes_per_gb, 2)
    # "free" is reported from the `available` figure.
    available = round(vm.available / bytes_per_gb, 2)
    percent = round(vm.percent, 1)

    return MemoryInfo(
        total_gb=total,
        used_gb=used,
        free_gb=available,
        usage_percent=percent
    )
@router.get("/gpus", response_model=List[GPUInfo])
async def get_gpus_info():
    """
    Get all GPUs information.

    Best effort: if pynvml is missing or any NVML call fails, the GPUs
    collected so far (possibly none) are returned and the problem is logged
    at debug level.

    Returns:
        List of GPU information objects
    """
    gpus = []
    nvml_ready = False
    try:
        import pynvml
        pynvml.nvmlInit()
        nvml_ready = True
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            name = pynvml.nvmlDeviceGetName(handle)
            memory_info_gpu = pynvml.nvmlDeviceGetMemoryInfo(handle)
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpus.append(GPUInfo(
                id=i,
                # The name may come back as bytes or str.
                name=name if isinstance(name, str) else name.decode('utf-8'),
                memory_total_mb=memory_info_gpu.total // (1024**2),
                memory_used_mb=memory_info_gpu.used // (1024**2),
                memory_free_mb=memory_info_gpu.free // (1024**2),
                utilization_percent=utilization.gpu
            ))
    except Exception as e:
        logger.debug(f"Could not get GPU info: {e}")
    finally:
        # Pair every successful nvmlInit with a shutdown, even when a query
        # above raised; previously the shutdown was skipped in that case.
        if nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug(f"Could not shut down NVML: {e}")
    return gpus
@router.get("/gpu/{device_id}", response_model=GPUInfo)
async def get_gpu_info(device_id: int):
    """
    Get specific GPU information.

    Args:
        device_id: NVML device index

    Returns:
        GPU information. If the device cannot be queried (pynvml missing,
        invalid index, driver error) a placeholder named "GPU <id>" with all
        metrics set to None is returned instead of an error.
    """
    nvml_ready = False
    try:
        import pynvml
        pynvml.nvmlInit()
        nvml_ready = True
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        name = pynvml.nvmlDeviceGetName(handle)
        memory_info_gpu = pynvml.nvmlDeviceGetMemoryInfo(handle)
        utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
        return GPUInfo(
            id=device_id,
            # The name may come back as bytes or str.
            name=name if isinstance(name, str) else name.decode('utf-8'),
            memory_total_mb=memory_info_gpu.total // (1024**2),
            memory_used_mb=memory_info_gpu.used // (1024**2),
            memory_free_mb=memory_info_gpu.free // (1024**2),
            utilization_percent=utilization.gpu
        )
    except Exception as e:
        logger.error(f"Could not get GPU {device_id} info: {e}")
        # Return basic info if can't get details
        return GPUInfo(
            id=device_id,
            name=f"GPU {device_id}",
            memory_total_mb=None,
            memory_used_mb=None,
            memory_free_mb=None,
            utilization_percent=None
        )
    finally:
        # Pair every successful nvmlInit with a shutdown, even when a query
        # above raised. A failing shutdown no longer discards data that was
        # already collected.
        if nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug(f"Could not shut down NVML: {e}")

View File

@@ -0,0 +1,268 @@
"""Worker pool management API endpoints."""
from typing import Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
from backend.core.worker_pool import worker_pool
from backend.core.worker import WorkerType
# All routes declared below are served under the /api/workers prefix.
# NOTE(review): backend/api/workers.py declares a router with the same
# prefix, and the application module in this changeset only includes that
# one - confirm whether this module is still mounted anywhere.
router = APIRouter(prefix="/api/workers", tags=["workers"])
# === Request/Response Models ===
class AddWorkerRequest(BaseModel):
    """Request body for adding a worker to the pool."""

    # Required worker kind; checked by the add endpoint.
    type: str = Field(
        ...,
        description="Worker type: 'cpu' or 'gpu'",
    )
    # GPU index; may be omitted for CPU workers.
    device_id: Optional[int] = Field(
        None,
        description="GPU device ID (required for GPU workers)",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "type": "gpu",
                "device_id": 0
            }
        }
class AddWorkerResponse(BaseModel):
    """Result of adding a worker."""

    # Identifier assigned to the new worker.
    worker_id: str
    # Human-readable confirmation.
    message: str
class WorkerStatusResponse(BaseModel):
    """Status report for a single worker."""

    worker_id: str
    status: str
    worker_type: str
    # GPU index, when the worker has one.
    device_id: Optional[int]
    # Job currently being processed, if any.
    current_job_id: Optional[str]
    # Counters of finished and failed jobs.
    jobs_completed: int
    jobs_failed: int
    # Start timestamp as a string, when available.
    started_at: Optional[str]
class PoolStatsResponse(BaseModel):
    """Aggregate statistics for the worker pool."""

    # Each section is an untyped dictionary passed through from the pool.
    pool: dict
    jobs: dict
    queue: dict
class HealthCheckResponse(BaseModel):
    """Outcome of a worker pool health check."""

    # When the check ran, as a string.
    timestamp: str
    total_workers: int
    # Workers found dead, and those that were restarted.
    dead_workers: list
    restarted_workers: list
    # Overall verdict.
    healthy: bool
# === Endpoints ===
@router.get("/", response_model=list)
async def list_workers():
    """
    Report the status of every worker in the pool.

    Returns:
        List of worker status dictionaries
    """
    all_statuses = worker_pool.get_all_workers_status()
    return all_statuses
@router.get("/{worker_id}", response_model=WorkerStatusResponse)
async def get_worker_status(worker_id: str):
    """
    Get status of a specific worker.

    Args:
        worker_id: Worker ID

    Returns:
        Worker status information

    Raises:
        HTTPException: If worker not found
    """
    # Do not call this local `status`: that name would shadow the imported
    # `fastapi.status` module, and the 404 branch below would then fail with
    # AttributeError (a 500 response) instead of raising the intended 404.
    worker_status = worker_pool.get_worker_status(worker_id)
    if not worker_status:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Worker {worker_id} not found"
        )
    return worker_status
@router.post("/", response_model=AddWorkerResponse, status_code=status.HTTP_201_CREATED)
async def add_worker(request: AddWorkerRequest):
    """
    Add a new worker to the pool.

    Args:
        request: Worker configuration

    Returns:
        Worker ID and success message

    Raises:
        HTTPException: 400 for an unknown worker type or a missing/negative
            GPU device_id; 500 if the pool fails to add the worker.
    """
    # Resolve the (case-insensitive) type name to the enum member.
    known_types = {"cpu": WorkerType.CPU, "gpu": WorkerType.GPU}
    requested = request.type.lower()
    if requested not in known_types:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Worker type must be 'cpu' or 'gpu'"
        )
    worker_type = known_types[requested]

    # GPU workers need a valid device index.
    if worker_type == WorkerType.GPU:
        device_id = request.device_id
        if device_id is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="device_id is required for GPU workers"
            )
        if device_id < 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="device_id must be non-negative"
            )

    try:
        new_id = worker_pool.add_worker(worker_type, request.device_id)
        return AddWorkerResponse(
            worker_id=new_id,
            message=f"Worker {new_id} added successfully"
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to add worker: {str(e)}"
        )
@router.delete("/{worker_id}")
async def remove_worker(worker_id: str, timeout: int = 30):
    """
    Remove a worker from the pool.

    Args:
        worker_id: Worker ID to remove
        timeout: Maximum time to wait for worker to stop (seconds)

    Returns:
        Success message

    Raises:
        HTTPException: 404 when the pool reports the removal as unsuccessful
    """
    wait_seconds = float(timeout)
    removed = worker_pool.remove_worker(worker_id, timeout=wait_seconds)
    if removed:
        return {"message": f"Worker {worker_id} removed successfully"}

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Worker {worker_id} not found"
    )
@router.get("/pool/stats", response_model=PoolStatsResponse)
async def get_pool_stats():
    """
    Report statistics for the whole worker pool.

    Returns:
        Pool statistics including worker counts, job stats, and queue info
    """
    pool_statistics = worker_pool.get_pool_stats()
    return pool_statistics
@router.post("/pool/start")
async def start_pool(cpu_workers: int = 0, gpu_workers: int = 0):
    """
    Start the worker pool with the requested number of workers.

    Args:
        cpu_workers: Number of CPU workers to start
        gpu_workers: Number of GPU workers to start

    Returns:
        Success message
    """
    worker_pool.start(cpu_workers=cpu_workers, gpu_workers=gpu_workers)

    summary = f"Worker pool started with {cpu_workers} CPU and {gpu_workers} GPU workers"
    return {"message": summary}
@router.post("/pool/stop")
async def stop_pool(timeout: int = 30):
    """
    Stop the worker pool.

    Args:
        timeout: Maximum time to wait for each worker to stop (seconds)

    Returns:
        Success message
    """
    wait_seconds = float(timeout)
    worker_pool.stop(timeout=wait_seconds)
    return {"message": "Worker pool stopped successfully"}
@router.get("/pool/health", response_model=HealthCheckResponse)
async def health_check():
    """
    Run the pool's health check across all workers.

    The pool may restart dead workers as part of the check if it is
    configured to do so.

    Returns:
        Health check results
    """
    report = worker_pool.health_check()
    return report
@router.post("/pool/autoscale")
async def autoscale_pool(target_workers: int):
    """
    Scale the worker pool to a target size.

    Args:
        target_workers: Target number of workers

    Returns:
        Success message

    Raises:
        HTTPException: 400 when the target is negative
    """
    if not target_workers >= 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="target_workers must be non-negative"
        )

    worker_pool.auto_scale(target_workers)
    confirmation = f"Pool scaled to {target_workers} workers"
    return {"message": confirmation}

329
backend/api/workers.py Normal file
View File

@@ -0,0 +1,329 @@
"""Worker management API routes."""
import logging
from typing import List, Optional
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
from backend.core.worker import WorkerType
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# All routes declared below are served under the /api/workers prefix.
router = APIRouter(prefix="/api/workers", tags=["workers"])
# === REQUEST/RESPONSE MODELS ===
class WorkerAddRequest(BaseModel):
    """Request body for adding a worker to the pool."""

    # Required worker kind; checked by the add endpoint.
    worker_type: str = Field(
        ...,
        description="Worker type: 'cpu' or 'gpu'",
    )
    # GPU index; may be omitted for CPU workers.
    device_id: Optional[int] = Field(
        None,
        description="GPU device ID (only for GPU workers)",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "worker_type": "gpu",
                "device_id": 0
            }
        }
class WorkerStatusResponse(BaseModel):
    """Status report for a single worker."""

    worker_id: str
    worker_type: str
    # GPU index, when the worker has one.
    device_id: Optional[int]
    status: str
    # Job currently being processed, if any.
    current_job_id: Optional[str]
    # Counters of finished and failed jobs.
    jobs_completed: int
    jobs_failed: int
    # Seconds since the worker started.
    uptime_seconds: float
    # Progress of the current job; the endpoints default it to 0.0.
    current_job_progress: float
    # Estimated time remaining for the current job, when known.
    current_job_eta: Optional[int]
class WorkerPoolStatsResponse(BaseModel):
    """Aggregate statistics for the worker pool."""

    # Worker counts by kind.
    total_workers: int
    cpu_workers: int
    gpu_workers: int
    # Worker counts by state.
    idle_workers: int
    busy_workers: int
    stopped_workers: int
    error_workers: int
    # Job counters across the pool.
    total_jobs_completed: int
    total_jobs_failed: int
    # Seconds since the pool started.
    uptime_seconds: Optional[float]
    is_running: bool
class MessageResponse(BaseModel):
    """Response carrying a single human-readable message."""

    message: str
# === ROUTES ===
@router.get("/", response_model=List[WorkerStatusResponse])
async def get_all_workers():
    """
    Report the status of every worker, including a computed uptime.

    Returns:
        List of worker status objects
    """
    from backend.app import worker_pool
    from datetime import datetime, timezone
    from dateutil import parser

    def _uptime_since(started_at):
        """Seconds elapsed since `started_at`; 0.0 if absent or unparsable."""
        if not started_at:
            return 0.0
        try:
            started = parser.parse(started_at)
            # Naive timestamps are treated as UTC so they can be subtracted
            # from the timezone-aware current time.
            if started.tzinfo is None:
                started = started.replace(tzinfo=timezone.utc)
            return (datetime.now(timezone.utc) - started).total_seconds()
        except Exception as e:
            logger.warning(f"Failed to parse started_at: {e}")
            return 0.0

    responses = []
    for entry in worker_pool.get_all_workers_status():
        uptime = _uptime_since(entry.get("started_at"))
        responses.append(
            WorkerStatusResponse(
                worker_id=entry["worker_id"],
                worker_type=entry["type"],
                device_id=entry.get("device_id"),
                status=entry["status"],
                current_job_id=entry.get("current_job_id"),
                jobs_completed=entry["jobs_completed"],
                jobs_failed=entry["jobs_failed"],
                uptime_seconds=uptime,
                current_job_progress=entry.get("current_job_progress", 0.0),
                current_job_eta=entry.get("current_job_eta"),
            )
        )
    return responses
@router.get("/stats", response_model=WorkerPoolStatsResponse)
async def get_pool_stats():
    """
    Report worker pool statistics, including a computed pool uptime.

    Returns:
        Pool statistics object
    """
    from backend.app import worker_pool
    from datetime import datetime, timezone
    from dateutil import parser

    snapshot = worker_pool.get_pool_stats()
    pool = snapshot.get('pool', {})
    jobs = snapshot.get('jobs', {})

    # Uptime stays 0.0 when no start time is recorded or it cannot be parsed.
    uptime = 0.0
    pool_started_at = pool.get('started_at')
    if pool_started_at:
        try:
            started = parser.parse(pool_started_at)
            # Naive timestamps are treated as UTC so they can be subtracted
            # from the timezone-aware current time.
            if started.tzinfo is None:
                started = started.replace(tzinfo=timezone.utc)
            uptime = (datetime.now(timezone.utc) - started).total_seconds()
        except Exception as e:
            logger.warning(f"Failed to parse pool started_at: {e}")
            uptime = 0.0

    return WorkerPoolStatsResponse(
        total_workers=pool.get('total_workers', 0),
        cpu_workers=pool.get('cpu_workers', 0),
        gpu_workers=pool.get('gpu_workers', 0),
        idle_workers=pool.get('idle_workers', 0),
        busy_workers=pool.get('busy_workers', 0),
        stopped_workers=pool.get('stopped_workers', 0),
        error_workers=pool.get('error_workers', 0),
        total_jobs_completed=jobs.get('completed', 0),
        total_jobs_failed=jobs.get('failed', 0),
        uptime_seconds=uptime,
        is_running=pool.get('is_running', False)
    )
@router.get("/{worker_id}", response_model=WorkerStatusResponse)
async def get_worker(worker_id: str):
    """
    Get status of a specific worker.

    Args:
        worker_id: Worker ID

    Returns:
        Worker status object

    Raises:
        404: Worker not found
    """
    from backend.app import worker_pool

    status_dict = worker_pool.get_worker_status(worker_id)
    if not status_dict:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Worker {worker_id} not found"
        )

    # Prefer an explicit "uptime_seconds" value when the pool supplies one.
    # Otherwise derive it from "started_at", exactly as the list endpoint
    # does, so both views of the same worker agree instead of this one
    # reporting 0.
    # NOTE(review): presumably the status dict carries "started_at" rather
    # than "uptime_seconds" - confirm against the worker status producer.
    uptime_seconds = status_dict.get("uptime_seconds")
    if uptime_seconds is None:
        uptime_seconds = 0.0
        started_at = status_dict.get("started_at")
        if started_at:
            from datetime import datetime, timezone
            from dateutil import parser
            try:
                started = parser.parse(started_at)
                # Naive timestamps are treated as UTC.
                if started.tzinfo is None:
                    started = started.replace(tzinfo=timezone.utc)
                uptime_seconds = (datetime.now(timezone.utc) - started).total_seconds()
            except Exception as e:
                logger.warning(f"Failed to parse started_at: {e}")
                uptime_seconds = 0.0

    return WorkerStatusResponse(
        worker_id=status_dict["worker_id"],
        worker_type=status_dict["type"],  # the status dict key is "type"
        device_id=status_dict.get("device_id"),
        status=status_dict["status"],
        current_job_id=status_dict.get("current_job_id"),
        jobs_completed=status_dict["jobs_completed"],
        jobs_failed=status_dict["jobs_failed"],
        uptime_seconds=uptime_seconds,
        current_job_progress=status_dict.get("current_job_progress", 0.0),
        current_job_eta=status_dict.get("current_job_eta"),
    )
@router.post("/", response_model=WorkerStatusResponse, status_code=status.HTTP_201_CREATED)
async def add_worker(request: WorkerAddRequest):
    """
    Add a new worker to the pool and report its initial status.

    Args:
        request: Worker add request

    Returns:
        Created worker status

    Raises:
        400: Invalid worker type or configuration
        500: The pool has no status for the newly added worker
    """
    from backend.app import worker_pool

    # The (case-insensitive) type name must be a valid WorkerType value.
    requested_type = request.worker_type
    try:
        wtype = WorkerType(requested_type.lower())
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid worker type: {requested_type}. Must be 'cpu' or 'gpu'"
        )

    # GPU workers need a device index.
    gpu_without_device = wtype == WorkerType.GPU and request.device_id is None
    if gpu_without_device:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="device_id is required for GPU workers"
        )

    new_id = worker_pool.add_worker(wtype, request.device_id)

    # Read the status back to build the response.
    info = worker_pool.get_worker_status(new_id)
    if not info:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to create worker"
        )

    logger.info(f"Worker {new_id} added via API")
    return WorkerStatusResponse(
        worker_id=info["worker_id"],
        worker_type=info["type"],  # the status dict key is "type"
        device_id=info.get("device_id"),
        status=info["status"],
        current_job_id=info.get("current_job_id"),
        jobs_completed=info["jobs_completed"],
        jobs_failed=info["jobs_failed"],
        uptime_seconds=info.get("uptime_seconds", 0),
        current_job_progress=info.get("current_job_progress", 0.0),
        current_job_eta=info.get("current_job_eta"),
    )
@router.delete("/{worker_id}", response_model=MessageResponse)
async def remove_worker(worker_id: str, timeout: float = 30.0):
    """
    Remove a worker from the pool.

    Args:
        worker_id: Worker ID to remove
        timeout: Maximum time to wait for worker to stop (seconds)

    Returns:
        Success message

    Raises:
        404: The pool reports the removal as unsuccessful
    """
    from backend.app import worker_pool

    removed = worker_pool.remove_worker(worker_id, timeout=timeout)
    if removed:
        logger.info(f"Worker {worker_id} removed via API")
        return MessageResponse(message=f"Worker {worker_id} removed successfully")

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Worker {worker_id} not found"
    )
@router.post("/pool/start", response_model=MessageResponse)
async def start_pool(cpu_workers: int = 0, gpu_workers: int = 0):
    """
    Start the worker pool with the requested number of workers.

    Args:
        cpu_workers: Number of CPU workers to start
        gpu_workers: Number of GPU workers to start

    Returns:
        Success message
    """
    from backend.app import worker_pool

    worker_pool.start(cpu_workers=cpu_workers, gpu_workers=gpu_workers)
    logger.info(f"Worker pool started via API: {cpu_workers} CPU, {gpu_workers} GPU")

    summary = f"Worker pool started: {cpu_workers} CPU workers, {gpu_workers} GPU workers"
    return MessageResponse(message=summary)
@router.post("/pool/stop", response_model=MessageResponse)
async def stop_pool(timeout: float = 30.0):
    """
    Stop the worker pool.

    Args:
        timeout: Maximum time to wait for each worker to stop (seconds)

    Returns:
        Success message
    """
    from backend.app import worker_pool

    worker_pool.stop(timeout=timeout)
    logger.info("Worker pool stopped via API")

    confirmation = MessageResponse(message="Worker pool stopped successfully")
    return confirmation

290
backend/app.py Normal file
View File

@@ -0,0 +1,290 @@
"""Main FastAPI application for TranscriptorIO backend."""
import logging
import os
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from backend.core.database import database
from backend.core.worker_pool import WorkerPool
from backend.core.queue_manager import queue_manager
from backend.scanning.library_scanner import library_scanner
# Import API routers
from backend.api.workers import router as workers_router
from backend.api.jobs import router as jobs_router
from backend.api.scan_rules import router as scan_rules_router
from backend.api.scanner import router as scanner_router
from backend.api.settings import router as settings_router
from backend.api.setup_wizard import router as setup_router
from backend.api.system import router as system_router
from backend.api.filesystem import router as filesystem_router
# Configure logging for the whole process at import time: INFO level,
# "timestamp - logger name - level - message" format.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
# Global worker pool instance. It is created when this module is imported;
# API route modules import it from here (`from backend.app import worker_pool`).
worker_pool = WorkerPool()
def _setting_enabled(value) -> bool:
    """Return True when a stored boolean setting is switched on (typed or string form)."""
    return value in (True, "true", "True", "1", 1)


def _split_library_paths(raw) -> list:
    """Turn the ``library_paths`` setting (list or comma-separated string) into clean paths."""
    if isinstance(raw, list):
        return [p.strip() for p in raw if p and p.strip()]
    if isinstance(raw, str) and raw:
        return [p.strip() for p in raw.split(",") if p.strip()]
    return []


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan manager.

    Handles startup and shutdown tasks:
    - Database initialization
    - Orphaned job cleanup and default settings
    - Worker pool startup (if configured)
    - Library scanner / file watcher startup (if configured)
    - Graceful shutdown

    Args:
        app: The FastAPI application this lifespan is attached to
    """
    # === STARTUP ===
    logger.info("=== TranscriptorIO Backend Starting ===")

    # Initialize database
    logger.info("Initializing database...")
    database.init_db()
    logger.info("Database initialized")

    # Clean up orphaned jobs from previous session. Uses the module-level
    # queue_manager import; the redundant local re-import was dropped.
    try:
        cleaned = queue_manager.cleanup_orphaned_jobs()
        if cleaned > 0:
            logger.info(f"Cleaned up {cleaned} orphaned job(s) from previous session")
    except Exception as e:
        logger.error(f"Failed to cleanup orphaned jobs: {e}")

    # Initialize default settings if needed
    from backend.core.settings_service import settings_service
    try:
        settings_service.init_default_settings()
        logger.info("Settings initialized")
    except Exception as e:
        logger.warning(f"Could not initialize settings: {e}")

    # Initialize scanner stats from existing jobs if not already set
    try:
        from backend.core.models import Job, JobStatus
        scan_count = settings_service.get('scanner_scan_count')
        # The counter is written as a string below, so accept "0" as well as
        # the integer 0 as "no scan recorded yet".
        if scan_count is None or scan_count in (0, "0"):
            # Count completed jobs as an approximation of files scanned
            with database.get_session() as session:
                completed_count = session.query(Job).filter(
                    Job.status == JobStatus.COMPLETED
                ).count()
            if completed_count > 0:
                settings_service.set('scanner_total_files_scanned', str(completed_count), category='scanner')
                settings_service.set('scanner_scan_count', '1', category='scanner')  # At least 1 scan happened
                logger.info(f"Initialized scanner stats from existing jobs: {completed_count} files")
    except Exception as e:
        logger.warning(f"Could not initialize scanner stats: {e}")

    # Start worker pool if configured (and Whisper is available)
    from backend.transcription.transcriber import WHISPER_AVAILABLE
    from backend.core.system_monitor import system_monitor

    # NOTE(review): the setup wizard stores "worker_auto_start_count" /
    # "worker_auto_start_type", which are not read here - confirm which
    # keys are authoritative.
    cpu_workers = int(settings_service.get("worker_cpu_count", 0))
    gpu_workers = int(settings_service.get("worker_gpu_count", 0))

    # Validate GPU workers - force to 0 if no GPU available
    if gpu_workers > 0 and system_monitor.gpu_count == 0:
        logger.warning(
            f"GPU workers configured ({gpu_workers}) but no GPU detected. "
            "GPU workers will NOT be started. Setting gpu_workers=0."
        )
        gpu_workers = 0
        # Also update the setting to prevent confusion
        settings_service.set("worker_gpu_count", "0")

    workers_requested = cpu_workers > 0 or gpu_workers > 0
    if not WHISPER_AVAILABLE:
        if workers_requested:
            logger.warning(
                "Whisper is not installed but workers are configured. "
                "Workers will NOT be started. Install stable-ts or faster-whisper to enable transcription."
            )
    elif workers_requested:
        logger.info(f"Starting worker pool: {cpu_workers} CPU, {gpu_workers} GPU")
        worker_pool.start(cpu_workers=cpu_workers, gpu_workers=gpu_workers)
    else:
        logger.info("No workers configured to start automatically")

    # Start library scanner scheduler (enabled by default)
    if _setting_enabled(settings_service.get("scanner_enabled", True)):
        paths = _split_library_paths(settings_service.get("library_paths", ""))
        if paths:
            interval_minutes = int(settings_service.get("scanner_schedule_interval_minutes", 360))
            logger.info(f"Starting library scanner scheduler (every {interval_minutes} minutes)")
            library_scanner.start_scheduler(interval_minutes=interval_minutes)
        else:
            logger.info("Scanner enabled but no library paths configured - scheduler not started")
    else:
        logger.info("Library scanner scheduler disabled in settings")

    # Start file watcher if configured (disabled by default)
    if _setting_enabled(settings_service.get("watcher_enabled", False)):
        watcher_paths = _split_library_paths(settings_service.get("library_paths", ""))
        if watcher_paths:
            logger.info(f"Starting file watcher: {watcher_paths}")
            library_scanner.start_file_watcher(
                paths=watcher_paths,
                recursive=True
            )
        else:
            logger.info("File watcher enabled but no library paths configured")

    logger.info("=== TranscriptorIO Backend Started ===")

    yield

    # === SHUTDOWN ===
    logger.info("=== TranscriptorIO Backend Shutting Down ===")

    # Stop library scanner first (quick operations). Each stop gets its own
    # guard so a failure stopping the scheduler cannot leave the file
    # watcher running.
    logger.info("Stopping library scanner...")
    try:
        library_scanner.stop_scheduler()
    except Exception as e:
        logger.warning(f"Error stopping scanner: {e}")
    try:
        library_scanner.stop_file_watcher()
    except Exception as e:
        logger.warning(f"Error stopping scanner: {e}")

    # Stop worker pool with shorter timeout
    logger.info("Stopping worker pool...")
    try:
        worker_pool.stop(timeout=5.0)
    except Exception as e:
        logger.warning(f"Error stopping worker pool: {e}")

    logger.info("=== TranscriptorIO Backend Stopped ===")
# Create FastAPI app; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(
title="TranscriptorIO API",
description="AI-powered subtitle transcription service",
version="1.0.0",
lifespan=lifespan
)
# Add CORS middleware
# NOTE(review): every origin, method and header is allowed here together
# with allow_credentials=True. That is very permissive - restrict
# allow_origins to the real frontend origin(s) before exposing this service.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # TODO: Configure this properly
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount API routers. Each router carries its own /api/... prefix.
app.include_router(workers_router)
app.include_router(jobs_router)
app.include_router(scan_rules_router)
app.include_router(scanner_router)
app.include_router(settings_router)
app.include_router(setup_router)
app.include_router(system_router)
app.include_router(filesystem_router)
# === ROOT ENDPOINTS ===
@app.get("/health")
async def health_check():
    """Return a liveness payload with the worker count and queue depth."""
    worker_count = len(worker_pool.workers)
    queued_count = len(queue_manager.get_queued_jobs())
    payload = {
        "status": "healthy",
        "database": "connected",
        "workers": worker_count,
        "queue_size": queued_count,
    }
    return payload
@app.get("/api/status")
async def get_status():
    """
    Report the overall system status.

    The response aggregates three sources:
    - worker pool statistics
    - queue statistics
    - library scanner status
    """
    workers_info = worker_pool.get_pool_stats()
    queue_info = queue_manager.get_queue_stats()
    scanner_info = library_scanner.get_status()

    # Uptime is taken from the worker pool statistics.
    system_info = {
        "status": "running",
        "uptime_seconds": workers_info.get("uptime_seconds"),
    }
    return {
        "system": system_info,
        "workers": workers_info,
        "queue": queue_info,
        "scanner": scanner_info,
    }
# === FRONTEND STATIC FILES ===
# Location of the built frontend, relative to this module.
frontend_path = Path(__file__).parent.parent / "frontend" / "dist"

if frontend_path.exists() and frontend_path.is_dir():
    # Mount static assets only when the directory is present; StaticFiles
    # refuses to start on a missing directory, which would abort startup for
    # a build that ships no assets folder.
    assets_path = frontend_path / "assets"
    if assets_path.is_dir():
        app.mount("/assets", StaticFiles(directory=str(assets_path)), name="assets")

    # Serve index.html for all frontend routes (SPA fallback)
    @app.get("/")
    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str = ""):
        """
        Serve the frontend application.

        Args:
            full_path: Request path captured by the catch-all route.

        Returns:
            The frontend index.html, a 404 JSON response for paths reserved
            for the API/docs, or an error payload if index.html is missing.
        """
        from fastapi.responses import JSONResponse

        # Paths owned by the API must never fall through to the SPA. Answer
        # with a real 404 so clients do not mistake the error body for a
        # successful response.
        reserved_prefixes = ("api/", "health", "docs", "redoc", "openapi.json")
        if full_path.startswith(reserved_prefixes):
            return JSONResponse(status_code=404, content={"error": "Not found"})

        index_file = frontend_path / "index.html"
        if index_file.exists():
            return FileResponse(str(index_file))
        return {"error": "Frontend not built. Run: cd frontend && npm run build"}
else:
    # No frontend build - serve API info
    @app.get("/")
    async def root():
        """Root endpoint - API info."""
        return {
            "name": "TranscriptorIO API",
            "version": "1.0.0",
            "status": "running",
            "message": "Frontend not built. Access API docs at /docs"
        }
# Export worker_pool for API access
__all__ = ["app", "worker_pool"]

222
backend/cli.py Executable file
View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""CLI entry point for TranscriptorIO backend."""
import argparse
import logging
import sys
import os
# Add parent directory to path to allow imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import uvicorn
def main():
    """Parse the command line and dispatch to the chosen subcommand.

    Prints the help text and exits with status 1 when no subcommand is given.
    """
    cli = argparse.ArgumentParser(
        description="TranscriptorIO - AI-powered subtitle transcription service"
    )
    commands = cli.add_subparsers(dest="command", help="Command to run")

    # --- server ---
    server = commands.add_parser("server", help="Run FastAPI server")
    server.add_argument("--host", default="0.0.0.0", help="Host to bind to (default: 0.0.0.0)")
    server.add_argument("--port", type=int, default=8000, help="Port to bind to (default: 8000)")
    server.add_argument("--reload", action="store_true", help="Enable auto-reload for development")
    server.add_argument("--workers", type=int, default=1, help="Number of worker processes (default: 1)")
    server.add_argument(
        "--log-level",
        choices=["debug", "info", "warning", "error", "critical"],
        default="info",
        help="Log level (default: info)",
    )

    # --- db ---
    db = commands.add_parser("db", help="Database operations")
    db.add_argument("action", choices=["init", "migrate", "reset", "backup"], help="Database action")

    # --- worker ---
    worker = commands.add_parser("worker", help="Start standalone worker")
    worker.add_argument("--type", choices=["cpu", "gpu"], default="cpu", help="Worker type (default: cpu)")
    worker.add_argument("--device-id", type=int, default=0, help="GPU device ID (default: 0)")

    # --- scan ---
    scan = commands.add_parser("scan", help="Run library scan")
    scan.add_argument("paths", nargs="+", help="Paths to scan")
    scan.add_argument("--no-recursive", action="store_true", help="Don't scan subdirectories")

    # --- setup ---
    commands.add_parser("setup", help="Run setup wizard")

    args = cli.parse_args()
    selected = args.command
    if not selected:
        cli.print_help()
        sys.exit(1)

    # The setup wizard is the only handler that takes no parsed arguments.
    if selected == "setup":
        run_setup()
        return

    handlers = {
        "server": run_server,
        "db": run_db_command,
        "worker": run_worker,
        "scan": run_scan,
    }
    handler = handlers.get(selected)
    if handler is not None:
        handler(args)
def run_server(args):
    """Launch the FastAPI application under uvicorn.

    Args:
        args: Parsed CLI arguments providing host, port, reload, workers
            and log_level.
    """
    bind_host = args.host
    bind_port = args.port
    print(f"🚀 Starting TranscriptorIO server on {bind_host}:{bind_port}")
    print(f"📖 API docs available at: http://{bind_host}:{bind_port}/docs")

    # With auto-reload enabled the worker count is forced to one.
    worker_count = 1 if args.reload else args.workers

    uvicorn.run(
        "backend.app:app",
        host=bind_host,
        port=bind_port,
        reload=args.reload,
        workers=worker_count,
        log_level=args.log_level,
    )
def run_db_command(args):
    """Execute the database action selected on the command line.

    Args:
        args: Parsed CLI arguments; ``args.action`` is one of
            init, reset, migrate or backup.
    """
    from backend.core.database import database

    action = args.action

    if action == "init":
        print("Initializing database...")
        database.init_db()
        print("✅ Database initialized")
        return

    if action == "reset":
        # Destructive operation: require an explicit typed confirmation.
        print("⚠️ WARNING: This will delete all data!")
        answer = input("Type 'yes' to confirm: ")
        if answer.lower() != "yes":
            print("❌ Cancelled")
            return
        print("Resetting database...")
        database.reset_db()
        print("✅ Database reset")
        return

    if action == "migrate":
        print("❌ Migrations not yet implemented")
        sys.exit(1)

    if action == "backup":
        print("❌ Backup not yet implemented")
        sys.exit(1)
def run_worker(args):
    """Run a standalone worker until interrupted.

    Args:
        args: Parsed CLI arguments providing ``type`` ("cpu" or "gpu")
            and ``device_id`` (only used for GPU workers).
    """
    from backend.core.worker import Worker, WorkerType
    import signal
    import time  # imported once here rather than on every loop iteration

    worker_type = WorkerType.CPU if args.type == "cpu" else WorkerType.GPU
    # A device id is only meaningful for GPU workers.
    device_id = args.device_id if worker_type == WorkerType.GPU else None

    worker_id = f"standalone-{args.type}"
    if device_id is not None:
        worker_id += f"-{device_id}"

    print(f"🔧 Starting standalone worker: {worker_id}")
    worker = Worker(worker_id, worker_type, device_id)

    # Handle shutdown: stop the worker, then exit the process.
    def signal_handler(sig, frame):
        print("\n⏹️ Stopping worker...")
        worker.stop()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    worker.start()

    # Keep the main thread alive; shutdown is driven by the signal handler.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Fallback in case an interrupt reaches us without the handler.
        worker.stop()
def run_scan(args):
    """Scan the given library paths and print a summary of the outcome.

    Args:
        args: Parsed CLI arguments providing ``paths`` and ``no_recursive``.
    """
    from backend.scanning.library_scanner import library_scanner

    scan_targets = args.paths
    descend = not args.no_recursive

    print(f"🔍 Scanning {len(scan_targets)} path(s)...")
    result = library_scanner.scan_paths(paths=scan_targets, recursive=descend)

    print("\n✅ Scan complete:")
    print(f" 📁 Files scanned: {result['scanned_files']}")
    print(f" ✅ Matched: {result['matched_files']}")
    print(f" 📋 Jobs created: {result['jobs_created']}")
    print(f" ⏭️ Skipped: {result['skipped_files']}")
def run_setup():
    """Instantiate the interactive setup wizard and run it."""
    from backend.setup_wizard import SetupWizard

    SetupWizard().run()
if __name__ == "__main__":
main()

View File

@@ -1,18 +1,17 @@
"""Configuration management for TranscriptorIO."""
import os
"""Configuration management for TranscriptorIO.
Most configuration is now stored in the database and managed through the
Settings service. Only DATABASE_URL is loaded from environment variables.
For runtime configuration, use:
from backend.core.settings_service import settings_service
value = settings_service.get("setting_key", default_value)
"""
from enum import Enum
from typing import Optional, List
from pydantic_settings import BaseSettings
from pydantic import Field, field_validator
class OperationMode(str, Enum):
"""Operation modes for TranscriptorIO."""
STANDALONE = "standalone"
PROVIDER = "provider"
HYBRID = "standalone,provider"
class DatabaseType(str, Enum):
"""Supported database backends."""
SQLITE = "sqlite"
@@ -22,131 +21,29 @@ class DatabaseType(str, Enum):
class Settings(BaseSettings):
"""Application settings loaded from environment variables."""
"""
Application settings loaded from environment variables.
# === Application Mode ===
transcriptarr_mode: str = Field(
default="standalone",
description="Operation mode: standalone, provider, or standalone,provider"
)
Only DATABASE_URL is required. All other configuration is stored
in the database and managed through the Settings API/UI.
"""
# === Database Configuration ===
# === Database Configuration (REQUIRED) ===
database_url: str = Field(
default="sqlite:///./transcriptarr.db",
description="Database connection URL. Examples:\n"
" SQLite: sqlite:///./transcriptarr.db\n"
" PostgreSQL: postgresql://user:pass@localhost/transcriptarr\n"
" MariaDB: mariadb+pymysql://user:pass@localhost/transcriptarr"
description="Database connection URL"
)
# === Worker Configuration ===
concurrent_transcriptions: int = Field(default=2, ge=1, le=10)
whisper_threads: int = Field(default=4, ge=1, le=32)
transcribe_device: str = Field(default="cpu", pattern="^(cpu|gpu|cuda)$")
clear_vram_on_complete: bool = Field(default=True)
# === Whisper Model Configuration ===
whisper_model: str = Field(
default="medium",
description="Whisper model: tiny, base, small, medium, large-v3, etc."
)
model_path: str = Field(default="./models")
compute_type: str = Field(default="auto")
# === Standalone Mode Configuration ===
library_paths: Optional[str] = Field(
default=None,
description="Pipe-separated paths to scan: /media/anime|/media/movies"
)
auto_scan_enabled: bool = Field(default=False)
scan_interval_minutes: int = Field(default=30, ge=1)
required_audio_language: Optional[str] = Field(
default=None,
description="Only process files with this audio language (ISO 639-2)"
)
required_missing_subtitle: Optional[str] = Field(
default=None,
description="Only process if this subtitle language is missing (ISO 639-2)"
)
skip_if_subtitle_exists: bool = Field(default=True)
# === Provider Mode Configuration ===
bazarr_url: Optional[str] = Field(default=None)
bazarr_api_key: Optional[str] = Field(default=None)
provider_timeout_seconds: int = Field(default=600, ge=60)
provider_callback_enabled: bool = Field(default=True)
provider_polling_interval: int = Field(default=30, ge=10)
# === API Configuration ===
webhook_port: int = Field(default=9000, ge=1024, le=65535)
api_host: str = Field(default="0.0.0.0")
debug: bool = Field(default=True)
# === Transcription Settings ===
transcribe_or_translate: str = Field(
default="transcribe",
pattern="^(transcribe|translate)$"
)
subtitle_language_name: str = Field(default="")
subtitle_language_naming_type: str = Field(
default="ISO_639_2_B",
description="Naming format: ISO_639_1, ISO_639_2_T, ISO_639_2_B, NAME, NATIVE"
)
word_level_highlight: bool = Field(default=False)
custom_regroup: str = Field(default="cm_sl=84_sl=42++++++1")
# === Skip Configuration ===
skip_if_external_subtitles_exist: bool = Field(default=False)
skip_if_target_subtitles_exist: bool = Field(default=True)
skip_if_internal_subtitles_language: Optional[str] = Field(default="eng")
skip_subtitle_languages: Optional[str] = Field(
default=None,
description="Pipe-separated language codes to skip: eng|spa"
)
skip_if_audio_languages: Optional[str] = Field(
default=None,
description="Skip if audio track is in these languages: eng|spa"
)
skip_unknown_language: bool = Field(default=False)
skip_only_subgen_subtitles: bool = Field(default=False)
# === Advanced Settings ===
force_detected_language_to: Optional[str] = Field(default=None)
detect_language_length: int = Field(default=30, ge=5)
detect_language_offset: int = Field(default=0, ge=0)
should_whisper_detect_audio_language: bool = Field(default=False)
preferred_audio_languages: str = Field(
default="eng",
description="Pipe-separated list in order of preference: eng|jpn"
)
# === Path Mapping ===
use_path_mapping: bool = Field(default=False)
path_mapping_from: str = Field(default="/tv")
path_mapping_to: str = Field(default="/Volumes/TV")
# === Legacy SubGen Compatibility ===
show_in_subname_subgen: bool = Field(default=True)
show_in_subname_model: bool = Field(default=True)
append: bool = Field(default=False)
lrc_for_audio_files: bool = Field(default=True)
@field_validator("transcriptarr_mode")
@classmethod
def validate_mode(cls, v: str) -> str:
"""Validate operation mode."""
valid_modes = {"standalone", "provider", "standalone,provider"}
if v not in valid_modes:
raise ValueError(f"Invalid mode: {v}. Must be one of: {valid_modes}")
return v
@field_validator("database_url")
@classmethod
def validate_database_url(cls, v: str) -> str:
"""Validate database URL format."""
valid_prefixes = ("sqlite://", "postgresql://", "mariadb+pymysql://", "mysql+pymysql://")
valid_prefixes = (
"sqlite://",
"postgresql://",
"mariadb+pymysql://",
"mysql+pymysql://"
)
if not any(v.startswith(prefix) for prefix in valid_prefixes):
raise ValueError(
f"Invalid database URL. Must start with one of: {valid_prefixes}"
@@ -167,42 +64,6 @@ class Settings(BaseSettings):
else:
raise ValueError(f"Unknown database type in URL: {self.database_url}")
@property
def is_standalone_mode(self) -> bool:
"""Check if standalone mode is enabled."""
return "standalone" in self.transcriptarr_mode
@property
def is_provider_mode(self) -> bool:
"""Check if provider mode is enabled."""
return "provider" in self.transcriptarr_mode
@property
def library_paths_list(self) -> List[str]:
"""Get library paths as a list."""
if not self.library_paths:
return []
return [p.strip() for p in self.library_paths.split("|") if p.strip()]
@property
def skip_subtitle_languages_list(self) -> List[str]:
"""Get skip subtitle languages as a list."""
if not self.skip_subtitle_languages:
return []
return [lang.strip() for lang in self.skip_subtitle_languages.split("|") if lang.strip()]
@property
def skip_audio_languages_list(self) -> List[str]:
"""Get skip audio languages as a list."""
if not self.skip_if_audio_languages:
return []
return [lang.strip() for lang in self.skip_if_audio_languages.split("|") if lang.strip()]
@property
def preferred_audio_languages_list(self) -> List[str]:
"""Get preferred audio languages as a list."""
return [lang.strip() for lang in self.preferred_audio_languages.split("|") if lang.strip()]
class Config:
"""Pydantic configuration."""
env_file = ".env"

View File

@@ -57,7 +57,7 @@ class Database:
settings.database_url,
connect_args=connect_args,
poolclass=poolclass,
echo=settings.debug,
echo=False,
)
@event.listens_for(engine, "connect")
@@ -85,7 +85,7 @@ class Database:
pool_size=10,
max_overflow=20,
pool_pre_ping=True, # Verify connections before using
echo=settings.debug,
echo=False,
)
elif settings.database_type in (DatabaseType.MARIADB, DatabaseType.MYSQL):
@@ -107,18 +107,26 @@ class Database:
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
echo=settings.debug,
echo=False,
)
else:
raise ValueError(f"Unsupported database type: {settings.database_type}")
# Disable SQLAlchemy INFO logs for cleaner output
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.pool').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.dialects').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.orm').setLevel(logging.WARNING)
return engine
def _ensure_tables_exist(self):
"""Check if tables exist and create them if they don't."""
# Import models to register them with Base.metadata
from backend.core import models # noqa: F401
from backend.core import settings_model # noqa: F401
from backend.scanning import models as scanning_models # noqa: F401
from sqlalchemy import inspect
inspector = inspect(self.engine)
@@ -135,6 +143,8 @@ class Database:
"""Create all database tables."""
# Import models to register them with Base.metadata
from backend.core import models # noqa: F401
from backend.core import settings_model # noqa: F401
from backend.scanning import models as scanning_models # noqa: F401
logger.info("Creating database tables...")
Base.metadata.create_all(bind=self.engine, checkfirst=True)
@@ -150,6 +160,28 @@ class Database:
logger.error(f"Failed to create tables. Existing tables: {created_tables}")
raise RuntimeError("Failed to create database tables")
def init_db(self):
    """
    Initialize the database.

    Delegates to ``_ensure_tables_exist`` and logs before and after.
    """
    logger.info("Initializing database...")
    self._ensure_tables_exist()
    logger.info("Database initialization complete")
def reset_db(self):
    """
    Drop every table and create them again.

    WARNING: all stored data is lost.
    """
    logger.warning("Resetting database - ALL DATA WILL BE LOST")
    # Order matters: tables must be dropped before they are recreated.
    self.drop_tables()
    self.create_tables()
    logger.info("Database reset complete")
def drop_tables(self):
"""Drop all database tables (use with caution!)."""
logger.warning("Dropping all database tables...")

View File

@@ -1,6 +1,6 @@
"""Database models for TranscriptorIO."""
import uuid
from datetime import datetime
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
@@ -12,6 +12,12 @@ from sqlalchemy.sql import func
from backend.core.database import Base
class JobType(str, Enum):
"""Job type classification."""
TRANSCRIPTION = "transcription" # Regular transcription/translation job
LANGUAGE_DETECTION = "language_detection" # Language detection only
class JobStatus(str, Enum):
"""Job status states."""
QUEUED = "queued"
@@ -24,7 +30,9 @@ class JobStatus(str, Enum):
class JobStage(str, Enum):
"""Job processing stages."""
PENDING = "pending"
LOADING_MODEL = "loading_model"
DETECTING_LANGUAGE = "detecting_language"
LANGUAGE_DETECTION = "language_detection" # Alias for backward compatibility
EXTRACTING_AUDIO = "extracting_audio"
TRANSCRIBING = "transcribing"
TRANSLATING = "translating"
@@ -50,6 +58,14 @@ class Job(Base):
file_path = Column(String(1024), nullable=False, index=True)
file_name = Column(String(512), nullable=False)
# Job classification
job_type = Column(
SQLEnum(JobType),
nullable=False,
default=JobType.TRANSCRIPTION,
index=True
)
# Job status
status = Column(
SQLEnum(JobStatus),
@@ -126,15 +142,25 @@ class Job(Base):
@property
def can_retry(self) -> bool:
"""Check if job can be retried."""
return self.status == JobStatus.FAILED and self.retry_count < self.max_retries
"""Check if job can be retried. Always allow retry for failed jobs."""
return self.status == JobStatus.FAILED
def to_dict(self) -> dict:
"""Convert job to dictionary for API responses."""
def format_datetime(dt):
"""Format datetime as ISO string with UTC timezone."""
if not dt:
return None
# If timezone-naive, assume UTC
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
return {
"id": self.id,
"file_path": self.file_path,
"file_name": self.file_name,
"job_type": self.job_type.value if self.job_type else "transcription",
"status": self.status.value,
"priority": self.priority,
"source_lang": self.source_lang,
@@ -144,9 +170,9 @@ class Job(Base):
"progress": self.progress,
"current_stage": self.current_stage.value if self.current_stage else None,
"eta_seconds": self.eta_seconds,
"created_at": self.created_at.isoformat() if self.created_at else None,
"started_at": self.started_at.isoformat() if self.started_at else None,
"completed_at": self.completed_at.isoformat() if self.completed_at else None,
"created_at": format_datetime(self.created_at),
"started_at": format_datetime(self.started_at),
"completed_at": format_datetime(self.completed_at),
"output_path": self.output_path,
"segments_count": self.segments_count,
"error": self.error,
@@ -168,13 +194,13 @@ class Job(Base):
def mark_started(self, worker_id: str):
"""Mark job as started."""
self.status = JobStatus.PROCESSING
self.started_at = datetime.utcnow()
self.started_at = datetime.now(timezone.utc)
self.worker_id = worker_id
def mark_completed(self, output_path: str, segments_count: int, srt_content: Optional[str] = None):
"""Mark job as completed."""
self.status = JobStatus.COMPLETED
self.completed_at = datetime.utcnow()
self.completed_at = datetime.now(timezone.utc)
self.output_path = output_path
self.segments_count = segments_count
self.srt_content = srt_content
@@ -182,19 +208,24 @@ class Job(Base):
self.current_stage = JobStage.FINALIZING
if self.started_at:
self.processing_time_seconds = (self.completed_at - self.started_at).total_seconds()
# Handle both timezone-aware and timezone-naive datetimes
started = self.started_at
if started.tzinfo is None:
# Convert naive datetime to UTC timezone-aware
started = started.replace(tzinfo=timezone.utc)
self.processing_time_seconds = (self.completed_at - started).total_seconds()
def mark_failed(self, error: str):
"""Mark job as failed."""
self.status = JobStatus.FAILED
self.completed_at = datetime.utcnow()
self.completed_at = datetime.now(timezone.utc)
self.error = error
self.retry_count += 1
def mark_cancelled(self):
"""Mark job as cancelled."""
self.status = JobStatus.CANCELLED
self.completed_at = datetime.utcnow()
self.completed_at = datetime.now(timezone.utc)
# Create indexes for common queries

View File

@@ -1,6 +1,6 @@
"""Queue manager for persistent job queuing."""
import logging
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import List, Optional, Dict
from sqlalchemy import and_, or_
from sqlalchemy.orm import Session
@@ -39,6 +39,7 @@ class QueueManager:
priority: int = 0,
bazarr_callback_url: Optional[str] = None,
is_manual_request: bool = False,
job_type: Optional['JobType'] = None,
) -> Optional[Job]:
"""
Add a new job to the queue.
@@ -53,20 +54,29 @@ class QueueManager:
priority: Job priority (higher = processed first)
bazarr_callback_url: Callback URL for Bazarr provider mode
is_manual_request: Whether this is a manual request (higher priority)
job_type: Type of job (transcription or language_detection)
Returns:
Job object if created, None if duplicate exists
"""
from backend.core.models import JobType
if job_type is None:
job_type = JobType.TRANSCRIPTION
with self.db.get_session() as session:
# Check for existing job
existing = self._find_existing_job(session, file_path, target_lang)
if existing:
logger.info(f"Job already exists for {file_name}: {existing.id} [{existing.status.value}]")
logger.warning(
f"Duplicate job detected for {file_name}: "
f"Existing job {existing.id} [{existing.status.value}] "
f"target={target_lang}, path={file_path}"
)
# If existing job failed and can retry, reset it
if existing.can_retry:
logger.info(f"Resetting failed job {existing.id} for retry")
logger.info(f"Auto-retrying failed job {existing.id}")
existing.status = JobStatus.QUEUED
existing.error = None
existing.current_stage = JobStage.PENDING
@@ -74,12 +84,14 @@ class QueueManager:
session.commit()
return existing
logger.info(f"Job {existing.id} cannot be auto-retried (status: {existing.status.value})")
return None
# Create new job
job = Job(
file_path=file_path,
file_name=file_name,
job_type=job_type,
source_lang=source_lang,
target_lang=target_lang,
quality_preset=quality_preset,
@@ -195,7 +207,11 @@ class QueueManager:
job_id: str,
output_path: str,
segments_count: int,
srt_content: Optional[str] = None
srt_content: Optional[str] = None,
model_used: Optional[str] = None,
device_used: Optional[str] = None,
processing_time_seconds: Optional[float] = None,
detected_language: Optional[str] = None
) -> bool:
"""Mark a job as completed."""
with self.db.get_session() as session:
@@ -206,6 +222,17 @@ class QueueManager:
return False
job.mark_completed(output_path, segments_count, srt_content)
# Set optional metadata if provided
if model_used:
job.model_used = model_used
if device_used:
job.device_used = device_used
if processing_time_seconds is not None:
job.processing_time_seconds = processing_time_seconds
if detected_language:
job.source_lang = detected_language
session.commit()
logger.info(
@@ -227,7 +254,7 @@ class QueueManager:
session.commit()
logger.error(
f"Job {job_id} failed (attempt {job.retry_count}/{job.max_retries}): {error}"
f"Job {job_id} failed (attempt #{job.retry_count}): {error}"
)
return True
@@ -260,7 +287,7 @@ class QueueManager:
failed = session.query(Job).filter(Job.status == JobStatus.FAILED).count()
# Get today's stats
today = datetime.utcnow().date()
today = datetime.now(timezone.utc).date()
completed_today = (
session.query(Job)
.filter(
@@ -321,6 +348,115 @@ class QueueManager:
return jobs
def get_all_jobs(
    self,
    status_filter: Optional[JobStatus] = None,
    limit: int = 50,
    offset: int = 0
) -> List[Job]:
    """
    Return one page of jobs, newest first.

    Args:
        status_filter: Restrict the result to this status when given
        limit: Maximum number of jobs to return
        offset: Number of jobs to skip (pagination)

    Returns:
        List of Job objects (detached from session)
    """
    with self.db.get_session() as session:
        base_query = session.query(Job)
        if status_filter:
            base_query = base_query.filter(Job.status == status_filter)

        newest_first = base_query.order_by(Job.created_at.desc())
        page = newest_first.limit(limit).offset(offset).all()

        # Detach the rows so they stay usable after the session closes.
        for row in page:
            session.expunge(row)
        return page
def count_jobs(self, status_filter: Optional[JobStatus] = None) -> int:
    """
    Count jobs, optionally restricted to one status.

    Args:
        status_filter: Restrict the count to this status when given

    Returns:
        Number of matching jobs
    """
    with self.db.get_session() as session:
        counting_query = session.query(Job)
        if status_filter:
            counting_query = counting_query.filter(Job.status == status_filter)
        total = counting_query.count()
        return total
def retry_job(self, job_id: str) -> bool:
    """
    Put a failed job back into the queue.

    Args:
        job_id: ID of the job to retry

    Returns:
        True if job was reset to queued, False otherwise
    """
    with self.db.get_session() as session:
        target = session.query(Job).filter(Job.id == job_id).first()

        if not target:
            logger.warning(f"Job {job_id} not found for retry")
            return False

        if not target.can_retry:
            logger.warning(f"Job {job_id} cannot be retried")
            return False

        # Clear the failure state and hand the job back to the queue.
        target.status = JobStatus.QUEUED
        target.error = None
        target.current_stage = JobStage.PENDING
        target.progress = 0.0
        target.worker_id = None
        target.retry_count += 1  # tracked for reporting
        session.commit()

        logger.info(f"Job {job_id} reset for retry (attempt #{target.retry_count})")
        return True
def clear_completed_jobs(self) -> int:
    """
    Delete every job whose status is COMPLETED.

    Returns:
        Number of jobs cleared
    """
    with self.db.get_session() as session:
        completed_query = session.query(Job).filter(Job.status == JobStatus.COMPLETED)
        removed = completed_query.delete()
        session.commit()

        if removed > 0:
            logger.info(f"Cleared {removed} completed jobs")
        return removed
def get_processing_jobs(self) -> List[Job]:
"""Get all currently processing jobs."""
return self.get_jobs(status=JobStatus.PROCESSING)
@@ -350,7 +486,7 @@ class QueueManager:
Number of jobs deleted
"""
with self.db.get_session() as session:
cutoff_date = datetime.utcnow() - timedelta(days=days)
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
deleted = (
session.query(Job)
@@ -389,6 +525,48 @@ class QueueManager:
return query.first()
def cleanup_orphaned_jobs(self) -> int:
    """
    Clean up orphaned jobs after server restart.

    Every job still in the 'processing' state is marked as failed so it
    does not stay stuck forever after a restart.

    Returns:
        Number of jobs cleaned up
    """
    with self.db.get_session() as session:
        # Find all jobs in processing state
        orphaned_jobs = session.query(Job).filter(
            Job.status == JobStatus.PROCESSING
        ).all()

        # One timestamp for the whole batch; datetime/timezone come from the
        # module-level import, so no function-local import is needed.
        interrupted_at = datetime.now(timezone.utc)

        for job in orphaned_jobs:
            # Mark as failed with appropriate error message
            job.status = JobStatus.FAILED
            job.error = "Job interrupted by server restart"
            job.completed_at = interrupted_at
            job.progress = 0.0
            job.current_stage = JobStage.PENDING
            job.worker_id = None
            logger.warning(
                f"Cleaned up orphaned job {job.id} ({job.file_name}) - "
                f"was stuck in processing state"
            )

        session.commit()

        cleaned_count = len(orphaned_jobs)
        if cleaned_count > 0:
            logger.info(f"Cleaned up {cleaned_count} orphaned job(s) after restart")
        return cleaned_count
# Global queue manager instance
queue_manager = QueueManager()
queue_manager = QueueManager()

View File

@@ -0,0 +1,74 @@
"""Database model for system settings."""
from datetime import datetime
from sqlalchemy import Column, Integer, String, Text, DateTime
from sqlalchemy.sql import func
from backend.core.database import Base
class SystemSettings(Base):
    """
    A single key/value system setting persisted in the database.

    Values are stored as text; ``value_type`` says how to interpret them and
    ``category`` groups related settings.
    """
    __tablename__ = "system_settings"

    # Primary key
    id = Column(Integer, primary_key=True, autoincrement=True)

    # Setting identification
    key = Column(String(255), nullable=False, unique=True, index=True)
    value = Column(Text, nullable=True)  # raw text, interpreted via value_type

    # Metadata
    description = Column(Text, nullable=True)
    category = Column(String(100), nullable=True, index=True)  # general, workers, transcription, scanner, bazarr
    value_type = Column(String(50), nullable=True)  # string, integer, boolean, float, list

    # Timestamps
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    def __repr__(self):
        """Return a short debug representation showing key and value."""
        return f"<SystemSettings {self.key}={self.value}>"

    def to_dict(self) -> dict:
        """Serialize the setting into a plain dictionary for API responses."""
        created = self.created_at.isoformat() if self.created_at else None
        updated = self.updated_at.isoformat() if self.updated_at else None
        return {
            "id": self.id,
            "key": self.key,
            "value": self.value,
            "description": self.description,
            "category": self.category,
            "value_type": self.value_type,
            "created_at": created,
            "updated_at": updated,
        }

    def get_parsed_value(self):
        """
        Convert the stored text into a Python value according to value_type.

        Returns:
            None when no value is stored; otherwise a bool, int, float,
            list of strings, or the raw string for any other type.
        """
        raw = self.value
        if raw is None:
            return None

        kind = self.value_type
        if kind == "boolean":
            return raw.lower() in ("true", "1", "yes", "on")
        if kind == "integer":
            return int(raw)
        if kind == "float":
            return float(raw)
        if kind == "list":
            # Comma-separated values; empty items are dropped.
            pieces = (piece.strip() for piece in raw.split(","))
            return [piece for piece in pieces if piece]
        # string or unknown type: hand back the text untouched
        return raw

View File

@@ -0,0 +1,541 @@
"""Settings service for database-backed configuration."""
import logging
from typing import Optional, Dict, Any, List
from sqlalchemy.exc import IntegrityError
from backend.core.database import database
from backend.core.settings_model import SystemSettings
logger = logging.getLogger(__name__)
class SettingsService:
"""
Service for managing system settings in database.
Provides caching and type-safe access to settings.
Settings are organized by category: general, workers, transcription, scanner, bazarr
"""
def __init__(self) -> None:
    """Initialize settings service with an empty, invalid cache."""
    # Maps setting key -> value; populated by _load_cache on first get().
    self._cache: Dict[str, Any] = {}
    # False forces get() to reload the cache before answering.
    self._cache_valid = False
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up a setting in the cache.

    Args:
        key: Setting key
        default: Value returned when the key is not cached

    Returns:
        Cached setting value or default
    """
    # Reload lazily whenever a write has invalidated the cache.
    if self._cache_valid is False or not self._cache_valid:
        self._load_cache()

    cached = self._cache
    return cached.get(key, default)
def set(self, key: str, value: Any, description: str = None, category: str = None, value_type: str = None) -> bool:
    """
    Create or update a setting.

    Args:
        key: Setting key
        value: Setting value. Lists/tuples are stored comma-separated so
            that "list" settings can be parsed back; everything else is
            converted with str(). None is stored as NULL.
        description: Optional description (only applied when non-empty)
        category: Optional category (only applied when non-empty)
        value_type: Optional type (string, integer, boolean, float, list);
            new settings default to "string"

    Returns:
        True if successful
    """
    # Serialize once. str() on a list would produce "['a', 'b']", which the
    # comma-separated "list" parser cannot read back correctly.
    if value is None:
        stored_value = None
    elif isinstance(value, (list, tuple)):
        stored_value = ",".join(str(item) for item in value)
    else:
        stored_value = str(value)

    with database.get_session() as session:
        setting = session.query(SystemSettings).filter(SystemSettings.key == key).first()

        if setting:
            # Update existing; metadata is only overwritten when provided.
            setting.value = stored_value
            if description:
                setting.description = description
            if category:
                setting.category = category
            if value_type:
                setting.value_type = value_type
        else:
            # Create new
            setting = SystemSettings(
                key=key,
                value=stored_value,
                description=description,
                category=category,
                value_type=value_type or "string"
            )
            session.add(setting)

        session.commit()

    # Invalidate cache so the next get() reloads from the database
    self._cache_valid = False
    logger.info(f"Setting updated: {key}={value}")
    return True
def get_by_category(self, category: str) -> List[SystemSettings]:
    """
    Fetch every setting that belongs to one category.

    Args:
        category: Category name

    Returns:
        List of SystemSettings objects, detached from the session
    """
    with database.get_session() as session:
        category_query = session.query(SystemSettings).filter(
            SystemSettings.category == category
        )
        rows = category_query.all()

        # Detach so the objects remain readable once the session is closed.
        for row in rows:
            session.expunge(row)
        return rows
def get_all(self) -> List[SystemSettings]:
    """
    Fetch every stored setting.

    Returns:
        List of SystemSettings objects, expunged from the session
    """
    with database.get_session() as session:
        rows = session.query(SystemSettings).all()

        # Detach each row from the session before handing it out
        for row in rows:
            session.expunge(row)
        return rows
def delete(self, key: str) -> bool:
    """
    Remove a setting by key.

    Args:
        key: Setting key

    Returns:
        True if deleted, False if not found
    """
    with database.get_session() as session:
        match = session.query(SystemSettings).filter(SystemSettings.key == key).first()
        if match:
            session.delete(match)
            session.commit()

            # Cached values are stale once a row is gone
            self._cache_valid = False

            logger.info(f"Setting deleted: {key}")
            return True

        return False
def bulk_update(self, settings: Dict[str, Any]) -> bool:
    """
    Update the values of multiple existing settings in one commit.

    Keys that do not exist in the database are skipped with a warning; this
    method never creates rows. Values are stored as ``str(value)`` (or None).

    Args:
        settings: Dictionary of key-value pairs

    Returns:
        True if successful (also when some or all keys were skipped)
    """
    updated_count = 0

    with database.get_session() as session:
        for key, value in settings.items():
            setting = session.query(SystemSettings).filter(SystemSettings.key == key).first()
            if not setting:
                logger.warning(f"Setting not found for bulk update: {key}")
                continue
            setting.value = str(value) if value is not None else None
            updated_count += 1
        session.commit()

        # Invalidate cache so the next get() reloads from the database
        self._cache_valid = False

        # Report the rows that were really changed, not the number requested,
        # so skipped keys are not counted as updated.
        logger.info(f"Bulk updated {updated_count} settings")
        return True
def init_default_settings(self):
    """
    Insert every default setting that is not yet present in the database.

    Existing rows are left untouched. All inserts are committed together and
    the cache is invalidated afterwards.
    """
    defaults = self._get_default_settings()

    with database.get_session() as session:
        for key, config in defaults.items():
            already_present = session.query(SystemSettings).filter(SystemSettings.key == key).first()
            if already_present:
                continue

            raw_value = config["value"]
            new_row = SystemSettings(
                key=key,
                value=str(raw_value) if raw_value is not None else None,
                description=config.get("description"),
                category=config.get("category"),
                value_type=config.get("value_type", "string")
            )
            session.add(new_row)
            logger.info(f"Created default setting: {key}")
        session.commit()

        # Newly created rows are not in the cache yet
        self._cache_valid = False

        logger.info("Default settings initialized")
def _load_cache(self):
    """Rebuild the cache from every row in the database and mark it valid."""
    with database.get_session() as session:
        rows = session.query(SystemSettings).all()

        fresh: Dict[str, Any] = {}
        self._cache = fresh
        # Values are cached in parsed form, as returned by get_parsed_value()
        for row in rows:
            fresh[row.key] = row.get_parsed_value()

        self._cache_valid = True
def _get_default_settings(self) -> Dict[str, Dict]:
"""
Get default settings configuration.
All settings have sensible defaults. Configuration is managed
through the Web UI Settings page or the Settings API.
Returns:
Dictionary of setting configurations
"""
return {
# === General ===
"operation_mode": {
"value": "standalone",
"description": "Operation mode: standalone, provider, or standalone,provider",
"category": "general",
"value_type": "string"
},
"library_paths": {
"value": "",
"description": "Comma-separated library paths to scan",
"category": "general",
"value_type": "list"
},
"api_host": {
"value": "0.0.0.0",
"description": "API server host",
"category": "general",
"value_type": "string"
},
"api_port": {
"value": "8000",
"description": "API server port",
"category": "general",
"value_type": "integer"
},
"debug": {
"value": "false",
"description": "Enable debug mode",
"category": "general",
"value_type": "boolean"
},
"setup_completed": {
"value": "false",
"description": "Whether setup wizard has been completed",
"category": "general",
"value_type": "boolean"
},
# === Workers ===
"worker_cpu_count": {
"value": "0",
"description": "Number of CPU workers to start on boot",
"category": "workers",
"value_type": "integer"
},
"worker_gpu_count": {
"value": "0",
"description": "Number of GPU workers to start on boot",
"category": "workers",
"value_type": "integer"
},
"concurrent_transcriptions": {
"value": "2",
"description": "Maximum concurrent transcriptions",
"category": "workers",
"value_type": "integer"
},
"worker_healthcheck_interval": {
"value": "60",
"description": "Worker health check interval (seconds)",
"category": "workers",
"value_type": "integer"
},
"worker_auto_restart": {
"value": "true",
"description": "Auto-restart failed workers",
"category": "workers",
"value_type": "boolean"
},
"clear_vram_on_complete": {
"value": "true",
"description": "Clear VRAM after job completion",
"category": "workers",
"value_type": "boolean"
},
# === Whisper/Transcription ===
"whisper_model": {
"value": "medium",
"description": "Whisper model: tiny, base, small, medium, large-v3, large-v3-turbo",
"category": "transcription",
"value_type": "string"
},
"model_path": {
"value": "./models",
"description": "Path to store Whisper models",
"category": "transcription",
"value_type": "string"
},
"transcribe_device": {
"value": "cpu",
"description": "Device for transcription (cpu, cuda, gpu)",
"category": "transcription",
"value_type": "string"
},
"cpu_compute_type": {
"value": "auto",
"description": "CPU compute type (auto, int8, float32)",
"category": "transcription",
"value_type": "string"
},
"gpu_compute_type": {
"value": "auto",
"description": "GPU compute type (auto, float16, float32, int8_float16, int8)",
"category": "transcription",
"value_type": "string"
},
"whisper_threads": {
"value": "4",
"description": "Number of CPU threads for Whisper",
"category": "transcription",
"value_type": "integer"
},
"transcribe_or_translate": {
"value": "transcribe",
"description": "Default mode: transcribe or translate",
"category": "transcription",
"value_type": "string"
},
"word_level_highlight": {
"value": "false",
"description": "Enable word-level highlighting in subtitles",
"category": "transcription",
"value_type": "boolean"
},
"detect_language_length": {
"value": "30",
"description": "Seconds of audio to use for language detection",
"category": "transcription",
"value_type": "integer"
},
"detect_language_offset": {
"value": "0",
"description": "Offset in seconds for language detection sample",
"category": "transcription",
"value_type": "integer"
},
# === Subtitle Settings ===
"subtitle_language_name": {
"value": "",
"description": "Custom subtitle language name",
"category": "subtitles",
"value_type": "string"
},
"subtitle_language_naming_type": {
"value": "ISO_639_2_B",
"description": "Language naming: ISO_639_1, ISO_639_2_T, ISO_639_2_B, NAME, NATIVE",
"category": "subtitles",
"value_type": "string"
},
"custom_regroup": {
"value": "cm_sl=84_sl=42++++++1",
"description": "Custom regrouping algorithm for subtitles",
"category": "subtitles",
"value_type": "string"
},
# === Skip Configuration ===
"skip_if_external_subtitles_exist": {
"value": "false",
"description": "Skip if any external subtitle exists",
"category": "skip",
"value_type": "boolean"
},
"skip_if_target_subtitles_exist": {
"value": "true",
"description": "Skip if target language subtitle already exists",
"category": "skip",
"value_type": "boolean"
},
"skip_if_internal_subtitles_language": {
"value": "",
"description": "Skip if internal subtitle in this language exists",
"category": "skip",
"value_type": "string"
},
"skip_subtitle_languages": {
"value": "",
"description": "Pipe-separated language codes to skip",
"category": "skip",
"value_type": "list"
},
"skip_if_audio_languages": {
"value": "",
"description": "Skip if audio track is in these languages",
"category": "skip",
"value_type": "list"
},
"skip_unknown_language": {
"value": "false",
"description": "Skip files with unknown audio language",
"category": "skip",
"value_type": "boolean"
},
"skip_only_subgen_subtitles": {
"value": "false",
"description": "Only skip SubGen-generated subtitles",
"category": "skip",
"value_type": "boolean"
},
# === Scanner ===
"scanner_enabled": {
"value": "true",
"description": "Enable library scanner",
"category": "scanner",
"value_type": "boolean"
},
"scanner_cron": {
"value": "0 2 * * *",
"description": "Cron expression for scheduled scans",
"category": "scanner",
"value_type": "string"
},
"watcher_enabled": {
"value": "false",
"description": "Enable real-time file watcher",
"category": "scanner",
"value_type": "boolean"
},
"auto_scan_enabled": {
"value": "false",
"description": "Enable automatic scheduled scanning",
"category": "scanner",
"value_type": "boolean"
},
"scan_interval_minutes": {
"value": "30",
"description": "Scan interval in minutes",
"category": "scanner",
"value_type": "integer"
},
# === Bazarr Provider ===
"bazarr_provider_enabled": {
"value": "false",
"description": "Enable Bazarr provider mode",
"category": "bazarr",
"value_type": "boolean"
},
"bazarr_url": {
"value": "http://bazarr:6767",
"description": "Bazarr server URL",
"category": "bazarr",
"value_type": "string"
},
"bazarr_api_key": {
"value": "",
"description": "Bazarr API key",
"category": "bazarr",
"value_type": "string"
},
"provider_timeout_seconds": {
"value": "600",
"description": "Provider request timeout in seconds",
"category": "bazarr",
"value_type": "integer"
},
"provider_callback_enabled": {
"value": "true",
"description": "Enable callback to Bazarr on completion",
"category": "bazarr",
"value_type": "boolean"
},
"provider_polling_interval": {
"value": "30",
"description": "Polling interval for Bazarr jobs",
"category": "bazarr",
"value_type": "integer"
},
# === Advanced ===
"force_detected_language_to": {
"value": "",
"description": "Force detected language to specific code",
"category": "advanced",
"value_type": "string"
},
"preferred_audio_languages": {
"value": "eng",
"description": "Pipe-separated preferred audio languages",
"category": "advanced",
"value_type": "list"
},
"use_path_mapping": {
"value": "false",
"description": "Enable path mapping for network shares",
"category": "advanced",
"value_type": "boolean"
},
"path_mapping_from": {
"value": "/tv",
"description": "Path mapping source",
"category": "advanced",
"value_type": "string"
},
"path_mapping_to": {
"value": "/Volumes/TV",
"description": "Path mapping destination",
"category": "advanced",
"value_type": "string"
},
"lrc_for_audio_files": {
"value": "true",
"description": "Generate LRC files for audio-only files",
"category": "advanced",
"value_type": "boolean"
},
}
# Global settings service instance, created once when this module is imported.
# Its cache starts empty and is filled on the first get() call.
settings_service = SettingsService()

View File

@@ -0,0 +1,294 @@
"""System resource monitoring service."""
import logging
import platform
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Try to import psutil (CPU/RAM monitoring).
# PSUTIL_AVAILABLE records whether the import worked; the CPU, memory and
# swap getters check it before touching psutil.
try:
    import psutil
    PSUTIL_AVAILABLE = True
except ImportError:
    PSUTIL_AVAILABLE = False
    logger.warning("psutil not installed. CPU/RAM monitoring will be unavailable.")
# Try to import pynvml (NVIDIA GPU monitoring).
# nvmlInit() is called right here at import time. The broad except is
# deliberate: any exception from the import or from nvmlInit() simply turns
# GPU monitoring off (logged at debug level only).
try:
    import pynvml
    pynvml.nvmlInit()
    NVML_AVAILABLE = True
except Exception as e:
    NVML_AVAILABLE = False
    logger.debug(f"pynvml not available: {e}. GPU monitoring will be unavailable.")
class SystemMonitor:
    """Monitor system resources: CPU, RAM, GPU, VRAM.

    Every getter returns a plain dictionary. When the backing library is
    missing or a query fails, the dictionary has ``"available": False`` and an
    ``"error"`` message instead of raising.
    """

    def __init__(self):
        """Initialize system monitor and count NVIDIA GPUs when NVML is usable."""
        self.gpu_count = 0
        if NVML_AVAILABLE:
            try:
                self.gpu_count = pynvml.nvmlDeviceGetCount()
                logger.info(f"Detected {self.gpu_count} NVIDIA GPU(s)")
            except Exception as e:
                logger.warning(f"Could not get GPU count: {e}")
                self.gpu_count = 0

    def get_cpu_info(self) -> Dict[str, Any]:
        """
        Get CPU usage information.

        Takes two psutil.cpu_percent samples with interval=0.1 (overall, then
        per core).

        Returns:
            Dictionary with CPU stats
        """
        if not PSUTIL_AVAILABLE:
            return {
                "available": False,
                "error": "psutil not installed"
            }
        try:
            cpu_percent = psutil.cpu_percent(interval=0.1, percpu=False)
            cpu_count = psutil.cpu_count(logical=True)
            cpu_count_physical = psutil.cpu_count(logical=False)
            # Get per-core usage
            cpu_percent_per_core = psutil.cpu_percent(interval=0.1, percpu=True)
            # Get CPU frequency (cpu_freq() may return a falsy value; then both stay None)
            cpu_freq = psutil.cpu_freq()
            freq_current = cpu_freq.current if cpu_freq else None
            freq_max = cpu_freq.max if cpu_freq else None
            return {
                "available": True,
                "usage_percent": round(cpu_percent, 1),
                "count_logical": cpu_count,
                "count_physical": cpu_count_physical,
                "per_core_usage": [round(p, 1) for p in cpu_percent_per_core],
                # A frequency of 0 is reported as None
                "frequency_mhz": round(freq_current, 0) if freq_current else None,
                "frequency_max_mhz": round(freq_max, 0) if freq_max else None,
            }
        except Exception as e:
            logger.error(f"Error getting CPU info: {e}")
            return {
                "available": False,
                "error": str(e)
            }

    def get_memory_info(self) -> Dict[str, Any]:
        """
        Get RAM usage information.

        Returns:
            Dictionary with memory stats
        """
        if not PSUTIL_AVAILABLE:
            return {
                "available": False,
                "error": "psutil not installed"
            }
        try:
            mem = psutil.virtual_memory()
            return {
                "available": True,
                "total_gb": round(mem.total / (1024**3), 2),
                "used_gb": round(mem.used / (1024**3), 2),
                # "free_gb" is derived from psutil's `available` figure, not `free`
                "free_gb": round(mem.available / (1024**3), 2),
                "usage_percent": round(mem.percent, 1),
                "total_bytes": mem.total,
                "used_bytes": mem.used,
                "available_bytes": mem.available,
            }
        except Exception as e:
            logger.error(f"Error getting memory info: {e}")
            return {
                "available": False,
                "error": str(e)
            }

    def get_swap_info(self) -> Dict[str, Any]:
        """
        Get swap memory information.

        Returns:
            Dictionary with swap stats
        """
        if not PSUTIL_AVAILABLE:
            return {
                "available": False,
                "error": "psutil not installed"
            }
        try:
            swap = psutil.swap_memory()
            return {
                "available": True,
                "total_gb": round(swap.total / (1024**3), 2),
                "used_gb": round(swap.used / (1024**3), 2),
                "free_gb": round(swap.free / (1024**3), 2),
                "usage_percent": round(swap.percent, 1),
                "total_bytes": swap.total,
                "used_bytes": swap.used,
                "free_bytes": swap.free,
            }
        except Exception as e:
            logger.error(f"Error getting swap info: {e}")
            return {
                "available": False,
                "error": str(e)
            }

    def get_gpu_info(self, device_id: int = 0) -> Dict[str, Any]:
        """
        Get GPU information for a specific device.

        Temperature, power and fan speed are queried individually and reported
        as None when their query raises.

        Args:
            device_id: GPU device ID (default: 0)

        Returns:
            Dictionary with GPU stats
        """
        if not NVML_AVAILABLE:
            return {
                "available": False,
                "device_id": device_id,
                "error": "pynvml not available or no NVIDIA GPUs detected"
            }
        # Reject negative ids here as well, instead of letting NVML raise
        if device_id < 0 or device_id >= self.gpu_count:
            return {
                "available": False,
                "device_id": device_id,
                "error": f"GPU device {device_id} not found. Only {self.gpu_count} GPU(s) available."
            }
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            # Get GPU name (returned as bytes or str; normalise to str)
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            # Get memory info
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            # Get utilization
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            # Get temperature
            try:
                temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
            except Exception:
                temp = None
            # Get power usage
            try:
                power_usage = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # Convert mW to W
                power_limit = pynvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0
            except Exception:
                power_usage = None
                power_limit = None
            # Get fan speed
            try:
                fan_speed = pynvml.nvmlDeviceGetFanSpeed(handle)
            except Exception:
                fan_speed = None
            return {
                "available": True,
                "device_id": device_id,
                "name": name,
                "memory": {
                    "total_gb": round(mem_info.total / (1024**3), 2),
                    "used_gb": round(mem_info.used / (1024**3), 2),
                    "free_gb": round(mem_info.free / (1024**3), 2),
                    # A reported total of 0 must not fail the whole query
                    "usage_percent": (
                        round((mem_info.used / mem_info.total) * 100, 1)
                        if mem_info.total
                        else 0.0
                    ),
                    "total_bytes": mem_info.total,
                    "used_bytes": mem_info.used,
                    "free_bytes": mem_info.free,
                },
                "utilization": {
                    "gpu_percent": util.gpu,
                    "memory_percent": util.memory,
                },
                "temperature_c": temp,
                "power": {
                    # `is not None` so a genuine 0.0 W reading is not reported as missing
                    "usage_watts": round(power_usage, 1) if power_usage is not None else None,
                    "limit_watts": round(power_limit, 1) if power_limit is not None else None,
                    # A missing or zero limit cannot be used as a divisor
                    "usage_percent": (
                        round((power_usage / power_limit) * 100, 1)
                        if power_usage is not None and power_limit
                        else None
                    ),
                },
                "fan_speed_percent": fan_speed,
            }
        except Exception as e:
            logger.error(f"Error getting GPU {device_id} info: {e}")
            return {
                "available": False,
                "device_id": device_id,
                "error": str(e)
            }

    def get_all_gpus_info(self) -> List[Dict[str, Any]]:
        """
        Get information for all available GPUs.

        Returns:
            List of GPU info dictionaries (empty when NVML is unavailable or
            no GPU was counted)
        """
        if not NVML_AVAILABLE or self.gpu_count == 0:
            return []
        return [self.get_gpu_info(i) for i in range(self.gpu_count)]

    def get_system_info(self) -> Dict[str, Any]:
        """
        Get general system information.

        Returns:
            Dictionary with system info
        """
        return {
            "platform": platform.system(),
            "platform_release": platform.release(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "processor": platform.processor(),
            "python_version": platform.python_version(),
        }

    def get_all_resources(self) -> Dict[str, Any]:
        """
        Get all system resources in a single call.

        Returns:
            Comprehensive system resource dictionary
        """
        return {
            "system": self.get_system_info(),
            "cpu": self.get_cpu_info(),
            "memory": self.get_memory_info(),
            "swap": self.get_swap_info(),
            "gpus": self.get_all_gpus_info(),
            "gpu_count": self.gpu_count,
        }

    def __del__(self):
        """Cleanup NVML on destruction."""
        # NOTE(review): nvmlInit() is called once at module import, but every
        # instance calls nvmlShutdown() here; destroying an extra instance may
        # shut NVML down for the remaining ones - confirm only one instance is
        # ever created.
        if NVML_AVAILABLE:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                # Best effort: nothing useful can be done during teardown
                pass
# Global system monitor instance, created once when this module is imported
# (its constructor queries the GPU count when NVML is available).
system_monitor = SystemMonitor()

View File

@@ -1,10 +1,11 @@
"""Individual worker for processing transcription jobs."""
import logging
import multiprocessing as mp
import os
import time
import traceback
from datetime import datetime
from enum import Enum
from datetime import datetime, timezone
from enum import IntEnum, Enum
from typing import Optional
from backend.core.database import Database
@@ -20,13 +21,23 @@ class WorkerType(str, Enum):
GPU = "gpu"
class WorkerStatus(str, Enum):
class WorkerStatus(IntEnum):
"""Worker status states."""
IDLE = "idle"
BUSY = "busy"
STOPPING = "stopping"
STOPPED = "stopped"
ERROR = "error"
IDLE = 0
BUSY = 1
STOPPING = 2
STOPPED = 3
ERROR = 4
def to_string(self) -> str:
"""Convert to string representation."""
return {
0: "idle",
1: "busy",
2: "stopping",
3: "stopped",
4: "error"
}.get(self.value, "unknown")
class Worker:
@@ -79,13 +90,13 @@ class Worker:
daemon=True
)
self.process.start()
self.started_at = datetime.utcnow()
self.started_at = datetime.now(timezone.utc)
logger.info(
f"Worker {self.worker_id} started (PID: {self.process.pid}, "
f"Type: {self.worker_type.value})"
)
def stop(self, timeout: float = 30.0):
def stop(self, timeout: float = 5.0):
"""
Stop the worker process gracefully.
@@ -93,7 +104,7 @@ class Worker:
timeout: Maximum time to wait for worker to stop
"""
if not self.process or not self.process.is_alive():
logger.warning(f"Worker {self.worker_id} is not running")
logger.debug(f"Worker {self.worker_id} is not running")
return
logger.info(f"Stopping worker {self.worker_id}...")
@@ -103,11 +114,12 @@ class Worker:
if self.process.is_alive():
logger.warning(f"Worker {self.worker_id} did not stop gracefully, terminating...")
self.process.terminate()
self.process.join(timeout=5.0)
self.process.join(timeout=2.0)
if self.process.is_alive():
logger.error(f"Worker {self.worker_id} did not terminate, killing...")
self.process.kill()
self.process.join(timeout=1.0)
logger.info(f"Worker {self.worker_id} stopped")
@@ -130,7 +142,7 @@ class Worker:
"worker_id": self.worker_id,
"type": self.worker_type.value,
"device_id": self.device_id,
"status": status_enum.value,
"status": status_enum.to_string(), # Convert to string
"current_job_id": current_job if current_job else None,
"jobs_completed": self.jobs_completed.value,
"jobs_failed": self.jobs_failed.value,
@@ -205,75 +217,244 @@ class Worker:
def _process_job(self, job: Job, queue_mgr: QueueManager):
"""
Process a single transcription job.
Process a job (transcription or language detection).
Args:
job: Job to process
queue_mgr: Queue manager for updating progress
"""
# TODO: This will be implemented when we add the transcriber module
# For now, simulate work
from backend.core.models import JobType
# Stage 1: Detect language
queue_mgr.update_job_progress(
job.id,
progress=10.0,
stage=JobStage.DETECTING_LANGUAGE,
eta_seconds=60
)
time.sleep(2) # Simulate work
# Route to appropriate handler based on job type
if job.job_type == JobType.LANGUAGE_DETECTION:
self._process_language_detection(job, queue_mgr)
else:
self._process_transcription(job, queue_mgr)
# Stage 2: Extract audio
queue_mgr.update_job_progress(
job.id,
progress=20.0,
stage=JobStage.EXTRACTING_AUDIO,
eta_seconds=50
)
time.sleep(2)
def _process_language_detection(self, job: Job, queue_mgr: QueueManager):
"""
Process a language detection job using fast Whisper model.
# Stage 3: Transcribe
queue_mgr.update_job_progress(
job.id,
progress=30.0,
stage=JobStage.TRANSCRIBING,
eta_seconds=40
)
Args:
job: Language detection job
queue_mgr: Queue manager for updating progress
"""
start_time = time.time()
# Simulate progressive transcription
for i in range(30, 90, 10):
time.sleep(1)
try:
logger.info(f"Worker {self.worker_id} processing LANGUAGE DETECTION job {job.id}: {job.file_name}")
# Stage 1: Detecting language (20% progress)
queue_mgr.update_job_progress(
job.id,
progress=float(i),
stage=JobStage.TRANSCRIBING,
eta_seconds=int((100 - i) / 2)
job.id, progress=20.0, stage=JobStage.DETECTING_LANGUAGE, eta_seconds=10
)
# Stage 4: Finalize
queue_mgr.update_job_progress(
job.id,
progress=95.0,
stage=JobStage.FINALIZING,
eta_seconds=5
)
time.sleep(1)
# Use language detector with tiny model
from backend.scanning.language_detector import LanguageDetector
# Mark as completed
output_path = job.file_path.replace('.mkv', '.srt')
queue_mgr.mark_job_completed(
job.id,
output_path=output_path,
segments_count=100, # Simulated
srt_content="Simulated SRT content"
)
language, confidence = LanguageDetector.detect_language(
file_path=job.file_path,
sample_duration=30
)
# Stage 2: Finalizing (80% progress)
queue_mgr.update_job_progress(
job.id, progress=80.0, stage=JobStage.FINALIZING, eta_seconds=2
)
if language:
# Calculate processing time
processing_time = time.time() - start_time
# Use ISO 639-1 format (ja, en, es) throughout the system
lang_code = language.value[0] if language else "unknown"
result_text = f"Language detected: {lang_code} ({language.name.title() if language else 'Unknown'})\nConfidence: {confidence}%"
# Store in ISO 639-1 format (ja, en, es) for consistency
queue_mgr.mark_job_completed(
job.id,
output_path=None,
segments_count=0,
srt_content=result_text,
detected_language=lang_code # Use ISO 639-1 (ja, en, es)
)
logger.info(
f"Worker {self.worker_id} completed detection job {job.id}: "
f"{lang_code} (confidence: {confidence}%) in {processing_time:.1f}s"
)
# Check if file matches any scan rules and queue transcription job
self._check_and_queue_transcription(job, lang_code)
else:
# Detection failed
queue_mgr.mark_job_failed(job.id, "Language detection failed - could not detect language")
logger.error(f"Worker {self.worker_id} failed detection job {job.id}: No language detected")
except Exception as e:
logger.error(f"Worker {self.worker_id} failed detection job {job.id}: {e}", exc_info=True)
queue_mgr.mark_job_failed(job.id, str(e))
def _process_transcription(self, job: Job, queue_mgr: QueueManager):
    """
    Process a transcription/translation job using Whisper.

    Steps, as implemented below:
    1. Load a WhisperTranscriber on this worker's device (5%).
    2. Call handle_multiple_audio_tracks(); when it returns data that data is
       transcribed, otherwise the file path is passed to Whisper (10%).
    3. Run Whisper with task="translate" and save "{base}.eng.srt" (15-75%).
    4. If the job mode is "translate" and the target is not "eng", translate
       the English SRT into "{base}.{target_lang}.srt"; on failure the English
       file is kept as the output.
    5. Mark the job completed. Any exception marks the job failed instead.

    The model is always unloaded in the ``finally`` block.

    Args:
        job: Transcription job
        queue_mgr: Queue manager for updating progress
    """
    from backend.transcription import WhisperTranscriber
    from backend.transcription.audio_utils import handle_multiple_audio_tracks
    from backend.core.language_code import LanguageCode

    transcriber = None
    start_time = time.time()

    try:
        logger.info(f"Worker {self.worker_id} processing TRANSCRIPTION job {job.id}: {job.file_name}")

        # Stage 1: Loading model
        queue_mgr.update_job_progress(
            job.id, progress=5.0, stage=JobStage.LOADING_MODEL, eta_seconds=None
        )

        # Determine device for transcriber
        if self.worker_type == WorkerType.GPU:
            device = f"cuda:{self.device_id}" if self.device_id is not None else "cuda"
        else:
            device = "cpu"

        transcriber = WhisperTranscriber(device=device)
        transcriber.load_model()

        # Stage 2: Preparing audio
        queue_mgr.update_job_progress(
            job.id, progress=10.0, stage=JobStage.EXTRACTING_AUDIO, eta_seconds=None
        )

        # Handle multiple audio tracks if needed
        source_lang = (
            LanguageCode.from_string(job.source_lang) if job.source_lang else None
        )
        audio_data = handle_multiple_audio_tracks(job.file_path, source_lang)

        # Stage 3: Transcribing
        queue_mgr.update_job_progress(
            job.id, progress=15.0, stage=JobStage.TRANSCRIBING, eta_seconds=None
        )

        # Progress callback for real-time updates
        def progress_callback(seek, total):
            # Reserve 15%-75% for Whisper (60% range)
            # If translate mode, reserve 75%-90% for translation (15% range)
            # A zero/empty total would raise ZeroDivisionError inside the
            # transcriber and fail the job; treat it as "no progress yet".
            fraction = seek / total if total else 0.0
            # Clamp so the reported value never leaves the 15%-75% band
            fraction = min(max(fraction, 0.0), 1.0)
            whisper_progress = 15.0 + fraction * 60.0
            queue_mgr.update_job_progress(job.id, progress=whisper_progress, stage=JobStage.TRANSCRIBING)

        # Stage 3A: Whisper transcription to English
        # IMPORTANT: Both 'transcribe' and 'translate' modes use task='translate' here
        # to convert audio to English subtitles
        logger.info("Running Whisper with task='translate' to convert audio to English")

        # job.source_lang is already in ISO 639-1 format (ja, en, es)
        # Whisper accepts ISO 639-1, so we can use it directly
        if audio_data:
            result = transcriber.transcribe_audio_data(
                audio_data=audio_data.read(),
                language=job.source_lang,  # Already ISO 639-1 (ja, en, es)
                task="translate",  # ALWAYS translate to English first
                progress_callback=progress_callback,
            )
        else:
            result = transcriber.transcribe_file(
                file_path=job.file_path,
                language=job.source_lang,  # Already ISO 639-1 (ja, en, es)
                task="translate",  # ALWAYS translate to English first
                progress_callback=progress_callback,
            )

        # Generate English SRT filename
        file_base = os.path.splitext(job.file_path)[0]
        english_srt_path = f"{file_base}.eng.srt"

        # Save English SRT
        result.to_srt(english_srt_path, word_level=False)
        logger.info(f"English subtitles saved to {english_srt_path}")

        # Stage 3B: Optional translation to target language
        # NOTE(review): only the three-letter code "eng" is treated as English
        # here; a target of "en" would take the translation path - confirm
        # which code format job.target_lang uses.
        if job.transcribe_or_translate == "translate" and job.target_lang and job.target_lang.lower() != "eng":
            queue_mgr.update_job_progress(
                job.id, progress=75.0, stage=JobStage.FINALIZING, eta_seconds=10
            )
            logger.info(f"Translating English subtitles to {job.target_lang}")

            from backend.transcription import translate_srt_file

            # Generate target language SRT filename
            target_srt_path = f"{file_base}.{job.target_lang}.srt"

            # Translate English SRT to target language
            success = translate_srt_file(
                input_path=english_srt_path,
                output_path=target_srt_path,
                target_language=job.target_lang
            )
            if success:
                logger.info(f"Translated subtitles saved to {target_srt_path}")
                output_path = target_srt_path
            else:
                logger.warning("Translation failed, keeping English subtitles only")
                output_path = english_srt_path
        else:
            # For 'transcribe' mode or if target is English, use English SRT
            output_path = english_srt_path

        # Stage 4: Finalize
        queue_mgr.update_job_progress(
            job.id, progress=90.0, stage=JobStage.FINALIZING, eta_seconds=5
        )

        # Calculate processing time
        processing_time = time.time() - start_time

        # Get SRT content for storage
        # NOTE(review): this comes from the Whisper result, so it is presumably
        # the English text even when output_path points at a translated file.
        srt_content = result.get_srt_content()

        # Mark job as completed
        queue_mgr.mark_job_completed(
            job.id,
            output_path=output_path,
            segments_count=result.segments_count,
            srt_content=srt_content,
            model_used=transcriber.model_name,
            device_used=transcriber.device,
            processing_time_seconds=processing_time,
        )
        logger.info(
            f"Worker {self.worker_id} completed job {job.id}: "
            f"{result.segments_count} segments in {processing_time:.1f}s"
        )
    except Exception as e:
        logger.error(f"Worker {self.worker_id} failed job {job.id}: {e}", exc_info=True)
        queue_mgr.mark_job_failed(job.id, str(e))
    finally:
        # Always unload model after job
        if transcriber:
            try:
                transcriber.unload_model()
            except Exception as e:
                logger.error(f"Error unloading model: {e}")
def _set_status(self, status: WorkerStatus):
    """Store the numeric value of ``status`` in ``self.status.value``."""
    # NOTE(review): the original docstring calls this thread-safe; that
    # presumably relies on self.status being a shared value object - confirm.
    numeric_state = status.value
    self.status.value = numeric_state
def _set_current_job(self, job_id: str):
"""Set current job ID (thread-safe)."""
"""Set the current job ID (thread-safe)."""
job_id_bytes = job_id.encode('utf-8')
for i, byte in enumerate(job_id_bytes):
if i < len(self.current_job_id):
@@ -282,4 +463,31 @@ class Worker:
def _clear_current_job(self):
"""Clear current job ID (thread-safe)."""
for i in range(len(self.current_job_id)):
self.current_job_id[i] = b'\x00'
self.current_job_id[i] = b'\x00'
def _check_and_queue_transcription(self, job: Job, detected_lang_code: str):
    """
    Hand a finished language detection result to the library scanner.

    The scanner decides whether the file matches a scan rule and should get
    a transcription job. Any exception is logged and swallowed, so a failure
    here does not propagate to the caller.

    Args:
        job: Completed language detection job
        detected_lang_code: Detected language code (ISO 639-1, e.g., 'ja', 'en')
    """
    try:
        # Imported lazily, inside the try, so an import failure is logged too
        from backend.scanning.library_scanner import library_scanner

        announcement = (
            f"Language detection completed for {job.file_path}: {detected_lang_code}. Checking scan rules..."
        )
        logger.info(announcement)

        # Use the scanner's method to check rules and queue transcription
        scanner_hook = library_scanner._check_and_queue_transcription_for_file
        scanner_hook(job.file_path, detected_lang_code)
    except Exception as e:
        failure_text = f"Error checking scan rules for {job.file_path}: {e}"
        logger.error(failure_text, exc_info=True)

339
backend/core/worker_pool.py Normal file
View File

@@ -0,0 +1,339 @@
"""Worker pool orchestrator for managing transcription workers."""
import logging
import time
from typing import Dict, List, Optional
from datetime import datetime, timezone
from backend.core.worker import Worker, WorkerType, WorkerStatus
from backend.core.queue_manager import queue_manager
logger = logging.getLogger(__name__)
class WorkerPool:
"""
Orchestrator for managing a pool of transcription workers.
Similar to Tdarr's worker management system, this class handles:
- Dynamic worker creation/removal (CPU and GPU)
- Worker health monitoring
- Load balancing via the queue
- Worker statistics and reporting
- Graceful shutdown
Workers are managed as separate processes that pull jobs from the
persistent queue. The pool can be controlled via WebUI to add/remove
workers on-demand.
"""
def __init__(self):
    """Create an empty pool that is not running yet."""
    # Set when start() is called
    self.started_at: Optional[datetime] = None
    self.is_running = False
    # Worker ID -> Worker
    self.workers: Dict[str, Worker] = {}
    logger.info("WorkerPool initialized")
def start(self, cpu_workers: int = 0, gpu_workers: int = 0):
    """
    Start the worker pool with specified number of workers.

    Does nothing (apart from a warning) when the pool is already running.
    GPU workers are spread round-robin over the GPUs reported by
    ``_get_gpu_count()``; when no GPU is reported the GPU workers are
    skipped with a warning instead of crashing.

    Args:
        cpu_workers: Number of CPU workers to start
        gpu_workers: Number of GPU workers to start
    """
    if self.is_running:
        logger.warning("WorkerPool is already running")
        return

    self.is_running = True
    self.started_at = datetime.now(timezone.utc)

    # Start CPU workers
    for _ in range(cpu_workers):
        self.add_worker(WorkerType.CPU)

    # Start GPU workers
    if gpu_workers > 0:
        # Query once: the count does not change between iterations
        gpu_count = self._get_gpu_count()
        if gpu_count and gpu_count > 0:
            for i in range(gpu_workers):
                self.add_worker(WorkerType.GPU, device_id=i % gpu_count)
        else:
            # `i % 0` would raise ZeroDivisionError after is_running was
            # already set and the CPU workers were started.
            logger.warning(
                f"No GPU detected; skipping {gpu_workers} requested GPU worker(s)"
            )
            gpu_workers = 0

    logger.info(
        f"WorkerPool started: {cpu_workers} CPU workers, {gpu_workers} GPU workers"
    )
def stop(self, timeout: float = 30.0):
    """
    Stop every worker, one after another, and empty the pool.

    Args:
        timeout: Maximum time to wait for each worker to stop
    """
    if self.is_running:
        logger.info(f"Stopping WorkerPool with {len(self.workers)} workers...")

        # Iterate over a snapshot of the registry
        snapshot = list(self.workers.items())
        for worker_id, worker in snapshot:
            logger.info(f"Stopping worker {worker_id}")
            worker.stop(timeout=timeout)

        self.workers.clear()
        self.is_running = False
        logger.info("WorkerPool stopped")
    else:
        logger.warning("WorkerPool is not running")
def add_worker(
    self,
    worker_type: WorkerType,
    device_id: Optional[int] = None
) -> str:
    """
    Create, start and register a new worker.

    When the generated ID is already registered, no new worker is created
    and the existing ID is returned.

    Args:
        worker_type: CPU or GPU
        device_id: GPU device ID (only for GPU workers)

    Returns:
        Worker ID
    """
    new_id = self._generate_worker_id(worker_type, device_id)

    if new_id not in self.workers:
        # Create and start worker, then register it
        fresh_worker = Worker(new_id, worker_type, device_id)
        fresh_worker.start()
        self.workers[new_id] = fresh_worker
        logger.info(f"Added worker {new_id} ({worker_type.value})")
    else:
        logger.warning(f"Worker {new_id} already exists")

    return new_id
def remove_worker(self, worker_id: str, timeout: float = 30.0) -> bool:
    """
    Stop one worker and drop it from the pool.

    Args:
        worker_id: Worker ID to remove
        timeout: Maximum time to wait for worker to stop

    Returns:
        True if worker was removed, False otherwise
    """
    target = self.workers.get(worker_id)

    if target:
        logger.info(f"Removing worker {worker_id}")
        # Stop first; the registry entry is only dropped afterwards
        target.stop(timeout=timeout)
        self.workers.pop(worker_id)
        logger.info(f"Worker {worker_id} removed")
        return True

    logger.warning(f"Worker {worker_id} not found")
    return False
def get_worker_status(self, worker_id: str) -> Optional[dict]:
    """
    Look up one worker and return whatever its ``get_status()`` reports.

    Args:
        worker_id: ID of the worker to query

    Returns:
        The worker's status dict, or None when no such worker is registered
    """
    found = self.workers.get(worker_id)
    return found.get_status() if found else None
def get_all_workers_status(self) -> List[dict]:
    """
    Collect the status dict of every registered worker.

    Returns:
        One status dict per worker, in registry iteration order
    """
    statuses = []
    for member in self.workers.values():
        statuses.append(member.get_status())
    return statuses
def get_pool_stats(self) -> dict:
    """
    Summarise the pool: worker counts, job totals and queue statistics.

    Returns:
        Dict with three sections: "pool" (running flag, start time, worker
        counts by type and status), "jobs" (completed, failed, success rate
        in percent) and "queue" (whatever queue_manager.get_queue_stats()
        returns)
    """
    members = self.workers.values()

    cpu_total = len([m for m in members if m.worker_type == WorkerType.CPU])
    gpu_total = len([m for m in members if m.worker_type == WorkerType.GPU])

    # Tally workers per reported status string; other statuses are not counted.
    tally = {"idle": 0, "busy": 0, "stopped": 0, "error": 0}
    for member in members:
        state = member.get_status()["status"]
        for label in ("idle", "busy", "stopped", "error"):
            if state == label:
                tally[label] += 1
                break

    # Sum the per-worker job counters.
    completed = 0
    for member in members:
        completed += member.jobs_completed.value
    failed = 0
    for member in members:
        failed += member.jobs_failed.value

    queue_section = queue_manager.get_queue_stats()

    # Avoid dividing by zero when no job has finished yet.
    finished = completed + failed
    success_rate = completed / finished * 100 if finished > 0 else 0

    pool_section = {
        "is_running": self.is_running,
        "started_at": self.started_at.isoformat() if self.started_at else None,
        "total_workers": len(self.workers),
        "cpu_workers": cpu_total,
        "gpu_workers": gpu_total,
        "idle_workers": tally["idle"],
        "busy_workers": tally["busy"],
        "stopped_workers": tally["stopped"],
        "error_workers": tally["error"],
    }
    jobs_section = {
        "completed": completed,
        "failed": failed,
        "success_rate": success_rate,
    }
    return {
        "pool": pool_section,
        "jobs": jobs_section,
        "queue": queue_section,
    }
def health_check(self) -> dict:
    """
    Check every worker and try to start again those that are not alive.

    Returns:
        Dict with a UTC ISO timestamp, the worker count, the IDs found dead,
        the IDs whose restart call succeeded, and "healthy" (True only when
        no worker was found dead)
    """
    found_dead = []
    revived = []

    for name, member in list(self.workers.items()):
        if member.is_alive():
            continue

        logger.warning(f"Worker {name} is dead, restarting...")
        found_dead.append(name)

        # A failed restart is logged and the worker stays listed as dead only.
        try:
            member.start()
            revived.append(name)
            logger.info(f"Worker {name} restarted successfully")
        except Exception as e:
            logger.error(f"Failed to restart worker {name}: {e}")

    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_workers": len(self.workers),
        "dead_workers": found_dead,
        "restarted_workers": revived,
        "healthy": len(found_dead) == 0,
    }
    return report
def auto_scale(self, target_workers: int):
    """
    Grow or shrink the pool towards a target worker count.

    Growing always adds CPU workers. Shrinking only removes workers whose
    status is idle, so fewer workers than requested may be removed.

    Args:
        target_workers: Desired number of workers in the pool
    """
    shortfall = target_workers - len(self.workers)

    if shortfall > 0:
        logger.info(f"Auto-scaling: adding {shortfall} workers")
        for _ in range(shortfall):
            # Auto-scaling only ever adds CPU workers.
            self.add_worker(WorkerType.CPU)
    elif shortfall < 0:
        surplus = -shortfall
        logger.info(f"Auto-scaling: removing {surplus} workers")

        # Build the candidate list first; the registry changes while removing.
        removable = []
        for name, member in self.workers.items():
            if member.get_status()["status"] == WorkerStatus.IDLE.value:
                removable.append(name)

        for name in removable[:surplus]:
            self.remove_worker(name)
def _generate_worker_id(
    self,
    worker_type: WorkerType,
    device_id: Optional[int] = None
) -> str:
    """
    Generate a worker ID that is not yet used in the pool.

    IDs have the form "<prefix>-<n>" where the prefix is "cpu" or
    "gpu<device_id>". The lowest free <n> (starting at 1) is chosen, so an ID
    freed by remove_worker() is reused instead of colliding with a
    still-registered worker.

    Args:
        worker_type: CPU or GPU
        device_id: GPU device ID

    Returns:
        Worker ID string
    """
    prefix = "cpu" if worker_type == WorkerType.CPU else f"gpu{device_id}"

    # Collect the numbers already taken for exactly this prefix. Matching on
    # the part before the last "-" keeps "gpu1" from also matching "gpu10-1".
    taken = set()
    for wid in self.workers:
        head, sep, tail = wid.rpartition("-")
        if sep and head == prefix and tail.isdecimal():
            taken.add(int(tail))

    # Counting existing workers is not enough: after cpu-2 of cpu-1..cpu-3 is
    # removed, "count + 1" would yield cpu-3 again. Pick the first gap instead.
    index = 1
    while index in taken:
        index += 1

    return f"{prefix}-{index}"
def _get_gpu_count(self) -> int:
    """
    Report how many CUDA devices torch can see.

    Returns:
        torch's CUDA device count when torch is importable and CUDA is
        available; otherwise 1
    """
    gpu_total = 1  # Fallback when torch is missing or CUDA is unavailable
    try:
        import torch

        if torch.cuda.is_available():
            gpu_total = torch.cuda.device_count()
    except ImportError:
        pass
    return gpu_total
# Global worker pool instance: constructed once when this module is imported,
# so every importer of `worker_pool` works with the same WorkerPool object.
worker_pool = WorkerPool()

View File

@@ -0,0 +1,11 @@
"""Library scanning module for standalone mode."""
from backend.scanning.models import ScanRule
from backend.scanning.file_analyzer import FileAnalyzer, FileAnalysis
from backend.scanning.detected_languages import DetectedLanguage
# Names exported by `from backend.scanning import *`; each one is imported
# above from its defining submodule.
__all__ = [
    "ScanRule",
    "FileAnalyzer",
    "FileAnalysis",
    "DetectedLanguage",
]

View File

@@ -0,0 +1,41 @@
"""Model for storing detected audio languages."""
from sqlalchemy import Column, Integer, String, DateTime, Index
from sqlalchemy.sql import func
from backend.core.database import Base
class DetectedLanguage(Base):
    """
    Cache row holding the audio language detected for one media file.

    Used for files whose audio metadata does not declare a language, so the
    same file does not have to be analysed again on later scans.
    """

    __tablename__ = "detected_languages"

    id = Column(Integer, primary_key=True, autoincrement=True)
    file_path = Column(String(1024), nullable=False, unique=True, index=True)
    detected_language = Column(String(10), nullable=False)  # ISO 639-1 code
    detection_confidence = Column(Integer, nullable=True)  # 0-100
    detected_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)

    # Indexes for performance
    # NOTE(review): file_path already has unique=True/index=True above, so
    # idx_detected_lang_path looks like a second index on the same column - confirm.
    __table_args__ = (
        Index('idx_detected_lang_path', 'file_path'),
        Index('idx_detected_lang_language', 'detected_language'),
    )

    def __repr__(self):
        return f"<DetectedLanguage {self.file_path}: {self.detected_language}>"

    def to_dict(self) -> dict:
        """Return the row as a plain dict; the timestamp becomes an ISO string or None."""
        payload = {
            "id": self.id,
            "file_path": self.file_path,
            "detected_language": self.detected_language,
            "detection_confidence": self.detection_confidence,
        }
        stamp = self.detected_at
        payload["detected_at"] = stamp.isoformat() if stamp else None
        return payload

View File

@@ -0,0 +1,253 @@
"""File analyzer using ffprobe for media file inspection."""
import logging
import os
from typing import Optional, List, Dict
from dataclasses import dataclass
from backend.transcription.audio_utils import (
get_audio_tracks,
get_audio_languages,
get_subtitle_languages,
has_audio,
has_subtitle_of_language_in_folder,
)
from backend.core.language_code import LanguageCode
logger = logging.getLogger(__name__)
@dataclass
class AudioTrackInfo:
    """
    Information about an audio track.

    FileAnalyzer.analyze_file builds one instance per entry returned by
    get_audio_tracks(), copying the entry's values into these fields.
    """
    index: int  # taken from the entry's "index" value
    language: LanguageCode  # taken from the entry's "language" value
    codec: str  # taken from the entry's "codec" value
    channels: int  # taken from the entry's "channels" value
    is_default: bool  # taken from the entry's "default" value
    title: Optional[str] = None  # the entry's "title" value; None when the key is absent
@dataclass
class SubtitleTrackInfo:
    """
    Information about a subtitle track.

    FileAnalyzer._find_external_subtitles creates these for subtitle files
    found next to a video, with is_embedded=False and is_external=True.
    """
    language: LanguageCode  # language of the subtitle
    is_embedded: bool  # set to False for subtitles found as separate files
    is_external: bool  # set to True for subtitles found as separate files
    file_path: Optional[str] = None  # path of the external subtitle file, when there is one
@dataclass
class FileAnalysis:
    """
    Everything the analyzer learned about one media file.

    Holds the file's identity, its audio tracks, the languages of embedded
    subtitles and the external subtitle files found for it, plus convenience
    queries over those lists.
    """

    file_path: str
    file_name: str
    file_extension: str
    has_audio: bool
    audio_tracks: List[AudioTrackInfo]
    embedded_subtitles: List[LanguageCode]
    external_subtitles: List[SubtitleTrackInfo]

    @property
    def audio_languages(self) -> List[LanguageCode]:
        """Languages of all audio tracks, in track order (duplicates kept)."""
        collected = []
        for entry in self.audio_tracks:
            collected.append(entry.language)
        return collected

    @property
    def all_subtitle_languages(self) -> List[LanguageCode]:
        """Embedded subtitle languages followed by external ones not already listed."""
        merged = self.embedded_subtitles.copy()
        for sidecar in self.external_subtitles:
            if sidecar.language in merged:
                continue
            merged.append(sidecar.language)
        return merged

    @property
    def default_audio_language(self) -> Optional[LanguageCode]:
        """Language of the first default-flagged track, else of the first track, else None."""
        flagged = [entry for entry in self.audio_tracks if entry.is_default]
        if flagged:
            return flagged[0].language
        if self.audio_tracks:
            # No track is flagged as default: fall back to the first one.
            return self.audio_tracks[0].language
        return None

    def has_subtitle_language(self, language: LanguageCode) -> bool:
        """Tell whether any subtitle, embedded or external, is in the given language."""
        known = self.all_subtitle_languages
        return language in known

    def has_embedded_subtitle_language(self, language: LanguageCode) -> bool:
        """Tell whether an embedded subtitle is in the given language."""
        embedded = self.embedded_subtitles
        return language in embedded

    def has_external_subtitle_language(self, language: LanguageCode) -> bool:
        """Tell whether an external subtitle file is in the given language."""
        for sidecar in self.external_subtitles:
            if sidecar.language == language:
                return True
        return False
class FileAnalyzer:
    """Analyzer for media files using ffprobe."""

    # Supported video extensions
    VIDEO_EXTENSIONS = (
        ".mp4",
        ".mkv",
        ".avi",
        ".mov",
        ".wmv",
        ".flv",
        ".webm",
        ".mpg",
        ".mpeg",
        ".3gp",
        ".ogv",
        ".vob",
        ".rm",
        ".rmvb",
        ".ts",
        ".m4v",
        ".f4v",
        ".svq3",
        ".asf",
        ".m2ts",
        ".divx",
        ".xvid",
    )

    # Subtitle file extensions
    SUBTITLE_EXTENSIONS = {".srt", ".vtt", ".sub", ".ass", ".ssa", ".idx", ".sbv"}

    @staticmethod
    def is_video_file(file_path: str) -> bool:
        """
        Check if file is a video file by extension (case-insensitive).

        Args:
            file_path: Path to file

        Returns:
            True if video file
        """
        _, ext = os.path.splitext(file_path)
        return ext.lower() in FileAnalyzer.VIDEO_EXTENSIONS

    @staticmethod
    def analyze_file(file_path: str) -> "Optional[FileAnalysis]":
        """
        Analyze a media file completely.

        Non-video files, missing files and files without audio are skipped.
        Any error raised during analysis is logged and turned into None.

        Args:
            file_path: Path to media file

        Returns:
            FileAnalysis object, or None if the file is skipped or analysis fails
        """
        try:
            # Basic file info
            file_name = os.path.basename(file_path)
            _, file_extension = os.path.splitext(file_path)

            # Check if file is video
            if not FileAnalyzer.is_video_file(file_path):
                logger.debug(f"Skipping non-video file: {file_name}")
                return None

            # Check if file exists and has audio
            if not os.path.isfile(file_path):
                logger.warning(f"File not found: {file_path}")
                return None

            file_has_audio = has_audio(file_path)
            if not file_has_audio:
                logger.debug(f"File has no audio, skipping: {file_name}")
                return None

            # Get audio tracks
            audio_tracks_raw = get_audio_tracks(file_path)
            audio_tracks = [
                AudioTrackInfo(
                    index=track["index"],
                    language=track["language"],
                    codec=track["codec"],
                    channels=track["channels"],
                    is_default=track["default"],
                    title=track.get("title"),
                )
                for track in audio_tracks_raw
            ]

            # Get embedded subtitles
            embedded_subtitles = get_subtitle_languages(file_path)

            # Find external subtitles
            external_subtitles = FileAnalyzer._find_external_subtitles(file_path)

            return FileAnalysis(
                file_path=file_path,
                file_name=file_name,
                file_extension=file_extension.lower(),
                has_audio=file_has_audio,
                audio_tracks=audio_tracks,
                embedded_subtitles=embedded_subtitles,
                external_subtitles=external_subtitles,
            )
        except Exception as e:
            logger.error(f"Error analyzing file {file_path}: {e}")
            return None

    @staticmethod
    def _subtitle_name_parts(video_name: str, file_name: str) -> Optional[List[str]]:
        """
        Split the tags that follow the video name in a subtitle file name.

        Args:
            video_name: Video file name without extension
            file_name: Candidate file name from the same folder

        Returns:
            Dot-separated parts between the video name and the subtitle
            extension (e.g. ["subgen", "medium", "ja"]), or None when the
            file is not a subtitle file belonging to this video
        """
        # Compare extensions case-insensitively, like is_video_file does.
        if not file_name.lower().endswith(tuple(FileAnalyzer.SUBTITLE_EXTENSIONS)):
            return None

        stem, _ = os.path.splitext(file_name)
        if not stem.startswith(video_name):
            return None

        # The video name must be followed by "." (or nothing); otherwise
        # "Movie 2.en.srt" would be taken as a subtitle of "Movie".
        remainder = stem[len(video_name):]
        if remainder and not remainder.startswith("."):
            return None

        return remainder.lstrip(".").split(".")

    @staticmethod
    def _find_external_subtitles(video_file: str) -> "List[SubtitleTrackInfo]":
        """
        Find external subtitle files for a video.

        Looks in the video's folder for subtitle files named
        video_name.lang.ext or video_name.subgen.medium.lang.ext and keeps
        those whose name contains a recognisable language code. Errors are
        logged and whatever was collected so far is returned.

        Args:
            video_file: Path to video file

        Returns:
            List of SubtitleTrackInfo for external subtitles
        """
        external_subs = []
        video_folder = os.path.dirname(video_file)
        video_name = os.path.splitext(os.path.basename(video_file))[0]

        try:
            # A bare file name has an empty directory part; list the current
            # directory in that case instead of failing on os.listdir("").
            for file_name in os.listdir(video_folder or "."):
                parts = FileAnalyzer._subtitle_name_parts(video_name, file_name)
                if parts is None:
                    continue

                # The first part that parses as a language code wins.
                detected_language = None
                for part in parts:
                    lang = LanguageCode.from_string(part)
                    if lang != LanguageCode.NONE:
                        detected_language = lang
                        break

                if detected_language:
                    external_subs.append(
                        SubtitleTrackInfo(
                            language=detected_language,
                            is_embedded=False,
                            is_external=True,
                            file_path=os.path.join(video_folder, file_name),
                        )
                    )
        except Exception as e:
            logger.error(f"Error finding external subtitles for {video_file}: {e}")

        return external_subs

View File

@@ -0,0 +1,295 @@
"""Language detection service using Whisper."""
import logging
from typing import Optional, Tuple
from pathlib import Path
from backend.scanning.detected_languages import DetectedLanguage
from backend.core.language_code import LanguageCode
logger = logging.getLogger(__name__)
class LanguageDetector:
    """
    Service for detecting audio language in media files.

    Uses Whisper's language detection on a small audio sample.
    Results are cached in database to avoid re-detection.
    """

    # Full language names mapped to ISO 639-1 codes, used when the Whisper
    # result reports a name such as "japanese" instead of a 2-letter code.
    _LANGUAGE_NAME_TO_CODE = {
        'japanese': 'ja',
        'english': 'en',
        'spanish': 'es',
        'french': 'fr',
        'german': 'de',
        'italian': 'it',
        'portuguese': 'pt',
        'russian': 'ru',
        'chinese': 'zh',
        'korean': 'ko',
        'arabic': 'ar',
        'hindi': 'hi',
    }

    @staticmethod
    def detect_language(file_path: str, sample_duration: int = 30) -> "Tuple[Optional[LanguageCode], Optional[int]]":
        """
        Detect language of audio in a media file.

        First checks cache, then uses Whisper if needed. A fresh detection is
        written to the cache. Errors are logged and reported as (None, None).

        Args:
            file_path: Path to media file
            sample_duration: Seconds of audio to analyze (default: 30)

        Returns:
            Tuple of (LanguageCode, confidence_percentage) or (None, None)
        """
        # Check cache first
        cached = LanguageDetector._get_cached_language(file_path)
        if cached:
            logger.info(f"Using cached language for {Path(file_path).name}: {cached}")
            # _get_cached_language() only returns the language, so a cache hit
            # is reported with 100% confidence.
            return cached, 100

        # Detect using Whisper
        try:
            detected_lang, confidence = LanguageDetector._detect_with_whisper(
                file_path, sample_duration
            )
            if detected_lang:
                # Cache the result
                LanguageDetector._cache_language(file_path, detected_lang, confidence)
                logger.info(
                    f"Detected language for {Path(file_path).name}: "
                    f"{detected_lang} (confidence: {confidence}%)"
                )
                return detected_lang, confidence
            return None, None
        except Exception as e:
            logger.error(f"Language detection failed for {file_path}: {e}")
            return None, None

    @staticmethod
    def _get_cached_language(file_path: str) -> "Optional[LanguageCode]":
        """
        Get cached detected language from database.

        Args:
            file_path: Path to media file

        Returns:
            LanguageCode if cached, None otherwise
        """
        from backend.core.database import database

        with database.get_session() as session:
            cached = session.query(DetectedLanguage).filter(
                DetectedLanguage.file_path == file_path
            ).first()
            if cached:
                return LanguageCode.from_string(cached.detected_language)
        return None

    @staticmethod
    def _cache_language(
        file_path: str,
        language: "LanguageCode",
        confidence: Optional[int] = None
    ):
        """
        Cache detected language in database.

        Updates the existing row for the file if there is one, otherwise
        inserts a new row. A falsy language is stored as "und".

        Args:
            file_path: Path to media file
            language: Detected language code
            confidence: Detection confidence (0-100)
        """
        from backend.core.database import database

        with database.get_session() as session:
            # Check if entry exists
            existing = session.query(DetectedLanguage).filter(
                DetectedLanguage.file_path == file_path
            ).first()
            lang_code = language.to_iso_639_1() if language else "und"
            if existing:
                # Update existing
                existing.detected_language = lang_code
                existing.detection_confidence = confidence
            else:
                # Create new
                detected = DetectedLanguage(
                    file_path=file_path,
                    detected_language=lang_code,
                    detection_confidence=confidence
                )
                session.add(detected)
            session.commit()
            logger.debug(f"Cached language detection: {file_path} -> {lang_code}")

    @staticmethod
    def _extract_language(result) -> Tuple[Optional[str], float]:
        """
        Pull the language code and probability out of a transcription result.

        Accepts an object exposing ``language_code`` or ``language`` (2-letter
        code or full English name) and optionally ``language_probability``.
        A plain ``(segments, info)`` pair, as returned by faster-whisper's
        ``WhisperModel.transcribe``, is unwrapped to its ``info`` element.

        Args:
            result: Object returned by the model's transcribe call

        Returns:
            Tuple of (language code or None, probability); the probability
            defaults to 1.0 when the result does not provide one
        """
        # faster-whisper returns (segments, info); the language lives on info.
        if (
            isinstance(result, tuple)
            and len(result) == 2
            and not hasattr(result, 'language')
        ):
            result = result[1]

        lang_code_str = None
        probability = 1.0

        # Try to get language code (2-letter ISO 639-1)
        if hasattr(result, 'language_code'):
            lang_code_str = result.language_code
        elif hasattr(result, 'language'):
            # result.language might be full name like "japanese" or code like "ja"
            lang = result.language
            if lang:
                if len(lang) == 2:
                    # Already a code
                    lang_code_str = lang
                else:
                    # Full name - map to code (None when the name is unknown)
                    lang_code_str = LanguageDetector._LANGUAGE_NAME_TO_CODE.get(lang.lower())

        # Get language probability if available
        reported = getattr(result, 'language_probability', None)
        if reported is not None:
            probability = reported

        return lang_code_str, probability

    @staticmethod
    def _detect_with_whisper(
        file_path: str,
        sample_duration: int = 30
    ) -> "Tuple[Optional[LanguageCode], Optional[int]]":
        """
        Detect language using Whisper model.

        Extracts a sample from the middle of the file (or from the start when
        the duration cannot be probed), writes it to a temporary WAV file and
        runs the "tiny" model on it. The temporary file is always removed.

        Args:
            file_path: Path to media file
            sample_duration: Seconds of audio to analyze

        Returns:
            Tuple of (LanguageCode, confidence_percentage) or (None, None)
        """
        try:
            from backend.transcription.transcriber import WhisperTranscriber, WHISPER_AVAILABLE
            from backend.transcription.audio_utils import extract_audio_segment

            if not WHISPER_AVAILABLE:
                logger.error("Whisper not available - cannot detect language")
                return None, None

            # Get file duration first to extract from the middle
            import ffmpeg
            try:
                probe = ffmpeg.probe(file_path)
                duration = float(probe['format']['duration'])
                # Extract from the middle of the file for better detection
                # (beginning might have intro music, credits, etc.)
                start_time = max(0, (duration / 2) - (sample_duration / 2))
                logger.debug(
                    f"Extracting {sample_duration}s audio sample from middle of {file_path} "
                    f"(duration: {duration:.1f}s, sample start: {start_time:.1f}s)"
                )
            except Exception as e:
                logger.warning(f"Could not get file duration: {e}, using start of file")
                start_time = 0

            audio_data = extract_audio_segment(
                file_path,
                start_time=int(start_time),
                duration=sample_duration
            )
            if not audio_data:
                logger.warning(f"Failed to extract audio from {file_path}")
                return None, None

            # Save audio_data to temporary file since stable-whisper doesn't accept BytesIO
            import tempfile
            import os

            temp_audio_path = None
            try:
                # The file is created inside the try block so that it is also
                # removed when writing the sample fails.
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
                    temp_audio_path = temp_audio.name
                    temp_audio.write(audio_data.read())

                # Initialize transcriber (will use small/fast model for detection)
                transcriber = WhisperTranscriber(model_name="tiny")  # Tiny model for fast detection
                transcriber.load_model()

                # Detect language using Whisper
                logger.debug("Detecting language with Whisper...")
                # Use transcribe with language=None to trigger auto-detection
                # This is more reliable than detect_language()
                result = transcriber.model.transcribe(
                    temp_audio_path,  # Use file path instead of BytesIO
                    language=None,  # Auto-detect
                    task="transcribe",
                    vad_filter=False,  # Don't filter, just detect
                    beam_size=1,  # Faster
                    best_of=1,  # Faster
                    temperature=0.0,  # Deterministic
                    condition_on_previous_text=False,
                    initial_prompt=None,
                )

                if result:
                    lang_code_str, probability = LanguageDetector._extract_language(result)
                    if lang_code_str:
                        confidence = int(probability * 100)
                        language = LanguageCode.from_iso_639_1(lang_code_str)
                        logger.info(
                            f"Whisper detected language: {lang_code_str} "
                            f"(confidence: {confidence}%)"
                        )
                        return language, confidence
                    else:
                        logger.warning(f"Could not extract language code from result: {result}")

                return None, None
            finally:
                # Clean up temporary file
                if temp_audio_path is not None:
                    try:
                        os.unlink(temp_audio_path)
                    except OSError as e:
                        logger.warning(f"Failed to delete temporary audio file: {e}")
        except Exception as e:
            logger.error(f"Whisper language detection error: {e}", exc_info=True)
            return None, None

    @staticmethod
    def clear_cache(file_path: Optional[str] = None):
        """
        Clear language detection cache.

        Args:
            file_path: Specific file to clear, or None to clear all
        """
        from backend.core.database import database

        with database.get_session() as session:
            if file_path:
                session.query(DetectedLanguage).filter(
                    DetectedLanguage.file_path == file_path
                ).delete()
                logger.info(f"Cleared language cache for {file_path}")
            else:
                count = session.query(DetectedLanguage).delete()
                logger.info(f"Cleared all language cache ({count} entries)")
            session.commit()
# Global instance, created when this module is imported. All LanguageDetector
# methods are static, so this object only serves as a convenient import name.
language_detector = LanguageDetector()

View File

@@ -0,0 +1,894 @@
"""Library scanner with rule-based filtering and scheduling."""
import logging
import os
import time
from typing import List, Optional, Dict
from datetime import datetime, timezone
from pathlib import Path
from apscheduler.schedulers.background import BackgroundScheduler
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from backend.core.database import database
from backend.core.queue_manager import queue_manager
from backend.core.models import QualityPreset
from backend.scanning.models import ScanRule
from backend.scanning.file_analyzer import FileAnalyzer, FileAnalysis
from backend.scanning.language_detector import language_detector
from backend.core.language_code import LanguageCode
logger = logging.getLogger(__name__)
class LibraryFileHandler(FileSystemEventHandler):
    """Watchdog event handler that hands newly created video files to the scanner."""

    def __init__(self, scanner: "LibraryScanner"):
        """
        Remember the scanner that will process detected files.

        Args:
            scanner: Parent LibraryScanner instance
        """
        super().__init__()
        self.scanner = scanner

    def on_created(self, event: FileCreatedEvent):
        """
        React to a creation event by processing the file if it is a video.

        Directories and non-video files are ignored.

        Args:
            event: File creation event
        """
        if event.is_directory:
            return

        created_path = event.src_path
        if FileAnalyzer.is_video_file(created_path):
            # Give the writer a few seconds to finish before analysing the file.
            time.sleep(5)
            logger.info(f"New file detected: {created_path}")
            self.scanner.process_file(created_path)
class LibraryScanner:
"""
Library scanner with rule-based filtering.
Scans media libraries, analyzes files with ffprobe, and applies
configurable rules to determine which files need transcription.
Supports:
- One-time manual scans
- Scheduled periodic scans (cron-like)
- Real-time file watching (Tdarr-style)
"""
def __init__(self):
    """Create an idle scanner: no scheduler, no file watcher, counters at zero."""
    # Progress counters and state of the current/last scan
    self.files_queued = 0
    self.files_scanned = 0
    self.is_scanning = False
    self.last_scan_time: Optional[datetime] = None

    # Background helpers; both start out unset
    self.file_observer: Optional[Observer] = None
    self.scheduler: Optional[BackgroundScheduler] = None

    logger.info("LibraryScanner initialized")
def scan_libraries(self, paths: Optional[List[str]] = None) -> Dict:
    """
    Run one scan over the library directories.

    Refuses to start while another scan is in progress. When no paths are
    given, they are read from the 'library_paths' setting, which may be
    separated by "|" or by ",". The scanning flag is always cleared when
    the scan ends, whether it succeeded or not.

    Args:
        paths: Directories to scan (read from settings if None)

    Returns:
        Scan statistics on success, or a dict with an "error" message
    """
    if self.is_scanning:
        logger.warning("Scan already in progress")
        return {"error": "Scan already in progress"}

    self.is_scanning = True
    self.files_scanned = 0
    self.files_queued = 0
    started = time.time()

    try:
        if paths is None:
            from backend.core.settings_service import settings_service

            configured = settings_service.get('library_paths', '')
            if not configured:
                logger.error("No library paths configured")
                return {"error": "No library paths configured"}

            # A pipe anywhere in the setting means pipe-separated; otherwise commas.
            separator = "|" if '|' in configured else ","
            paths = []
            for chunk in configured.split(separator):
                if chunk.strip():
                    paths.append(chunk.strip())

        logger.info(f"Starting library scan: {len(paths)} paths")

        enabled_rules = self._load_scan_rules()
        logger.info(f"Loaded {len(enabled_rules)} enabled scan rules")

        for library_dir in paths:
            if os.path.isdir(library_dir):
                logger.info(f"Scanning: {library_dir}")
                self._scan_directory(library_dir, enabled_rules)
            else:
                logger.warning(f"Path not found or not a directory: {library_dir}")

        elapsed = time.time() - started
        self.last_scan_time = datetime.now(timezone.utc)
        self._persist_scan_stats(files_in_this_scan=self.files_scanned)

        summary = {
            "status": "completed",
            "files_scanned": self.files_scanned,
            "files_queued": self.files_queued,
            "duration_seconds": round(elapsed, 2),
            "timestamp": self.last_scan_time.isoformat(),
        }
        logger.info(
            f"Scan completed: {self.files_scanned} files scanned, "
            f"{self.files_queued} jobs queued in {elapsed:.1f}s"
        )
        return summary
    except Exception as e:
        logger.error(f"Scan failed: {e}", exc_info=True)
        return {"error": str(e)}
    finally:
        self.is_scanning = False
def _scan_directory(self, directory: str, rules: List[ScanRule]):
    """
    Walk a directory tree and process every file found in it.

    Each file increments ``files_scanned`` before it is processed. Errors
    raised during the walk are logged and end the walk of this directory.

    Args:
        directory: Root directory to walk
        rules: Scan rules passed on to process_file
    """
    try:
        for current_dir, _subdirs, filenames in os.walk(directory):
            for name in filenames:
                candidate = os.path.join(current_dir, name)
                self.files_scanned += 1
                self.process_file(candidate, rules)
    except Exception as e:
        logger.error(f"Error scanning directory {directory}: {e}")
def process_file(
    self, file_path: str, rules: Optional[List[ScanRule]] = None
) -> bool:
    """
    Analyse one file and queue work for it when appropriate.

    Files that cannot be analysed are ignored. Files without a known audio
    language get a language detection job; all others are checked against
    the scan rules and queued from the first matching rule.

    Args:
        file_path: Path to media file
        rules: Rules to evaluate (loaded on demand if None)

    Returns:
        Result of the queueing call, or False when nothing was queued or an
        error occurred
    """
    try:
        analysis = FileAnalyzer.analyze_file(file_path)
        if not analysis:
            return False

        # Without a known audio language the rules cannot be evaluated yet.
        language_known = (
            bool(analysis.default_audio_language)
            and len(analysis.audio_languages) != 0
        )
        if not language_known:
            logger.info(
                f"Audio language unknown for {analysis.file_name}, "
                f"queuing language detection job"
            )
            return self._queue_language_detection_job(analysis)

        active_rules = self._load_scan_rules() if rules is None else rules
        winner = self._evaluate_rules(analysis, active_rules)
        if not winner:
            return False
        return self._queue_job_from_rule(analysis, winner)
    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        return False
def _evaluate_rules(
    self, file_analysis: FileAnalysis, rules: List[ScanRule]
) -> Optional[ScanRule]:
    """
    Find the first rule, in the given order, that the file matches.

    Args:
        file_analysis: File analysis result
        rules: Scan rules to try, in order

    Returns:
        The first matching rule, or None when no rule matches
    """
    winner = next(
        (candidate for candidate in rules if self._rule_matches(file_analysis, candidate)),
        None,
    )
    if winner is not None:
        logger.debug(f"File {file_analysis.file_name} matches rule: {winner.name}")
    return winner
def _rule_matches(self, file_analysis: FileAnalysis, rule: ScanRule) -> bool:
    """
    Check if a file matches a scan rule.

    Every condition set on the rule must hold. When the rule requires an
    audio language that the file does not declare but the file has a track
    with undefined language, the language is detected with Whisper; on a
    match the first audio track is updated with the detected language and
    the remaining conditions are still evaluated.

    Args:
        file_analysis: File analysis
        rule: Scan rule

    Returns:
        True if all conditions match
    """
    # Check if rule has any conditions defined
    has_conditions = any([
        rule.file_extension,
        rule.audio_language_is,
        rule.audio_language_not,
        rule.audio_track_count_min,
        rule.has_embedded_subtitle_lang,
        rule.missing_embedded_subtitle_lang,
        rule.missing_external_subtitle_lang
    ])
    if not has_conditions:
        logger.warning(
            f"Rule '{rule.name}' has no conditions - will match ALL files. "
            f"This is probably not what you want!"
        )

    # Check file extension filter
    if rule.file_extension:
        if file_analysis.file_extension not in rule.file_extension_list:
            return False

    # Check audio language IS
    if rule.audio_language_is:
        target_lang = LanguageCode.from_string(rule.audio_language_is)
        if target_lang not in file_analysis.audio_languages:
            # A None entry means a track without language metadata.
            has_undefined_lang = None in file_analysis.audio_languages
            if not has_undefined_lang:
                # Language is defined but doesn't match
                logger.debug(
                    f"Rule '{rule.name}' audio check failed for {file_analysis.file_name}: "
                    f"Expected '{rule.audio_language_is}' but found "
                    f"{[str(lang) if lang else 'und' for lang in file_analysis.audio_languages]}"
                )
                return False

            logger.info(
                f"File {file_analysis.file_name} has undefined audio language - "
                f"attempting detection with Whisper..."
            )
            # detect_language() returns (language, confidence); only the
            # language is compared against the rule.
            detected_lang, _confidence = language_detector.detect_language(
                file_analysis.file_path
            )
            if not detected_lang:
                logger.warning(
                    f"Failed to detect language for {file_analysis.file_name} - skipping"
                )
                return False

            logger.info(
                f"Detected language for {file_analysis.file_name}: {detected_lang}"
            )
            if detected_lang != target_lang:
                logger.debug(
                    f"Rule '{rule.name}' failed: detected '{detected_lang}' "
                    f"but expected '{rule.audio_language_is}'"
                )
                return False

            logger.info(
                f"✓ Detected language '{detected_lang}' matches rule '{rule.name}'"
            )
            # Update file_analysis with detected language for later use
            if file_analysis.audio_tracks:
                file_analysis.audio_tracks[0].language = detected_lang
            # Do not return here: the remaining conditions must still hold.

    # Check audio language NOT
    if rule.audio_language_not:
        excluded_langs = [
            LanguageCode.from_string(lang) for lang in rule.audio_language_not_list
        ]
        if any(lang in file_analysis.audio_languages for lang in excluded_langs):
            return False

    # Check minimum audio tracks
    if rule.audio_track_count_min:
        if len(file_analysis.audio_tracks) < rule.audio_track_count_min:
            return False

    # Check HAS embedded subtitle
    if rule.has_embedded_subtitle_lang:
        required_lang = LanguageCode.from_string(rule.has_embedded_subtitle_lang)
        if not file_analysis.has_embedded_subtitle_language(required_lang):
            return False

    # Check MISSING embedded subtitle
    if rule.missing_embedded_subtitle_lang:
        excluded_lang = LanguageCode.from_string(rule.missing_embedded_subtitle_lang)
        if file_analysis.has_embedded_subtitle_language(excluded_lang):
            return False

    # Check MISSING external subtitle
    if rule.missing_external_subtitle_lang:
        excluded_lang = LanguageCode.from_string(rule.missing_external_subtitle_lang)
        if file_analysis.has_external_subtitle_language(excluded_lang):
            return False

    # All conditions matched
    logger.debug(
        f"File '{file_analysis.file_name}' matched rule '{rule.name}' "
        f"(priority: {rule.priority})"
    )
    return True
def _queue_language_detection_job(self, file_analysis: FileAnalysis) -> bool:
    """
    Create and queue a language detection job for a file with unknown audio language.

    If a completed detection job already exists for the file, no new job is
    queued; instead the language recorded in that job's srt_content is parsed
    and passed to _check_and_queue_transcription_for_file, and False is
    returned. Any exception is logged and reported as False.

    Args:
        file_analysis: File analysis

    Returns:
        True if a new detection job was queued, False otherwise
    """
    try:
        from backend.core.models import JobType, JobStatus
        # Check if there's already a completed detection job for this file
        with database.get_session() as session:
            from backend.core.models import Job
            existing_detection = session.query(Job).filter(
                Job.file_path == file_analysis.file_path,
                Job.job_type == JobType.LANGUAGE_DETECTION,
                Job.status == JobStatus.COMPLETED
            ).first()
            if existing_detection:
                logger.info(
                    f"✓ Language already detected for {file_analysis.file_name}, "
                    f"checking for transcription rules..."
                )
                # Extract detected language from SRT content
                if existing_detection.srt_content:
                    # Format: "Language detected: ja (Japanese)\nConfidence: 99%"
                    lines = existing_detection.srt_content.split('\n')
                    if lines:
                        lang_line = lines[0]
                        if 'Language detected:' in lang_line:
                            # Text after the first ":" up to the next space is the code
                            lang_code = lang_line.split(':')[1].strip().split(' ')[0]
                            # Trigger rule checking with detected language
                            self._check_and_queue_transcription_for_file(
                                file_analysis.file_path, lang_code
                            )
                # No detection job is queued in this case, hence False
                return False
        # Add language detection job with high priority
        job = queue_manager.add_job(
            file_path=file_analysis.file_path,
            file_name=file_analysis.file_name,
            source_lang=None,  # To be detected
            target_lang=None,
            quality_preset=QualityPreset.FAST,
            priority=15,  # Higher than normal transcription (0-10) but lower than manual (20+)
            transcribe_or_translate="transcribe",
            job_type=JobType.LANGUAGE_DETECTION,
        )
        if job:
            logger.info(
                f"✓ Queued LANGUAGE DETECTION job {job.id} for {file_analysis.file_name}"
            )
            self.files_queued += 1
            return True
        else:
            # A falsy result from add_job is treated as "job already exists"
            logger.warning(
                f"✗ Skipped detection for {file_analysis.file_name}: Job already exists"
            )
            return False
    except Exception as e:
        logger.error(f"Error queuing language detection job: {e}")
        return False
def _check_and_queue_transcription_for_file(self, file_path: str, detected_lang_code: str):
    """
    Queue a transcription job for the first scan rule matching a detected language.

    Only the audio-language conditions of each rule are evaluated here
    (``audio_language_is`` and ``audio_language_not``). Rules are checked in
    the order returned by ``_load_scan_rules`` and evaluation stops at the
    first rule that matches, whether or not the queue accepted the job.

    A rule whose language codes cannot be parsed is skipped with a warning so
    that one malformed rule does not prevent later rules from being checked.

    Args:
        file_path: Path to the file
        detected_lang_code: Detected language code (ISO 639-1, e.g., 'ja', 'en')
    """
    try:
        logger.info(
            f"Checking if {file_path} with language '{detected_lang_code}' "
            f"matches any scan rules..."
        )

        rules = self._load_scan_rules()
        if not rules:
            logger.debug("No active scan rules found")
            return

        # Loop-invariant: built once instead of once per rule
        quality_map = {
            "fast": QualityPreset.FAST,
            "balanced": QualityPreset.BALANCED,
            "best": QualityPreset.BEST,
        }

        for rule in rules:
            # Required language
            if rule.audio_language_is:
                try:
                    rule_lang = LanguageCode.from_string(rule.audio_language_is)
                    # Convert detected language (ISO 639-1) to LanguageCode for comparison
                    detected_lang = LanguageCode.from_iso_639_1(detected_lang_code)
                    if detected_lang != rule_lang:
                        logger.debug(
                            f"Rule '{rule.name}' requires language {rule_lang}, "
                            f"but detected {detected_lang}"
                        )
                        continue
                except Exception as e:
                    logger.warning(f"Could not parse rule language code: {e}")
                    continue

            # Excluded languages (comma-separated); blank entries are ignored
            if rule.audio_language_not:
                try:
                    excluded_langs = [
                        LanguageCode.from_string(code.strip())
                        for code in rule.audio_language_not.split(',')
                        if code.strip()
                    ]
                    detected_lang_obj = LanguageCode.from_iso_639_1(detected_lang_code)
                except Exception as e:
                    logger.warning(
                        f"Could not parse excluded language codes for rule '{rule.name}': {e}"
                    )
                    continue
                if detected_lang_obj in excluded_langs:
                    logger.debug(
                        f"Rule '{rule.name}' excludes language {detected_lang_code}"
                    )
                    continue

            # File matches this rule - queue transcription job
            logger.info(
                f"File {file_path} matches rule '{rule.name}' - queueing transcription job"
            )

            target_lang_code = rule.target_language or "eng"
            quality = quality_map.get(rule.quality_preset, QualityPreset.FAST)

            job = queue_manager.add_job(
                file_path=file_path,
                file_name=os.path.basename(file_path),
                source_lang=detected_lang_code,
                target_lang=target_lang_code,
                quality_preset=quality,
                transcribe_or_translate=rule.action_type or "translate",
                priority=rule.job_priority or 5,
                is_manual_request=False,
            )
            if job:
                logger.info(
                    f"✓ Queued transcription job {job.id} for {os.path.basename(file_path)}: "
                    f"{rule.action_type} {detected_lang_code} → {target_lang_code}"
                )
                self.files_queued += 1

            # Only queue once (first matching rule)
            return

        logger.debug(f"File {file_path} does not match any scan rules")
    except Exception as e:
        logger.error(
            f"Error checking scan rules for {file_path}: {e}",
            exc_info=True
        )
def _queue_job_from_rule(
    self, file_analysis: FileAnalysis, rule: ScanRule
) -> bool:
    """
    Create and queue a job based on a matched rule.

    The source language is taken from the file's default audio track (None
    when that is unknown). An unrecognized quality preset falls back to FAST.

    Args:
        file_analysis: File analysis
        rule: Matched scan rule

    Returns:
        True if job was queued successfully, False if the queue rejected it
        or an error occurred
    """
    try:
        quality_map = {
            "fast": QualityPreset.FAST,
            "balanced": QualityPreset.BALANCED,
            "best": QualityPreset.BEST,
        }
        quality_preset = quality_map.get(rule.quality_preset, QualityPreset.FAST)

        # Determine source language (default audio track)
        source_lang = file_analysis.default_audio_language
        source_lang_code = source_lang.to_iso_639_1() if source_lang else None

        job = queue_manager.add_job(
            file_path=file_analysis.file_path,
            file_name=file_analysis.file_name,
            source_lang=source_lang_code,
            target_lang=rule.target_language,
            quality_preset=quality_preset,
            priority=rule.job_priority,
            transcribe_or_translate=rule.action_type,
        )
        if not job:
            logger.warning(
                f"✗ Skipped {file_analysis.file_name}: Job already exists or in queue "
                f"(path: {file_analysis.file_path}, target: {rule.target_language})"
            )
            return False

        # Separator added so source and target codes no longer run together
        logger.info(
            f"✓ Queued job {job.id} for {file_analysis.file_name}: "
            f"{rule.action_type} {source_lang_code} → {rule.target_language}"
        )
        self.files_queued += 1
        return True
    except Exception as e:
        # Traceback included, matching the sibling rule-check method
        logger.error(f"Error queuing job: {e}", exc_info=True)
        return False
def _load_scan_rules(self) -> List[ScanRule]:
    """
    Fetch all enabled scan rules from the database.

    Returns:
        Enabled rules, highest priority first (ties broken by id), detached
        from the session so they remain usable after it closes
    """
    with database.get_session() as session:
        enabled_query = session.query(ScanRule).filter(ScanRule.enabled == True)
        ordered_query = enabled_query.order_by(ScanRule.priority.desc(), ScanRule.id)
        enabled_rules = ordered_query.all()

        # Detach each rule so it can be used outside the session context
        for loaded_rule in enabled_rules:
            session.expunge(loaded_rule)

    return enabled_rules
def _persist_scan_stats(self, files_in_this_scan: int = 0):
    """
    Store scan statistics in the settings table so they survive restarts.

    Writes the last scan time (when set), increments the stored scan count,
    and adds this scan's file count to the cumulative total when it is
    positive. Failures are logged, not raised.

    Args:
        files_in_this_scan: Number of files scanned in the current scan operation
    """
    from backend.core.settings_service import settings_service

    def _stored_counter(key):
        """Read a stored counter, treating missing or non-numeric values as 0."""
        raw_value = settings_service.get(key, 0)
        try:
            return int(raw_value)
        except (ValueError, TypeError):
            return 0

    try:
        if self.last_scan_time:
            settings_service.set(
                'scanner_last_scan_time',
                self.last_scan_time.isoformat(),
                category='scanner'
            )

        scan_count = _stored_counter('scanner_scan_count') + 1
        settings_service.set(
            'scanner_scan_count',
            str(scan_count),
            category='scanner'
        )

        if not files_in_this_scan > 0:
            logger.debug(f"Persisted scan stats: scan_count={scan_count}, last_scan={self.last_scan_time}")
            return

        # Cumulative total across all scans
        new_total = _stored_counter('scanner_total_files_scanned') + files_in_this_scan
        settings_service.set(
            'scanner_total_files_scanned',
            str(new_total),
            category='scanner'
        )
        logger.debug(f"Persisted scan stats: scan_count={scan_count}, last_scan={self.last_scan_time}, total_files={new_total}")
    except Exception as e:
        logger.error(f"Failed to persist scan stats: {e}")
# === Scheduler Methods ===
def start_scheduler(self, interval_minutes: Optional[int] = None):
    """
    Begin periodic library scans on a background scheduler.

    Does nothing except log a warning when a scheduler is already running.

    Args:
        interval_minutes: Scan interval (uses config if None)
    """
    current = self.scheduler
    if current and current.running:
        logger.warning("Scheduler already running")
        return

    from backend.core.settings_service import settings_service

    if interval_minutes:
        interval = interval_minutes
    else:
        interval = int(settings_service.get('scanner_schedule_interval_minutes', 360))

    self.scheduler = BackgroundScheduler()
    job_options = {
        "func": self.scan_libraries,
        "trigger": "interval",
        "minutes": interval,
        "id": "library_scan",
        "name": f"Library scan (every {interval}m)",
    }
    self.scheduler.add_job(**job_options)
    self.scheduler.start()
    logger.info(f"Scheduler started: scanning every {interval} minutes")
def stop_scheduler(self):
    """Shut down the periodic scan scheduler if one is running."""
    scheduler = self.scheduler
    if not scheduler or not scheduler.running:
        return

    try:
        # wait=False: do not block on a scan that is currently executing
        scheduler.shutdown(wait=False)
    except Exception as e:
        logger.warning(f"Error shutting down scheduler: {e}")
    self.scheduler = None
    logger.info("Scheduler stopped")
# === File Watcher Methods ===
def start_file_watcher(self, paths: Optional[List[str]] = None, recursive: bool = True):
    """
    Start real-time file watching.

    When ``paths`` is not given, the ``library_paths`` setting is used. That
    setting is accepted in any of these forms: an actual list, a JSON-encoded
    list (the form the setup wizard saves), or a string separated by ``|`` or
    ``,``. Entries that are not existing directories are skipped with a
    warning.

    Args:
        paths: Paths to watch (uses config if None)
        recursive: Whether to watch subdirectories
    """
    if self.file_observer:
        logger.warning("File watcher already running")
        return

    if paths is None:
        from backend.core.settings_service import settings_service

        library_paths = settings_service.get('library_paths', '')
        if not library_paths:
            logger.error("No library paths configured")
            return

        if isinstance(library_paths, (list, tuple)):
            raw_entries = list(library_paths)
        else:
            text = str(library_paths).strip()
            raw_entries = None
            if text.startswith("["):
                # The setup wizard stores the list via json.dumps
                import json

                try:
                    decoded = json.loads(text)
                except ValueError:
                    decoded = None
                if isinstance(decoded, list):
                    raw_entries = decoded
            if raw_entries is None:
                # Handle both comma and pipe separators
                separator = "|" if "|" in text else ","
                raw_entries = text.split(separator)

        paths = [str(entry).strip() for entry in raw_entries if str(entry).strip()]

    self.file_observer = Observer()
    handler = LibraryFileHandler(self)
    for path in paths:
        if os.path.isdir(path):
            self.file_observer.schedule(handler, path, recursive=recursive)
            logger.info(f"Watching: {path} (recursive={recursive})")
        else:
            logger.warning(f"Not watching {path}: not an existing directory")

    self.file_observer.start()
    logger.info("File watcher started")
def stop_file_watcher(self):
    """Stop the real-time file watcher if one was started."""
    observer = self.file_observer
    if not observer:
        return

    try:
        observer.stop()
        # Bounded join so shutdown cannot hang indefinitely
        observer.join(timeout=5.0)
    except Exception as e:
        logger.warning(f"Error stopping file watcher: {e}")
    self.file_observer = None
    logger.info("File watcher stopped")
def get_status(self) -> Dict:
    """
    Get scanner status.

    Combines in-memory state (scheduler, file watcher, last scan time of this
    session) with the counters persisted in the settings table.

    Returns:
        Dictionary with scheduler/watcher flags, watched paths, next and last
        scan times as ISO strings (or None), and cumulative scan counters
    """
    from backend.core.settings_service import settings_service

    def _stored_int(key: str) -> int:
        """Read a persisted counter, treating missing or invalid values as 0."""
        try:
            return int(settings_service.get(key, 0))
        except (ValueError, TypeError):
            return 0

    watched_paths = []
    if self.file_observer:
        for emitter in self.file_observer.emitters:
            # watchdog emitters expose the watched path on their ``watch``
            # object; fall back to the emitter itself if it has no ``watch``.
            watch = getattr(emitter, "watch", emitter)
            watched_paths.append(str(watch.path))

    next_scan_time = None
    if self.scheduler and self.scheduler.running:
        jobs = self.scheduler.get_jobs()
        # next_run_time may be None (e.g. a paused job)
        if jobs and jobs[0].next_run_time is not None:
            next_scan_time = jobs[0].next_run_time.isoformat()

    # Prefer the in-memory value (current session), else the persisted one
    last_scan_time = self.last_scan_time
    if last_scan_time is None:
        db_last_scan = settings_service.get('scanner_last_scan_time')
        if db_last_scan:
            try:
                last_scan_time = datetime.fromisoformat(db_last_scan)
            except (ValueError, TypeError):
                last_scan_time = None

    scan_count = _stored_int('scanner_scan_count')
    total_files_scanned = _stored_int('scanner_total_files_scanned')

    return {
        "scheduler_enabled": self.scheduler is not None,
        "scheduler_running": self.scheduler is not None and self.scheduler.running,
        "next_scan_time": next_scan_time,
        "watcher_enabled": self.file_observer is not None,
        "watcher_running": self.file_observer is not None,
        "watched_paths": watched_paths,
        "last_scan_time": last_scan_time.isoformat() if last_scan_time else None,
        "total_scans": scan_count,
        "total_files_scanned": total_files_scanned,
    }
def scan_paths(self, paths: List[str], recursive: bool = True) -> Dict:
    """
    Scan the given files and directories and process every video file found.

    Refuses to start (returning an ``error`` entry) while another scan is in
    progress. Paths that do not exist are skipped with a warning.

    Args:
        paths: List of paths to scan
        recursive: Whether to scan subdirectories

    Returns:
        Scan result dictionary
    """
    if self.is_scanning:
        logger.warning("Scan already in progress")
        return {
            "scanned_files": 0,
            "matched_files": 0,
            "jobs_created": 0,
            "skipped_files": 0,
            "paths_scanned": [],
            "error": "Scan already in progress"
        }

    def _video_files(location):
        """Yield video files under a directory, or the location itself if it is one."""
        if os.path.isdir(location):
            for root, _subdirs, names in os.walk(location):
                for name in names:
                    candidate = os.path.join(root, name)
                    if FileAnalyzer.is_video_file(candidate):
                        yield candidate
                # Non-recursive: only the top-level directory is visited
                if not recursive:
                    break
        elif os.path.isfile(location):
            if FileAnalyzer.is_video_file(location):
                yield location

    self.is_scanning = True
    scanned = 0
    queued = 0
    skipped = 0
    try:
        for location in paths:
            if not os.path.exists(location):
                logger.warning(f"Path does not exist: {location}")
                continue
            for video_path in _video_files(location):
                scanned += 1
                if self.process_file(video_path):
                    queued += 1
                else:
                    skipped += 1

        self.last_scan_time = datetime.now(timezone.utc)
        self.files_scanned += scanned
        self._persist_scan_stats(files_in_this_scan=scanned)

        # Every processed match creates exactly one job, so both counts agree
        return {
            "scanned_files": scanned,
            "matched_files": queued,
            "jobs_created": queued,
            "skipped_files": skipped,
            "paths_scanned": paths,
        }
    finally:
        self.is_scanning = False
# Global scanner instance: a single LibraryScanner created at module level,
# i.e. constructed when this module is imported.
library_scanner = LibraryScanner()

118
backend/scanning/models.py Normal file
View File

@@ -0,0 +1,118 @@
"""Database models for library scanning rules."""
from datetime import datetime
from typing import Optional, List
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Index
from sqlalchemy.sql import func
from backend.core.database import Base
class ScanRule(Base):
    """
    A rule that selects media files and says what to do with them.

    Every condition that is set must hold for the rule to apply; the action
    columns describe the job to create for a matching file.

    Example: "All Japanese audio without Spanish subtitles should be transcribed to Spanish"
    """

    __tablename__ = "scan_rules"

    # Primary identification
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(256), nullable=False, unique=True)
    enabled = Column(Boolean, nullable=False, default=True, index=True)
    priority = Column(Integer, nullable=False, default=0, index=True)

    # === CONDITION FILTERS (all must match for rule to apply) ===

    # Audio language filters
    audio_language_is = Column(String(10), nullable=True)
    """Audio track language must be this (ISO 639-1). Example: 'ja'"""
    audio_language_not = Column(String(64), nullable=True)
    """Audio track language must NOT be any of these (comma-separated). Example: 'en,es'"""
    audio_track_count_min = Column(Integer, nullable=True)
    """Minimum number of audio tracks required"""

    # Subtitle filters
    has_embedded_subtitle_lang = Column(String(10), nullable=True)
    """Must have embedded subtitle in this language. Example: 'en'"""
    missing_embedded_subtitle_lang = Column(String(10), nullable=True)
    """Must NOT have embedded subtitle in this language. Example: 'es'"""
    missing_external_subtitle_lang = Column(String(10), nullable=True)
    """Must NOT have external .srt file in this language. Example: 'es'"""

    # File format filters
    file_extension = Column(String(64), nullable=True)
    """File extension filter (comma-separated). Example: '.mkv,.mp4'"""

    # === ACTION (what to do when rule matches) ===
    action_type = Column(String(20), nullable=False, default="transcribe")
    """Action: 'transcribe' or 'translate'"""
    target_language = Column(String(10), nullable=False)
    """Target subtitle language (ISO 639-1). Example: 'es'"""
    quality_preset = Column(String(20), nullable=False, default="fast")
    """Quality preset: 'fast', 'balanced', or 'best'"""
    job_priority = Column(Integer, nullable=False, default=0)
    """Priority for jobs created by this rule (higher = processed first)"""

    # Metadata
    created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    def __repr__(self):
        """Return a short debugging representation."""
        state = 'enabled' if self.enabled else 'disabled'
        return f"<ScanRule {self.id}: {self.name} [{state}]>"

    def to_dict(self) -> dict:
        """Serialize the rule into the nested structure used by API responses."""
        condition_fields = (
            "audio_language_is",
            "audio_language_not",
            "audio_track_count_min",
            "has_embedded_subtitle_lang",
            "missing_embedded_subtitle_lang",
            "missing_external_subtitle_lang",
            "file_extension",
        )
        action_fields = (
            "action_type",
            "target_language",
            "quality_preset",
            "job_priority",
        )
        created = self.created_at.isoformat() if self.created_at else None
        updated = self.updated_at.isoformat() if self.updated_at else None
        return {
            "id": self.id,
            "name": self.name,
            "enabled": self.enabled,
            "priority": self.priority,
            "conditions": {field: getattr(self, field) for field in condition_fields},
            "action": {field: getattr(self, field) for field in action_fields},
            "created_at": created,
            "updated_at": updated,
        }

    @property
    def audio_language_not_list(self) -> List[str]:
        """Excluded audio languages as a list of trimmed, non-empty codes."""
        raw = self.audio_language_not
        if not raw:
            return []
        trimmed = (part.strip() for part in raw.split(","))
        return [part for part in trimmed if part]

    @property
    def file_extension_list(self) -> List[str]:
        """Allowed file extensions as a list of trimmed, non-empty entries."""
        raw = self.file_extension
        if not raw:
            return []
        trimmed = (part.strip() for part in raw.split(","))
        return [part for part in trimmed if part]
# Create indexes for common queries
# Composite index over (enabled, priority DESC).
Index('idx_scan_rules_enabled_priority', ScanRule.enabled, ScanRule.priority.desc())
# NOTE(review): ScanRule.name is already declared unique=True, so this
# explicit index may be redundant on the target database — confirm.
Index('idx_scan_rules_name', ScanRule.name)

568
backend/setup_wizard.py Normal file
View File

@@ -0,0 +1,568 @@
"""Setup wizard for first-time configuration."""
import os
import sys
import socket
from pathlib import Path
from typing import Optional, List, Dict
class SetupWizard:
"""Interactive first-run setup wizard driven by terminal prompts."""
def __init__(self):
"""Initialize setup wizard."""
# NOTE(review): config_file is not read by any other method of this class
# visible here (configuration is saved to the database) — confirm whether
# it is still needed.
self.config_file = Path(".env")
def is_first_run(self) -> bool:
    """
    Report whether setup still has to be performed.

    Returns:
        True unless the ``setup_completed`` setting equals "true"; also True
        when the setting cannot be read at all
    """
    try:
        from backend.core.settings_service import settings_service

        completed_flag = settings_service.get("setup_completed", None)
    except Exception:
        # Database not initialized yet, assume first run
        return True
    return completed_flag != "true"
def run(self) -> bool:
    """
    Drive the wizard: choose a mode, collect its settings, save them.

    Returns:
        True if setup completed successfully, False if the user cancelled or
        saving failed
    """
    banner = "=" * 70
    print("\n" + banner)
    print(" 🎬 TranscriptorIO - First Run Setup Wizard")
    print(banner + "\n")

    # Step 1: Select mode
    mode = self._select_mode()
    if not mode:
        return False

    # Step 2: Configure based on mode (anything but standalone is bazarr)
    configure = (
        self._configure_standalone_mode
        if mode == "standalone"
        else self._configure_bazarr_mode
    )
    config = configure()
    if not config:
        return False

    # Step 3: Save configuration to database
    return self._save_to_database(config)
def _select_mode(self) -> Optional[str]:
"""
Prompt user to select operation mode.
Returns:
'standalone' or 'bazarr', or None if cancelled
"""
print("Select operation mode:\n")
print(" 1. Standalone Mode")
print(" - Automatic library scanning")
print(" - Rule-based subtitle generation")
print(" - Scheduled/real-time file watching")
print()
print(" 2. Bazarr Slave Mode")
print(" - Receives tasks from Bazarr")
print(" - Custom provider integration")
print(" - On-demand transcription only")
print()
while True:
choice = input("Enter mode (1 or 2): ").strip()
if choice == "1":
return "standalone"
elif choice == "2":
return "bazarr"
elif choice.lower() in ["q", "quit", "exit"]:
print("\nSetup cancelled.")
return None
else:
print("Invalid choice. Please enter 1 or 2 (or 'q' to quit).\n")
def _configure_standalone_mode(self) -> Optional[dict]:
"""
Configure standalone mode settings.
Returns:
Configuration dict or None if cancelled
"""
print("\n" + "-" * 70)
print(" 📁 Standalone Mode Configuration")
print("-" * 70 + "\n")
config = {
"transcriptarr_mode": "standalone",
"scanner_enabled": True,
"scanner_schedule_enabled": True,
"scanner_file_watcher_enabled": False,
"bazarr_provider_enabled": False,
}
# Step 1: Library paths
print("Step 1: Library Paths")
print("-" * 40)
library_paths = self._configure_library_paths()
if not library_paths:
return None
config["library_paths"] = library_paths
# Step 2: Scanner settings
print("\nStep 2: Scanner Configuration")
print("-" * 40)
scanner_config = self._configure_scanner()
config.update(scanner_config)
# Step 3: Worker configuration
print("\nStep 3: Worker Configuration")
print("-" * 40)
worker_config = self._configure_workers()
config.update(worker_config)
# Step 4: Scan rules (at least one)
print("\nStep 4: Scan Rules")
print("-" * 40)
print("You need at least one scan rule to determine which files to process.\n")
rules = []
while True:
rule = self._create_scan_rule(len(rules) + 1)
if rule:
rules.append(rule)
print(f"\n✅ Rule {len(rules)} created successfully!\n")
if len(rules) >= 1:
add_more = input("Add another rule? (y/n) [n]: ").strip().lower()
if add_more != "y":
break
else:
if len(rules) == 0:
print("\n⚠️ You need at least one rule. Let's try again.\n")
else:
break
config["scan_rules"] = rules
return config
def _configure_library_paths(self) -> Optional[List[str]]:
"""
Configure library paths to scan.
Returns:
List of paths or None if cancelled
"""
print("Enter the folders where your media files are stored.")
print("You can add multiple paths (one per line). Enter empty line when done.\n")
print("Examples:")
print(" /media/anime")
print(" /mnt/movies")
print(" /data/series\n")
paths = []
while True:
if len(paths) == 0:
prompt = "Enter first path: "
else:
prompt = f"Enter path {len(paths) + 1} (or press Enter to finish): "
path = input(prompt).strip()
# Empty input
if not path:
if len(paths) == 0:
print("❌ You need at least one path.\n")
continue
else:
break
# Validate path
if not os.path.isabs(path):
print("❌ Path must be absolute (start with /).\n")
continue
if not os.path.isdir(path):
print(f"⚠️ Warning: Path '{path}' does not exist.")
confirm = input("Add it anyway? (y/n): ").strip().lower()
if confirm != "y":
continue
paths.append(path)
print(f"✅ Added: {path}\n")
print(f"\n📁 Total paths configured: {len(paths)}")
for i, p in enumerate(paths, 1):
print(f" {i}. {p}")
return paths
def _configure_scanner(self) -> dict:
"""
Configure scanner settings.
Returns:
Scanner configuration dict
"""
config = {}
# Scheduled scanning
print("\n🕒 Scheduled Scanning")
print("Scan your library periodically (e.g., every 60 minutes).\n")
enable_schedule = input("Enable scheduled scanning? (y/n) [y]: ").strip().lower()
config["scanner_schedule_enabled"] = enable_schedule != "n"
if config["scanner_schedule_enabled"]:
while True:
interval = input("Scan interval in minutes [60]: ").strip()
if not interval:
interval = "60"
try:
interval_int = int(interval)
if interval_int < 1:
print("❌ Interval must be at least 1 minute.\n")
continue
config["scanner_schedule_interval_minutes"] = interval_int
break
except ValueError:
print("❌ Please enter a valid number.\n")
# File watcher
print("\n👁️ Real-time File Watching")
print("Detect new files immediately as they are added (more CPU intensive).\n")
enable_watcher = input("Enable real-time file watching? (y/n) [n]: ").strip().lower()
config["scanner_file_watcher_enabled"] = enable_watcher == "y"
return config
def _configure_workers(self) -> dict:
"""
Configure worker auto-start settings.
Returns:
Worker configuration dict
"""
config = {}
print("\n⚙️ Worker Auto-Start Configuration")
print("Workers process transcription jobs. Configure how many should start automatically.\n")
# Check if Whisper is available
try:
from backend.transcription.transcriber import WHISPER_AVAILABLE
if not WHISPER_AVAILABLE:
print("⚠️ WARNING: Whisper is not installed!")
print(" Workers will not start until you install stable-ts or faster-whisper.")
print(" You can configure workers now and install Whisper later.\n")
except ImportError:
print("⚠️ WARNING: Could not check Whisper availability.\n")
# CPU workers
print("🖥️ CPU Workers")
print("CPU workers use your processor. Recommended: 1-2 workers.\n")
while True:
cpu_input = input("Number of CPU workers to start on boot [1]: ").strip()
if not cpu_input:
cpu_input = "1"
try:
cpu_count = int(cpu_input)
if cpu_count < 0:
print("❌ Must be 0 or greater.\n")
continue
config["worker_cpu_count"] = cpu_count
break
except ValueError:
print("❌ Please enter a valid number.\n")
# GPU workers
print("\n🎮 GPU Workers")
print("GPU workers use your graphics card (much faster if available).")
print("Only configure if you have CUDA-compatible GPU.\n")
while True:
gpu_input = input("Number of GPU workers to start on boot [0]: ").strip()
if not gpu_input:
gpu_input = "0"
try:
gpu_count = int(gpu_input)
if gpu_count < 0:
print("❌ Must be 0 or greater.\n")
continue
config["worker_gpu_count"] = gpu_count
break
except ValueError:
print("❌ Please enter a valid number.\n")
if config["worker_cpu_count"] == 0 and config["worker_gpu_count"] == 0:
print("\n⚠️ No workers configured. You can add them later in Settings.")
else:
total = config["worker_cpu_count"] + config["worker_gpu_count"]
print(f"\n✅ Configured {total} worker(s) to start automatically:")
if config["worker_cpu_count"] > 0:
print(f"{config['worker_cpu_count']} CPU worker(s)")
if config["worker_gpu_count"] > 0:
print(f"{config['worker_gpu_count']} GPU worker(s)")
return config
def _create_scan_rule(self, rule_number: int) -> Optional[dict]:
"""
Create a single scan rule interactively.
Args:
rule_number: Rule number for display
Returns:
Rule dict or None if cancelled
"""
print(f"\nCreating Rule #{rule_number}")
print("=" * 40)
# Rule name
name = input(f"Rule name (e.g., 'Japanese anime to Spanish'): ").strip()
if not name:
name = f"Rule {rule_number}"
# Source audio language
print("\nSource audio language (ISO 639-2 code):")
print(" jpn = Japanese")
print(" eng = English")
print(" ron = Romanian")
print(" spa = Spanish")
print(" (or leave empty for any language)")
audio_lang = input("Audio language [any]: ").strip().lower() or None
# Task type
print("\nAction type:")
print(" 1. Transcribe (audio → English subtitles)")
print(" 2. Translate (audio → English → target language)")
print("\n📝 Note:")
print(" • Transcribe: Always creates English subtitles (.eng.srt)")
print(" • Translate: Creates English + target language subtitles (.eng.srt + .spa.srt)")
while True:
task_choice = input("Choose action (1 or 2) [1]: ").strip()
if not task_choice or task_choice == "1":
action_type = "transcribe"
target_lang = "eng" # Transcribe always targets English
print("✓ Target language set to: eng (English)")
break
elif task_choice == "2":
action_type = "translate"
print("\nTarget subtitle language (ISO 639-2 code):")
print("Examples: spa (Spanish), fra (French), deu (German), ita (Italian)")
target_lang = input("Target language: ").strip().lower()
if not target_lang:
print("❌ Target language is required for translate mode.")
continue
if target_lang == "eng":
print("⚠️ Note: Target is English. Consider using 'transcribe' instead.")
print(f"✓ Will create: .eng.srt + .{target_lang}.srt")
break
else:
print("❌ Invalid choice. Please enter 1 or 2.\n")
# Check for missing subtitles
print("\nOnly process files that are missing subtitles?")
check_missing = input("Check for missing subtitle (y/n) [y]: ").strip().lower()
missing_subtitle_lang = target_lang if check_missing != "n" else None
# Priority
print("\nRule priority (higher = evaluated first):")
while True:
priority_input = input("Priority [10]: ").strip()
if not priority_input:
priority = 10
break
try:
priority = int(priority_input)
break
except ValueError:
print("❌ Please enter a valid number.\n")
rule = {
"name": name,
"enabled": True,
"priority": priority,
"audio_language_is": audio_lang,
"missing_external_subtitle_lang": missing_subtitle_lang,
"action_type": action_type,
"target_language": target_lang,
"quality_preset": "fast",
"job_priority": 0,
}
# Show summary
print("\n📋 Rule Summary:")
print(f" Name: {name}")
print(f" Audio: {audio_lang or 'any'}")
print(f" Action: {action_type}")
if action_type == "transcribe":
print(f" Output: .eng.srt (English subtitles)")
else:
print(f" Output: .eng.srt + .{target_lang}.srt")
print(f" Check missing: {'yes' if missing_subtitle_lang else 'no'}")
print(f" Priority: {priority}")
return rule
def _configure_bazarr_mode(self) -> Optional[dict]:
"""
Configure Bazarr slave mode settings.
Returns:
Configuration dict or None if cancelled
"""
print("\n" + "-" * 70)
print(" 🔌 Bazarr Slave Mode Configuration")
print("-" * 70 + "\n")
config = {
"transcriptarr_mode": "bazarr",
"scanner_enabled": False,
"scanner_schedule_enabled": False,
"scanner_file_watcher_enabled": False,
"bazarr_provider_enabled": True,
}
# Get network info
hostname = socket.gethostname()
# Try to get local IP
try:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
local_ip = s.getsockname()[0]
s.close()
except Exception:
local_ip = "127.0.0.1"
print("Bazarr will send transcription requests to this service.\n")
print("📡 Connection Information:")
print("=" * 70)
print(f"\n Hostname: {hostname}")
print(f" Local IP: {local_ip}")
print(f" Port: 8000 (default)\n")
print("Configure Bazarr custom provider with these URLs:")
print("-" * 70)
print(f"\n Localhost (same machine):")
print(f" http://localhost:8000/asr")
print(f" http://127.0.0.1:8000/asr\n")
print(f" Local Network (other machines):")
print(f" http://{local_ip}:8000/asr\n")
print("=" * 70)
print("\nPress Enter to continue...")
input()
return config
def _save_to_database(self, config: dict) -> bool:
    """
    Save configuration to database instead of .env.

    Settings are stored as strings: booleans as "true"/"false", integers as
    decimal text, and the ``library_paths`` list as a JSON string. Scan rules
    found under the ``scan_rules`` key are created as ScanRule rows. After
    everything else succeeds, ``setup_completed`` is set to "true" so that
    ``is_first_run`` stops reporting a first run.

    The caller's ``config`` dict is not modified.

    Args:
        config: Configuration dictionary

    Returns:
        True if saved successfully, False if any step raised
    """
    print("\n" + "-" * 70)
    print(" 💾 Saving Configuration")
    print("-" * 70 + "\n")
    try:
        # Import here to avoid circular imports
        from backend.core.database import database
        from backend.core.settings_service import settings_service

        # Initialize database if needed
        print("Initializing database...")
        database.init_db()

        # Initialize default settings
        print("Initializing settings...")
        settings_service.init_default_settings()

        # Work on a copy so the caller's dict keeps its scan rules
        pending = dict(config)
        scan_rules = pending.pop("scan_rules", [])

        settings_dict = {}
        for key, value in pending.items():
            if key == "library_paths" and isinstance(value, list):
                import json

                value = json.dumps(value)
            # bool must be tested before int: bool is a subclass of int, so
            # the reverse order stored "True"/"False" instead of "true"/"false"
            elif isinstance(value, bool):
                value = str(value).lower()
            elif isinstance(value, int):
                value = str(value)
            settings_dict[key] = value

        print(f"Saving {len(settings_dict)} settings...")
        settings_service.update_multiple(settings_dict)

        # Create scan rules if in standalone mode
        if scan_rules:
            from backend.core.database import get_session
            from backend.scanning.models import ScanRule

            print(f"Creating {len(scan_rules)} scan rules...")
            with get_session() as session:
                for rule_data in scan_rules:
                    session.add(ScanRule(**rule_data))
                session.commit()

        # Mark setup as done only once settings and rules are stored
        settings_service.update_multiple({"setup_completed": "true"})

        print("\n✅ Configuration saved successfully!")
        print("\n" + "=" * 70)
        print(" 🚀 Setup Complete!")
        print("=" * 70)
        print("\nYou can now start the server with:")
        print(" python backend/cli.py server\n")
        print("Or with auto-reload for development:")
        print(" python backend/cli.py server --reload\n")
        if config.get("transcriptarr_mode") == "standalone":
            print("Access the Web UI at:")
            print(" http://localhost:8000\n")
        return True
    except Exception as e:
        print(f"\n❌ Error saving configuration: {e}")
        import traceback

        traceback.print_exc()
        return False
def run_setup_wizard() -> bool:
    """
    Launch the setup wizard, but only on a first run.

    Returns:
        True if setup completed or not needed
    """
    wizard = SetupWizard()
    if wizard.is_first_run():
        print("\n⚠️ First run detected - configuration needed\n")
        return wizard.run()
    return True
# Script entry point: run the wizard if needed and exit with status 0 when
# run_setup_wizard() returns True, 1 otherwise.
if __name__ == "__main__":
success = run_setup_wizard()
sys.exit(0 if success else 1)

View File

@@ -0,0 +1,5 @@
"""Whisper transcription module."""
from backend.transcription.transcriber import WhisperTranscriber
from backend.transcription.translator import SRTTranslator, translate_srt_file
__all__ = ["WhisperTranscriber", "SRTTranslator", "translate_srt_file"]

View File

@@ -0,0 +1,354 @@
"""Audio processing utilities extracted from transcriptarr.py."""
import logging
import os
from io import BytesIO
from typing import List, Dict, Optional
import ffmpeg
# Optional import - graceful degradation if not available
try:
import av
AV_AVAILABLE = True
except ImportError:
av = None
AV_AVAILABLE = False
logging.warning("av (PyAV) not available. Some audio features may not work.")
from backend.core.language_code import LanguageCode
logger = logging.getLogger(__name__)
def extract_audio_segment(
    input_file: str,
    start_time: int,
    duration: int,
) -> BytesIO:
    """
    Extract a segment of audio from a file to memory.

    The segment is converted by ffmpeg to WAV (16-bit PCM, 16 kHz sample
    rate) and captured from its standard output.

    Args:
        input_file: Path to input media file
        start_time: Start time in seconds
        duration: Duration in seconds

    Returns:
        BytesIO object containing audio segment

    Raises:
        ffmpeg.Error: If ffmpeg fails (its stderr is logged first)
        ValueError: If ffmpeg produced no output
    """
    try:
        logger.debug(f"Extracting audio: {input_file}, start={start_time}s, duration={duration}s")
        out, _ = (
            ffmpeg.input(input_file, ss=start_time, t=duration)
            .output("pipe:1", format="wav", acodec="pcm_s16le", ar=16000)
            .run(capture_stdout=True, capture_stderr=True)
        )
        if not out:
            raise ValueError("FFmpeg output is empty")
        return BytesIO(out)
    except ffmpeg.Error as e:
        # Decode defensively: stderr may be missing or contain non-UTF-8
        # bytes, and a failure here would replace the original ffmpeg.Error.
        raw_stderr = e.stderr
        if isinstance(raw_stderr, bytes):
            stderr_text = raw_stderr.decode("utf-8", errors="replace")
        else:
            stderr_text = raw_stderr or ""
        logger.error(f"FFmpeg error: {stderr_text}")
        raise
    except Exception as e:
        logger.error(f"Error extracting audio: {e}")
        raise
def get_audio_tracks(video_file: str) -> List[Dict]:
    """
    Get information about audio tracks in a media file.

    Probes the file with ffprobe (audio streams only) and builds one dict per
    stream. Failures are logged and reported as an empty list rather than
    raised.

    Args:
        video_file: Path to media file

    Returns:
        List of dicts with keys ``index``, ``codec``, ``channels``,
        ``language`` (result of ``LanguageCode.from_iso_639_2``), ``title``,
        ``default``, ``forced``, ``original`` and ``commentary``. Empty list
        when probing fails.
    """
    try:
        probe = ffmpeg.probe(video_file, select_streams="a")
        audio_tracks = []
        for stream in probe.get("streams", []):
            tags = stream.get("tags", {})
            disposition = stream.get("disposition", {})
            title = tags.get("title", "")
            # Try different common tag names (MKV uses different conventions)
            lang_tag = (
                tags.get("language")         # Standard location
                or tags.get("LANGUAGE")      # Uppercase variant
                or tags.get("lang")          # Short form
                or stream.get("language")    # Sometimes at stream level
                or "und"                     # Default: undefined
            )
            # Log ALL tags for debugging
            logger.debug(
                f"Audio track {stream.get('index')}: "
                f"codec={stream.get('codec_name')}, "
                f"lang_tag='{lang_tag}', "
                f"all_tags={tags}"
            )
            language = LanguageCode.from_iso_639_2(lang_tag)
            # Log when language is undefined
            if lang_tag == "und" or language is None:
                logger.warning(
                    f"Audio track {stream.get('index')} in {video_file}: "
                    f"Language undefined (tag='{lang_tag}'). "
                    f"Available tags: {list(tags.keys())}"
                )
            audio_tracks.append({
                "index": int(stream.get("index", 0)),
                "codec": stream.get("codec_name", "unknown"),
                "channels": int(stream.get("channels", 0)),
                "language": language,
                "title": title,
                "default": disposition.get("default", 0) == 1,
                "forced": disposition.get("forced", 0) == 1,
                "original": disposition.get("original", 0) == 1,
                # Heuristic: a track is commentary if its title says so.
                "commentary": "commentary" in title.lower(),
            })
        return audio_tracks
    except ffmpeg.Error as e:
        # Decode stderr so the log shows readable text instead of a bytes
        # repr, matching the other ffmpeg helpers in this module.
        stderr = e.stderr
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        logger.error(f"FFmpeg error: {stderr}")
        return []
    except Exception as e:
        logger.error(f"Error reading audio tracks: {e}")
        return []
def extract_audio_track_to_memory(
    input_video_path: str, track_index: int
) -> Optional[BytesIO]:
    """
    Pull one audio stream out of a media file into an in-memory WAV.

    The stream is selected with ffmpeg's ``-map 0:<track_index>`` and
    converted to mono, 16 kHz WAV written to ffmpeg's stdout.

    Args:
        input_video_path: Path to video file
        track_index: Stream index used in the ``0:<index>`` map specifier

    Returns:
        BytesIO with audio data, or None when ``track_index`` is None or
        ffmpeg fails
    """
    if track_index is None:
        logger.warning(f"Skipping audio track extraction for {input_video_path}")
        return None

    output_options = {
        "map": f"0:{track_index}",
        "format": "wav",
        "ac": 1,
        "ar": 16000,
        "loglevel": "quiet",
    }
    try:
        stream = ffmpeg.input(input_video_path).output("pipe:", **output_options)
        wav_bytes, _ = stream.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as ffmpeg_err:
        logger.error(f"FFmpeg error extracting track: {ffmpeg_err.stderr.decode()}")
        return None
    return BytesIO(wav_bytes)
def get_audio_languages(video_path: str) -> List[LanguageCode]:
    """
    Collect the language of every audio track in a media file.

    Args:
        video_path: Path to video file

    Returns:
        The ``language`` entry of each track reported by
        ``get_audio_tracks``, in track order
    """
    languages = []
    for track_info in get_audio_tracks(video_path):
        languages.append(track_info["language"])
    return languages
def get_subtitle_languages(video_path: str) -> List[LanguageCode]:
    """
    Extract language codes from subtitle streams.

    Args:
        video_path: Path to video file

    Returns:
        List of LanguageCode objects, one per subtitle stream
        (``LanguageCode.NONE`` for streams without a language tag). Empty
        when PyAV is unavailable; whatever was collected so far when reading
        fails part-way.
    """
    languages = []
    # Same guard as the other PyAV-based helpers in this module: without it a
    # missing PyAV surfaced as an AttributeError on ``None.open``.
    if not AV_AVAILABLE or av is None:
        logger.warning(f"av (PyAV) not available, cannot read subtitle languages for {video_path}")
        return languages
    try:
        with av.open(video_path) as container:
            for stream in container.streams.subtitles:
                lang_code = stream.metadata.get("language")
                if lang_code:
                    languages.append(LanguageCode.from_iso_639_2(lang_code))
                else:
                    languages.append(LanguageCode.NONE)
    except Exception as e:
        logger.error(f"Error reading subtitle languages: {e}")
    return languages
def has_audio(file_path: str) -> bool:
    """
    Report whether a media file contains at least one usable audio stream.

    A stream counts when its type is ``"audio"`` and it has a codec context
    whose name is not ``"none"``.

    Args:
        file_path: Path to media file

    Returns:
        True if such a stream exists, or if PyAV is unavailable and the check
        cannot be made. False for a missing file, no audio stream, or any
        error while opening the file.
    """
    if not AV_AVAILABLE or av is None:
        logger.warning(f"av (PyAV) not available, cannot check audio for {file_path}")
        # Optimistic default: let processing proceed when we cannot verify.
        return True
    try:
        if not os.path.isfile(file_path):
            return False
        with av.open(file_path) as container:
            return any(
                stream.type == "audio"
                and stream.codec_context
                and stream.codec_context.name != "none"
                for stream in container.streams
            )
    except Exception as err:
        # Broad on purpose: av-specific exception types may be unavailable.
        logger.debug(f"Error checking audio in {file_path}: {err}")
        return False
def has_subtitle_language_in_file(
    video_file: str, target_language: LanguageCode
) -> bool:
    """
    Tell whether a video carries an embedded subtitle in a given language.

    Only subtitle streams that have a ``language`` metadata entry are
    considered; the entry is lower-cased and parsed with
    ``LanguageCode.from_string``.

    Args:
        video_file: Path to video file
        target_language: Language to check for

    Returns:
        True if a matching subtitle stream exists; False otherwise, including
        when PyAV is unavailable or the file cannot be read
    """
    if not AV_AVAILABLE or av is None:
        logger.warning(f"av (PyAV) not available, cannot check subtitles for {video_file}")
        return False
    try:
        with av.open(video_file) as container:
            for stream in container.streams:
                if stream.type != "subtitle":
                    continue
                if "language" not in stream.metadata:
                    continue
                raw_language = stream.metadata.get("language", "").lower()
                if LanguageCode.from_string(raw_language) == target_language:
                    logger.debug(f"Found subtitles in '{target_language}' in video")
                    return True
        return False
    except Exception as err:
        logger.error(f"Error checking subtitles: {err}")
        return False
def has_subtitle_of_language_in_folder(
    video_file: str, target_language: LanguageCode
) -> bool:
    """
    Check if external subtitle file exists for video.

    Looks in the video's folder for files named
    ``<video name>[.<part>...].<subtitle ext>`` and returns True when any
    dot-separated part parses (via ``LanguageCode.from_string``) to
    ``target_language``.

    Args:
        video_file: Path to video file
        target_language: Language to check for

    Returns:
        True if external subtitle exists; False otherwise, including when the
        folder cannot be listed
    """
    subtitle_extensions = (".srt", ".vtt", ".sub", ".ass", ".ssa")
    # dirname() is "" for a bare filename; fall back to the current directory
    # instead of letting os.listdir("") fail.
    video_folder = os.path.dirname(video_file) or "."
    video_name = os.path.splitext(os.path.basename(video_file))[0]
    try:
        for file_name in os.listdir(video_folder):
            # Case-insensitive so "Movie.en.SRT" is recognised too.
            if not file_name.lower().endswith(subtitle_extensions):
                continue
            subtitle_name, _ = os.path.splitext(file_name)
            if not subtitle_name.startswith(video_name):
                continue
            suffix = subtitle_name[len(video_name):]
            # Require a "." boundary after the video name so that
            # "movie2.en.srt" is not attributed to "movie.mkv".
            if suffix and not suffix.startswith("."):
                continue
            # Extract language from filename
            for part in suffix.lstrip(".").split("."):
                if LanguageCode.from_string(part) == target_language:
                    logger.debug(f"Found external subtitle: {file_name}")
                    return True
        return False
    except Exception as e:
        logger.error(f"Error checking external subtitles: {e}")
        return False
def handle_multiple_audio_tracks(
    file_path: str, language: Optional[LanguageCode] = None
) -> Optional[BytesIO]:
    """
    Pick and extract one audio track from a multi-track media file.

    The first track whose language equals ``language`` is chosen; when no
    language is given or none matches, the first track is used.

    Args:
        file_path: Path to media file
        language: Preferred language

    Returns:
        BytesIO with the extracted audio, or None when the file has at most
        one audio track or extraction fails
    """
    tracks = get_audio_tracks(file_path)
    if len(tracks) <= 1:
        # Zero or one track: nothing to choose between.
        return None
    logger.debug(f"Handling {len(tracks)} audio tracks")

    chosen = None
    if language:
        chosen = next((t for t in tracks if t["language"] == language), None)
    if not chosen:
        chosen = tracks[0]
    return extract_audio_track_to_memory(file_path, chosen["index"])

View File

@@ -0,0 +1,408 @@
"""Whisper transcription wrapper for worker processes."""
import logging
import os
import gc
import ctypes
import ctypes.util
from typing import Optional, Callable
from io import BytesIO
import numpy as np
# Optional imports - graceful degradation if not available
try:
import stable_whisper
import torch
WHISPER_AVAILABLE = True
except ImportError:
stable_whisper = None
torch = None
WHISPER_AVAILABLE = False
logging.warning("stable_whisper or torch not available. Transcription will not work.")
logger = logging.getLogger(__name__)
class TranscriptionResult:
    """Container pairing a transcription result with its summary metadata."""

    def __init__(self, result, language: str, segments_count: int):
        """
        Store the transcription outcome.

        Args:
            result: stable-ts result object (must provide ``to_srt_vtt``)
            language: Detected or forced language
            segments_count: Number of subtitle segments
        """
        self.segments_count = segments_count
        self.language = language
        self.result = result

    def to_srt(self, output_path: str, word_level: bool = False) -> str:
        """
        Write the result to disk as an SRT file.

        Args:
            output_path: Destination path, passed to ``result.to_srt_vtt``
            word_level: Enable word-level timestamps

        Returns:
            ``output_path``, unchanged
        """
        writer = self.result.to_srt_vtt
        writer(output_path, word_level=word_level)
        return output_path

    def get_srt_content(self, word_level: bool = False) -> str:
        """
        Render the result as SRT text without writing a file.

        Args:
            word_level: Enable word-level timestamps

        Returns:
            SRT content: whatever ``result.to_srt_vtt(filepath=None, ...)``
            yields, joined into one string
        """
        pieces = self.result.to_srt_vtt(filepath=None, word_level=word_level)
        return "".join(pieces)
class WhisperTranscriber:
    """
    Whisper transcription engine wrapper.

    Manages Whisper model loading/unloading and transcription operations.
    Designed to run in worker processes with isolated model instances.

    Usable as a context manager: the model is loaded on entry and, when the
    ``clear_vram_on_complete`` setting is truthy, unloaded on exit.
    """

    # Regroup recipe used when the ``custom_regroup`` setting is absent.
    _DEFAULT_REGROUP = 'cm_sl=84_sl=42++++++1'

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        model_path: Optional[str] = None,
        compute_type: Optional[str] = None,
        threads: Optional[int] = None,
    ):
        """
        Initialize transcriber.

        Every argument left as None is read from the settings service.

        Args:
            model_name: Whisper model name (tiny, base, small, medium, large, etc.)
            device: Device to use (cpu, cuda, gpu); "gpu" is normalised to "cuda"
            model_path: Path to store/load models
            compute_type: Compute type (auto, int8, float16, etc.)
            threads: Number of CPU threads
        """
        # Import settings_service here to avoid circular imports
        from backend.core.settings_service import settings_service

        # Load from database settings with sensible defaults
        self.model_name = model_name or settings_service.get('whisper_model', 'medium')
        self.device = (device or settings_service.get('transcribe_device', 'cpu')).lower()
        if self.device == "gpu":
            self.device = "cuda"
        self.model_path = model_path or settings_service.get('model_path', './models')

        # Get compute_type from settings based on device type
        if compute_type:
            requested_compute_type = compute_type
        elif self.device == "cpu":
            requested_compute_type = settings_service.get('cpu_compute_type', 'auto')
        else:
            requested_compute_type = settings_service.get('gpu_compute_type', 'auto')

        # Resolve to a compute type the device can actually run
        self.compute_type = self._get_compatible_compute_type(self.device, requested_compute_type)
        self.threads = threads or int(settings_service.get('whisper_threads', 4))
        self.model = None
        self.is_loaded = False

        # "auto" is a request to pick a type, not an incompatible choice, so
        # resolving it must not be reported as an incompatibility.
        if requested_compute_type != "auto" and self.compute_type != requested_compute_type:
            logger.warning(
                f"Requested compute_type '{requested_compute_type}' is not compatible with device '{self.device}'. "
                f"Using '{self.compute_type}' instead."
            )
        logger.info(
            f"WhisperTranscriber initialized: model={self.model_name}, "
            f"device={self.device}, compute_type={self.compute_type}"
        )

    def _get_compatible_compute_type(self, device: str, requested: str) -> str:
        """
        Get compatible compute type for the device.

        CPU: Only supports int8 and float32
        GPU: Supports float16, float32, int8, int8_float16

        Args:
            device: Device type (cpu, cuda)
            requested: Requested compute type ("auto" picks the device default)

        Returns:
            Compatible compute type
        """
        if device == "cpu":
            # CPU only supports int8 and float32
            if requested == "auto":
                return "int8"  # int8 is faster on CPU
            elif requested in ("float16", "int8_float16"):
                logger.warning(f"CPU doesn't support {requested}, falling back to int8")
                return "int8"
            elif requested in ("int8", "float32"):
                return requested
            else:
                logger.warning(f"Unknown compute type {requested}, using int8")
                return "int8"
        else:
            # CUDA/GPU supports all types
            if requested == "auto":
                return "float16"  # float16 is recommended for GPU
            elif requested in ("float16", "float32", "int8", "int8_float16"):
                return requested
            else:
                logger.warning(f"Unknown compute type {requested}, using float16")
                return "float16"

    def load_model(self):
        """
        Load Whisper model into memory.

        No-op when the model is already loaded.

        Raises:
            RuntimeError: If stable_whisper/torch are not installed
            Exception: Any error from the underlying loader is logged and re-raised
        """
        if not WHISPER_AVAILABLE:
            raise RuntimeError(
                "Whisper is not available. Install with: pip install stable-ts faster-whisper"
            )
        if self.is_loaded and self.model is not None:
            logger.debug("Model already loaded")
            return
        try:
            logger.info(f"Loading Whisper model: {self.model_name}")
            self.model = stable_whisper.load_faster_whisper(
                self.model_name,
                download_root=self.model_path,
                device=self.device,
                cpu_threads=self.threads,
                num_workers=1,  # Each worker has own model
                # self.compute_type was already made device-compatible in
                # __init__; pass it through so the CPU setting is honoured
                # instead of being silently replaced by float32.
                compute_type=self.compute_type,
            )
            self.is_loaded = True
            logger.info(f"Model {self.model_name} loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load model {self.model_name}: {e}")
            raise

    def unload_model(self):
        """
        Unload model from memory and clear cache.

        Best-effort: errors are logged, not raised.
        """
        if not self.is_loaded or self.model is None:
            logger.debug("Model not loaded, nothing to unload")
            return
        try:
            logger.info("Unloading Whisper model")
            # Unload the model
            if hasattr(self.model, "model") and hasattr(self.model.model, "unload_model"):
                self.model.model.unload_model()
            del self.model
            self.model = None
            self.is_loaded = False
            # Clear CUDA cache if using GPU
            if self.device == "cuda" and torch is not None and torch.cuda.is_available():
                torch.cuda.empty_cache()
                logger.debug("CUDA cache cleared")
            # Garbage collection
            if os.name != "nt":  # Don't run on Windows
                gc.collect()
                try:
                    # Ask libc to return freed heap pages to the OS.
                    ctypes.CDLL(ctypes.util.find_library("c")).malloc_trim(0)
                except Exception:
                    # malloc_trim is not available on every libc; skipping it is harmless.
                    pass
            logger.info("Model unloaded successfully")
        except Exception as e:
            logger.error(f"Error unloading model: {e}")

    def _add_regroup(self, args: dict) -> None:
        """Add the ``custom_regroup`` setting to *args* when it is non-empty."""
        from backend.core.settings_service import settings_service

        custom_regroup = settings_service.get('custom_regroup', self._DEFAULT_REGROUP)
        if custom_regroup:
            args["regroup"] = custom_regroup

    @staticmethod
    def _wrap_result(result, language: Optional[str]) -> "TranscriptionResult":
        """Build a TranscriptionResult, tolerating results without segments/language."""
        segments_count = len(result.segments) if hasattr(result, "segments") else 0
        detected_language = result.language if hasattr(result, "language") else language or "unknown"
        return TranscriptionResult(
            result=result,
            language=detected_language,
            segments_count=segments_count,
        )

    def transcribe_file(
        self,
        file_path: str,
        language: Optional[str] = None,
        task: str = "transcribe",
        progress_callback: Optional[Callable] = None,
    ) -> "TranscriptionResult":
        """
        Transcribe a media file.

        Loads the model first if it is not loaded yet.

        Args:
            file_path: Path to media file
            language: Language code (ISO 639-1) or None for auto-detect
            task: 'transcribe' or 'translate'
            progress_callback: Optional callback for progress updates

        Returns:
            TranscriptionResult object

        Raises:
            Exception: If transcription fails (logged, then re-raised)
        """
        # Ensure model is loaded
        if not self.is_loaded:
            self.load_model()
        try:
            logger.info(f"Transcribing file: {file_path} (language={language}, task={task})")
            # Prepare transcription arguments
            args = {}
            if progress_callback:
                args["progress_callback"] = progress_callback
            # Add custom regroup if configured
            self._add_regroup(args)
            # Perform transcription
            result = self.model.transcribe(
                file_path,
                language=language,
                task=task,
                **args,
            )
            wrapped = self._wrap_result(result, language)
            logger.info(
                f"Transcription completed: {wrapped.segments_count} segments, "
                f"language={wrapped.language}"
            )
            return wrapped
        except Exception as e:
            logger.error(f"Transcription failed for {file_path}: {e}")
            raise

    def transcribe_audio_data(
        self,
        audio_data: bytes,
        language: Optional[str] = None,
        task: str = "transcribe",
        sample_rate: int = 16000,
        progress_callback: Optional[Callable] = None,
    ) -> "TranscriptionResult":
        """
        Transcribe raw audio data (for Bazarr provider mode).

        Loads the model first if it is not loaded yet.

        Args:
            audio_data: Raw audio bytes
            language: Language code or None
            task: 'transcribe' or 'translate'
            sample_rate: Audio sample rate, forwarded as ``input_sr``
            progress_callback: Optional progress callback

        Returns:
            TranscriptionResult object

        Raises:
            Exception: If transcription fails (logged, then re-raised)
        """
        if not self.is_loaded:
            self.load_model()
        try:
            logger.info(f"Transcribing audio data (size={len(audio_data)} bytes)")
            args = {
                "audio": audio_data,
                "input_sr": sample_rate,
            }
            if progress_callback:
                args["progress_callback"] = progress_callback
            self._add_regroup(args)
            result = self.model.transcribe(task=task, language=language, **args)
            wrapped = self._wrap_result(result, language)
            logger.info(f"Audio transcription completed: {wrapped.segments_count} segments")
            return wrapped
        except Exception as e:
            logger.error(f"Audio transcription failed: {e}")
            raise

    def detect_language(
        self,
        file_path: str,
        offset: int = 0,
        length: int = 30,
    ) -> str:
        """
        Detect language of a media file.

        Transcribes a short audio segment and reports the language found on
        the result. Never raises: any failure yields "unknown".

        Args:
            file_path: Path to media file
            offset: Start offset in seconds
            length: Duration to analyze in seconds

        Returns:
            Language reported by the model (presumably ISO 639-1 - confirm
            against the stable-ts result), or "unknown"
        """
        if not self.is_loaded:
            self.load_model()
        try:
            logger.info(f"Detecting language for: {file_path} (offset={offset}s, length={length}s)")
            # Extract audio segment for analysis
            from backend.transcription.audio_utils import extract_audio_segment

            audio_segment = extract_audio_segment(file_path, offset, length)
            result = self.model.transcribe(audio_segment.read())
            detected_language = result.language if hasattr(result, "language") else "unknown"
            logger.info(f"Detected language: {detected_language}")
            return detected_language
        except Exception as e:
            logger.error(f"Language detection failed for {file_path}: {e}")
            return "unknown"

    def __enter__(self):
        """Context manager entry: load the model and return self."""
        self.load_model()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: unload the model if ``clear_vram_on_complete`` is truthy."""
        from backend.core.settings_service import settings_service

        # The setting may come back as a bool, an int, or its string form.
        if settings_service.get('clear_vram_on_complete', True) in (True, 'true', 'True', '1', 1):
            self.unload_model()

    def __del__(self):
        """Destructor - ensure model is unloaded."""
        try:
            if self.is_loaded:
                self.unload_model()
        except Exception:
            # Destructors must never raise (e.g. during interpreter shutdown
            # or when __init__ failed before is_loaded was set).
            pass

View File

@@ -0,0 +1,198 @@
"""SRT translation service using Google Translate or DeepL."""
import logging
from typing import Optional
import re
logger = logging.getLogger(__name__)
# Check for translation library availability
try:
from deep_translator import GoogleTranslator
TRANSLATOR_AVAILABLE = True
except ImportError:
GoogleTranslator = None
TRANSLATOR_AVAILABLE = False
class SRTTranslator:
    """
    Translate SRT subtitle files from English to target language.

    Uses deep-translator library with Google Translate as backend.
    Construction raises RuntimeError when the library is not installed.
    """

    def __init__(self, target_language: str):
        """
        Initialize translator.

        Args:
            target_language: ISO 639-1 code (e.g., 'es', 'fr', 'ja')

        Raises:
            RuntimeError: If deep-translator is not installed
        """
        if not TRANSLATOR_AVAILABLE:
            raise RuntimeError(
                "Translation library not available. Install with: pip install deep-translator"
            )
        # Google Translate accepts ISO 639-1 codes directly
        self.target_language = target_language
        logger.info(f"Initializing translator for language: {target_language}")
        # Created lazily by _get_translator()
        self.translator = None

    def _get_translator(self):
        """Create the GoogleTranslator on first use and reuse it afterwards."""
        if self.translator is None:
            self.translator = GoogleTranslator(source='en', target=self.target_language)
        return self.translator

    @staticmethod
    def _pick_text(original: str, translated) -> str:
        """Return *translated* when it is a non-empty string, else *original*."""
        if isinstance(translated, str) and translated:
            return translated
        return original

    def translate_srt_content(self, srt_content: str) -> str:
        """
        Translate SRT content from English to target language.

        Index and timestamp lines are kept as-is; only the text of each block
        is translated. A block whose translation fails, or comes back empty,
        keeps its original text.

        Args:
            srt_content: SRT formatted string in English

        Returns:
            SRT formatted string in target language. The input is returned
            unchanged when it is empty or contains no parsable blocks.

        Raises:
            Exception: If translation fails as a whole (logged, then re-raised)
        """
        if not srt_content or not srt_content.strip():
            logger.warning("Empty SRT content, nothing to translate")
            return srt_content
        try:
            logger.info(f"Translating SRT content to {self.target_language}")
            # Parse SRT into blocks
            blocks = self._parse_srt(srt_content)
            if not blocks:
                logger.warning("No subtitle blocks found in SRT")
                return srt_content
            # Translate each text block
            translator = self._get_translator()
            translated_blocks = []
            for block in blocks:
                try:
                    # Only translate the text, keep index and timestamps
                    translated_text = translator.translate(block['text'])
                except Exception as e:
                    logger.error(f"Failed to translate block {block['index']}: {e}")
                    # Keep original text on error
                    translated_blocks.append(block)
                    continue
                # The backend may hand back None/empty for some inputs; a None
                # here would make _reconstruct_srt fail and abort the whole
                # file, so fall back to the original text instead.
                text = self._pick_text(block['text'], translated_text)
                if text is not translated_text:
                    logger.warning(
                        f"Empty translation for block {block['index']}, keeping original text"
                    )
                translated_blocks.append({
                    'index': block['index'],
                    'timestamp': block['timestamp'],
                    'text': text
                })
            # Reconstruct SRT
            result = self._reconstruct_srt(translated_blocks)
            logger.info(f"Successfully translated {len(translated_blocks)} subtitle blocks")
            return result
        except Exception as e:
            logger.error(f"Translation failed: {e}")
            raise

    def _parse_srt(self, srt_content: str) -> list:
        """
        Parse SRT content into structured blocks.

        Blocks are separated by blank lines; a block needs at least an index
        line, a timestamp line and one text line, otherwise it is skipped.

        Args:
            srt_content: Raw SRT string

        Returns:
            List of dicts with 'index', 'timestamp', 'text'
        """
        blocks = []
        # Normalise CRLF / CR line endings first so no stray "\r" ends up
        # inside multi-line subtitle text.
        normalized = srt_content.replace('\r\n', '\n').replace('\r', '\n')
        # Split by double newline (subtitle blocks separator)
        raw_blocks = re.split(r'\n\s*\n', normalized.strip())
        for raw_block in raw_blocks:
            lines = raw_block.strip().split('\n')
            if len(lines) < 3:
                continue  # Invalid block
            blocks.append({
                'index': lines[0].strip(),
                'timestamp': lines[1].strip(),
                'text': '\n'.join(lines[2:])  # Join remaining lines as text
            })
        return blocks

    def _reconstruct_srt(self, blocks: list) -> str:
        """
        Reconstruct SRT content from structured blocks.

        Args:
            blocks: List of dicts with 'index', 'timestamp', 'text'

        Returns:
            SRT formatted string; each block is followed by an empty line
        """
        srt_lines = []
        for block in blocks:
            srt_lines.append(block['index'])
            srt_lines.append(block['timestamp'])
            srt_lines.append(block['text'])
            srt_lines.append('')  # Empty line separator
        return '\n'.join(srt_lines)
def translate_srt_file(
    input_path: str,
    output_path: str,
    target_language: str
) -> bool:
    """
    Read an English SRT file, translate it, and write the result.

    Both files are handled as UTF-8. Any failure (reading, translating or
    writing) is logged and reported through the return value, not raised.

    Args:
        input_path: Path to input SRT file (English)
        output_path: Path to output SRT file (target language)
        target_language: ISO 639-1 code

    Returns:
        True if successful, False otherwise
    """
    try:
        with open(input_path, 'r', encoding='utf-8') as source_file:
            english_srt = source_file.read()

        srt_translator = SRTTranslator(target_language=target_language)
        translated_srt = srt_translator.translate_srt_content(english_srt)

        with open(output_path, 'w', encoding='utf-8') as target_file:
            target_file.write(translated_srt)

        logger.info(f"Translated SRT saved to {output_path}")
        return True
    except Exception as err:
        logger.error(f"Failed to translate SRT file: {err}")
        return False

View File

@@ -1,40 +0,0 @@
#docker-compose.yml
version: '2'
services:
subgen:
container_name: subgen
tty: true
image: mccloud/subgen
environment:
- "WHISPER_MODEL=medium"
- "WHISPER_THREADS=4"
- "PROCADDEDMEDIA=True"
- "PROCMEDIAONPLAY=False"
- "NAMESUBLANG=aa"
- "SKIPIFINTERNALSUBLANG=eng"
- "PLEXTOKEN=plextoken"
- "PLEXSERVER=http://plexserver:32400"
- "JELLYFINTOKEN=token here"
- "JELLYFINSERVER=http://jellyfin:8096"
- "WEBHOOKPORT=9000"
- "CONCURRENT_TRANSCRIPTIONS=2"
- "WORD_LEVEL_HIGHLIGHT=False"
- "DEBUG=True"
- "USE_PATH_MAPPING=False"
- "PATH_MAPPING_FROM=/tv"
- "PATH_MAPPING_TO=/Volumes/TV"
- "TRANSCRIBE_DEVICE=cpu"
- "CLEAR_VRAM_ON_COMPLETE=True"
- "MODEL_PATH=./models"
- "UPDATE=False"
- "APPEND=False"
- "USE_MODEL_PROMPT=False"
- "CUSTOM_MODEL_PROMPT="
- "LRC_FOR_AUDIO_FILES=True"
- "CUSTOM_REGROUP=cm_sl=84_sl=42++++++1"
volumes:
- "${TV}:/tv"
- "${MOVIES}:/movies"
- "${APPDATA}/subgen/models:/subgen/models"
ports:
- "9000:9000"

1195
docs/API.md Normal file

File diff suppressed because it is too large Load Diff

613
docs/ARCHITECTURE.md Normal file
View File

@@ -0,0 +1,613 @@
# TranscriptorIO Backend Architecture
Technical documentation of the backend architecture, components, and data flow.
## Table of Contents
- [Overview](#overview)
- [Directory Structure](#directory-structure)
- [Core Components](#core-components)
- [Data Flow](#data-flow)
- [Database Schema](#database-schema)
- [Transcription vs Translation](#transcription-vs-translation)
- [Worker Architecture](#worker-architecture)
- [Queue System](#queue-system)
- [Scanner System](#scanner-system)
- [Settings System](#settings-system)
- [Graceful Degradation](#graceful-degradation)
- [Thread Safety](#thread-safety)
- [Important Patterns](#important-patterns)
---
## Overview
TranscriptorIO is built with a modular architecture consisting of:
- **FastAPI Server**: REST API with 45+ endpoints
- **Worker Pool**: Multiprocessing-based transcription workers (CPU/GPU)
- **Queue Manager**: Persistent job queue with priority support
- **Library Scanner**: Rule-based file scanning with scheduler and watcher
- **Settings Service**: Database-backed configuration system
```
┌─────────────────────────────────────────────────────────┐
│ FastAPI Server │
│ ┌─────────────────────────────────────────────────┐ │
│ │ REST API (45+ endpoints) │ │
│ │ /api/workers | /api/jobs | /api/settings │ │
│ │ /api/scanner | /api/system | /api/setup │ │
│ └─────────────────────────────────────────────────┘ │
└──────────────────┬──────────────────────────────────────┘
┌──────────────┼──────────────┬──────────────────┐
│ │ │ │
▼ ▼ ▼ ▼
┌────────┐ ┌──────────┐ ┌─────────┐ ┌──────────┐
│ Worker │ │ Queue │ │ Scanner │ │ Database │
│ Pool │◄──┤ Manager │◄──┤ Engine │ │ SQLite/ │
│ CPU/GPU│ │ Priority │ │ Rules + │ │ Postgres │
└────────┘ │ Queue │ │ Watcher │ └──────────┘
└──────────┘ └─────────┘
```
---
## Directory Structure
```
backend/
├── app.py # FastAPI application + lifespan
├── cli.py # CLI commands (server, db, worker, scan, setup)
├── config.py # Pydantic Settings (from .env)
├── setup_wizard.py # Interactive first-run setup
├── core/
│ ├── database.py # SQLAlchemy setup + session management
│ ├── models.py # Job model + enums
│ ├── language_code.py # ISO 639 language code utilities
│ ├── settings_model.py # SystemSettings model (database-backed)
│ ├── settings_service.py # Settings service with caching
│ ├── system_monitor.py # CPU/RAM/GPU/VRAM monitoring
│ ├── queue_manager.py # Persistent queue with priority
│ ├── worker.py # Individual worker (Process)
│ └── worker_pool.py # Worker pool orchestrator
├── transcription/
│ ├── __init__.py # Exports WhisperTranscriber, SRTTranslator, translate_srt_file
│ ├── transcriber.py # WhisperTranscriber wrapper
│ ├── translator.py # Google Translate integration
│ └── audio_utils.py # ffmpeg/ffprobe utilities
├── scanning/
│ ├── __init__.py # Exports (NO library_scanner import!)
│ ├── models.py # ScanRule model
│ ├── file_analyzer.py # ffprobe file analysis
│ ├── language_detector.py # Audio language detection
│ ├── detected_languages.py # Language mappings
│ └── library_scanner.py # Scanner + scheduler + watcher
└── api/
├── __init__.py # Router exports
├── workers.py # Worker management endpoints
├── jobs.py # Job queue endpoints
├── scan_rules.py # Scan rules CRUD
├── scanner.py # Scanner control endpoints
├── settings.py # Settings CRUD endpoints
├── system.py # System resources endpoints
├── filesystem.py # Filesystem browser endpoints
└── setup_wizard.py # Setup wizard endpoints
```
---
## Core Components
### 1. WorkerPool (`core/worker_pool.py`)
Orchestrates CPU/GPU workers as separate processes.
**Key Features:**
- Dynamic add/remove workers at runtime
- Health monitoring with auto-restart
- Thread-safe multiprocessing
- Each worker is an isolated Process
```python
from backend.core.worker_pool import worker_pool
from backend.core.worker import WorkerType
# Add GPU worker on device 0
worker_id = worker_pool.add_worker(WorkerType.GPU, device_id=0)
# Add CPU worker
worker_id = worker_pool.add_worker(WorkerType.CPU)
# Get pool stats
stats = worker_pool.get_pool_stats()
```
### 2. QueueManager (`core/queue_manager.py`)
Persistent SQLite/PostgreSQL queue with priority support.
**Key Features:**
- Job deduplication (no duplicate `file_path`)
- Row-level locking with `skip_locked=True`
- Priority-based ordering (higher first)
- FIFO within same priority (by `created_at`)
- Auto-retry failed jobs
```python
from backend.core.queue_manager import queue_manager
from backend.core.models import QualityPreset
job = queue_manager.add_job(
file_path="/media/anime.mkv",
file_name="anime.mkv",
source_lang="jpn",
target_lang="spa",
quality_preset=QualityPreset.FAST,
priority=5
)
```
### 3. LibraryScanner (`scanning/library_scanner.py`)
Rule-based file scanning system.
**Three Scan Modes:**
- **Manual**: One-time scan via API or CLI
- **Scheduled**: Periodic scanning with APScheduler
- **Real-time**: File watcher with watchdog library
```python
from backend.scanning.library_scanner import library_scanner
# Manual scan
result = library_scanner.scan_paths(["/media/anime"], recursive=True)
# Start scheduler (every 6 hours)
library_scanner.start_scheduler(interval_minutes=360)
# Start file watcher
library_scanner.start_file_watcher(paths=["/media/anime"], recursive=True)
```
### 4. WhisperTranscriber (`transcription/transcriber.py`)
Wrapper for stable-whisper and faster-whisper.
**Key Features:**
- GPU/CPU support with automatic selection of a device-compatible compute type
- VRAM management and cleanup
- Graceful degradation (works without Whisper installed)
```python
from backend.transcription.transcriber import WhisperTranscriber
transcriber = WhisperTranscriber(
model_name="large-v3",
device="cuda",
compute_type="float16"
)
result = transcriber.transcribe_file(
file_path="/media/episode.mkv",
language="ja",  # ISO 639-1 code, as transcribe_file expects
task="translate" # translate to English
)
result.to_srt("episode.eng.srt")
```
### 5. SettingsService (`core/settings_service.py`)
Database-backed configuration with caching.
```python
from backend.core.settings_service import settings_service
# Get setting
value = settings_service.get("worker_cpu_count", default=1)
# Set setting
settings_service.set("worker_cpu_count", "2")
# Bulk update
settings_service.bulk_update({
"worker_cpu_count": "2",
"scanner_enabled": "true"
})
```
---
## Data Flow
```
1. LibraryScanner detects file (manual/scheduled/watcher)
2. FileAnalyzer analyzes with ffprobe
- Audio tracks (codec, language, channels)
- Embedded subtitles
- External .srt files
- Duration, video info
3. Rules Engine evaluates against ScanRules (priority order)
- Checks all conditions (audio language, missing subs, etc.)
- First matching rule wins
4. If match → QueueManager.add_job()
- Deduplication check (no duplicate file_path)
- Assigns priority based on rule
5. Worker pulls job from queue
- Uses with_for_update(skip_locked=True)
- FIFO within same priority
6. WhisperTranscriber processes with model
- Stage 1: Audio → English (Whisper translate)
- Stage 2: English → Target (Google Translate, if needed)
7. Generate output SRT file(s)
- .eng.srt (always)
- .{target}.srt (if translate mode)
8. Job marked completed ✓
```
---
## Database Schema
### Job Table (`jobs`)
```sql
id VARCHAR PRIMARY KEY
file_path VARCHAR UNIQUE -- Ensures no duplicates
file_name VARCHAR
status VARCHAR -- queued/processing/completed/failed/cancelled
priority INTEGER
source_lang VARCHAR
target_lang VARCHAR
quality_preset VARCHAR -- fast/balanced/best
transcribe_or_translate VARCHAR -- transcribe/translate
progress FLOAT
current_stage VARCHAR
eta_seconds INTEGER
created_at DATETIME
started_at DATETIME
completed_at DATETIME
output_path VARCHAR
srt_content TEXT
segments_count INTEGER
error TEXT
retry_count INTEGER
max_retries INTEGER
worker_id VARCHAR
vram_used_mb INTEGER
processing_time_seconds FLOAT
```
### ScanRule Table (`scan_rules`)
```sql
id INTEGER PRIMARY KEY
name VARCHAR UNIQUE
enabled BOOLEAN
priority INTEGER -- Higher = evaluated first
-- Conditions (all must match):
audio_language_is VARCHAR -- ISO 639-2
audio_language_not VARCHAR -- Comma-separated
audio_track_count_min INTEGER
has_embedded_subtitle_lang VARCHAR
missing_embedded_subtitle_lang VARCHAR
missing_external_subtitle_lang VARCHAR
file_extension VARCHAR -- Comma-separated
-- Action:
action_type VARCHAR -- transcribe/translate
target_language VARCHAR
quality_preset VARCHAR
job_priority INTEGER
created_at DATETIME
updated_at DATETIME
```
### SystemSettings Table (`system_settings`)
```sql
id INTEGER PRIMARY KEY
key VARCHAR UNIQUE
value TEXT
description TEXT
category VARCHAR -- general/workers/transcription/scanner/bazarr
value_type VARCHAR -- string/integer/boolean/list
created_at DATETIME
updated_at DATETIME
```
---
## Transcription vs Translation
### Understanding the Two Modes
**Mode 1: `transcribe`** (Audio → English subtitles)
```
Audio (any language) → Whisper (task='translate') → English SRT
Example: Japanese audio → anime.eng.srt
```
**Mode 2: `translate`** (Audio → English → Target language)
```
Audio (any language) → Whisper (task='translate') → English SRT
→ Google Translate → Target language SRT
Example: Japanese audio → anime.eng.srt + anime.spa.srt
```
### Why Two Stages?
**Whisper Limitation**: Whisper can only translate TO English, not between other languages.
**Solution**: Two-stage process:
1. **Stage 1 (Always)**: Whisper converts audio to English using `task='translate'`
2. **Stage 2 (Only for translate mode)**: Google Translate converts English to target language
### Output Files
| Mode | Target | Output Files |
|------|--------|--------------|
| transcribe | spa | `.eng.srt` only |
| translate | spa | `.eng.srt` + `.spa.srt` |
| translate | fra | `.eng.srt` + `.fra.srt` |
---
## Worker Architecture
### Worker Types
| Type | Description | Device |
|------|-------------|--------|
| CPU | Uses CPU for inference | None |
| GPU | Uses NVIDIA GPU | cuda:N |
### Worker Lifecycle
```
┌─────────────┐
│ CREATED │
└──────┬──────┘
│ start()
┌─────────────┐
┌──────────│ IDLE │◄─────────┐
│ └──────┬──────┘ │
│ │ get_job() │ job_done()
│ ▼ │
│ ┌─────────────┐ │
│ │ BUSY │──────────┘
│ └──────┬──────┘
│ │ error
│ ▼
│ ┌─────────────┐
└──────────│ ERROR │
└─────────────┘
```
### Process Isolation
Each worker runs in a separate Python process:
- Memory isolation (VRAM per GPU worker)
- Crash isolation (one worker crash doesn't affect others)
- Independent model loading
---
## Queue System
### Priority System
```python
# Priority values
BAZARR_REQUEST = base_priority + 10 # Highest (external request)
MANUAL_REQUEST = base_priority + 5 # High (user-initiated)
AUTO_SCAN = base_priority # Normal (scanner-generated)
```
### Job Deduplication
Jobs are deduplicated by `file_path`:
- If job exists with same `file_path`, new job is rejected
- Returns `None` from `add_job()`
- Prevents duplicate processing
### Concurrency Safety
```python
# Row-level locking prevents race conditions
job = session.query(Job).filter(
Job.status == JobStatus.QUEUED
).with_for_update(skip_locked=True).first()
```
---
## Scanner System
### Scan Rule Evaluation
Rules are evaluated in priority order (highest first):
```python
# Pseudo-code for rule matching
for rule in rules.order_by(priority.desc()):
if rule.enabled and matches_all_conditions(file, rule):
create_job(file, rule.action)
break # First match wins
```
### Conditions
All conditions must match (AND logic):
| Condition | Match If |
|-----------|----------|
| audio_language_is | Primary audio track language equals |
| audio_language_not | Primary audio track language NOT in list |
| audio_track_count_min | Number of audio tracks >= value |
| has_embedded_subtitle_lang | Has embedded subtitle in language |
| missing_embedded_subtitle_lang | Does NOT have embedded subtitle |
| missing_external_subtitle_lang | Does NOT have external .srt file |
| file_extension | File extension in comma-separated list |
---
## Settings System
### Categories
| Category | Settings |
|----------|----------|
| general | operation_mode, library_paths, log_level |
| workers | cpu_count, gpu_count, auto_start, healthcheck_interval |
| transcription | whisper_model, compute_type, vram_management |
| scanner | enabled, schedule_interval, watcher_enabled |
| bazarr | provider_enabled, api_key |
### Caching
Settings service implements caching:
- Cache invalidated on write
- Thread-safe access
- Lazy loading from database
---
## Graceful Degradation
The system can run WITHOUT Whisper/torch/PyAV installed:
```python
# Pattern used everywhere
try:
import stable_whisper
WHISPER_AVAILABLE = True
except ImportError:
stable_whisper = None
WHISPER_AVAILABLE = False
# Later in code
if not WHISPER_AVAILABLE:
raise RuntimeError("Install with: pip install stable-ts faster-whisper")
```
**What works without Whisper:**
- Backend server starts normally
- All APIs work fully
- Frontend development
- Scanner and rules management
- Job queue (jobs just won't be processed)
**What doesn't work:**
- Actual transcription (raises `RuntimeError`)
---
## Thread Safety
### Database Sessions
Always use context managers:
```python
with database.get_session() as session:
# Session is automatically committed on success
# Rolled back on exception
job = session.query(Job).filter(...).first()
```
### Worker Pool
- Each worker is a separate Process (multiprocessing)
- Communication via shared state held by a `multiprocessing.Manager` (proxy objects)
- No GIL contention between workers
### Queue Manager
- Uses SQLAlchemy row locking
- `skip_locked=True` makes a worker skip rows already locked by another worker instead of blocking on them
- Transactions are short-lived
---
## Important Patterns
### Circular Import Resolution
**Critical**: `backend/scanning/__init__.py` MUST NOT import `library_scanner`:
```python
# backend/scanning/__init__.py
from backend.scanning.models import ScanRule
from backend.scanning.file_analyzer import FileAnalyzer, FileAnalysis
# DO NOT import library_scanner here!
```
**Why?**
```
library_scanner → database → models → scanning.models → database (circular!)
```
**Solution**: Import `library_scanner` locally where needed:
```python
def some_function():
from backend.scanning.library_scanner import library_scanner
library_scanner.scan_paths(...)
```
### Optional Imports
```python
try:
import pynvml
NVML_AVAILABLE = True
except ImportError:
pynvml = None
NVML_AVAILABLE = False
```
### Database Session Pattern
```python
from backend.core.database import database
with database.get_session() as session:
# All operations within session context
job = session.query(Job).filter(...).first()
job.status = JobStatus.PROCESSING
# Commit happens automatically
```
### API Response Pattern
```python
from pydantic import BaseModel
class JobResponse(BaseModel):
id: str
status: str
# ...
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
with database.get_session() as session:
job = session.query(Job).filter(Job.id == job_id).first()
if not job:
raise HTTPException(status_code=404, detail="Not found")
return JobResponse(**job.to_dict())
```

402
docs/CONFIGURATION.md Normal file
View File

@@ -0,0 +1,402 @@
# TranscriptorIO Configuration
Complete documentation for the configuration system.
## Table of Contents
- [Overview](#overview)
- [Configuration Methods](#configuration-methods)
- [Settings Categories](#settings-categories)
- [All Settings Reference](#all-settings-reference)
- [Environment Variables](#environment-variables)
- [Setup Wizard](#setup-wizard)
- [API Configuration](#api-configuration)
- [Python Usage](#python-usage)
---
## Overview
TranscriptorIO uses a **database-backed configuration system**. All settings are stored in the `system_settings` table and can be managed through:
1. **Setup Wizard** (first run)
2. **Web UI** (Settings page)
3. **REST API** (`/api/settings`)
4. **CLI** (for advanced users)
This approach provides:
- Persistent configuration across restarts
- Runtime configuration changes without restart
- Category-based organization
- Type validation and parsing
---
## Configuration Methods
### 1. Setup Wizard (Recommended for First Run)
```bash
# Runs automatically on first server start
python backend/cli.py server
# Or run manually anytime
python backend/cli.py setup
```
The wizard guides you through:
- **Operation mode selection** (Standalone or Bazarr provider)
- **Library paths configuration**
- **Initial scan rules**
- **Worker configuration** (CPU/GPU counts)
- **Scanner schedule**
### 2. Web UI (Recommended for Daily Use)
Navigate to **Settings** in the web interface (`http://localhost:8000/settings`).
Features:
- Settings grouped by category tabs
- Descriptions for each setting
- Change detection (warns about unsaved changes)
- Bulk save functionality
### 3. REST API (For Automation/Integration)
```bash
# Get all settings
curl http://localhost:8000/api/settings
# Get settings by category
curl http://localhost:8000/api/settings?category=workers
# Update a setting
curl -X PUT http://localhost:8000/api/settings/worker_cpu_count \
-H "Content-Type: application/json" \
-d '{"value": "2"}'
# Bulk update
curl -X POST http://localhost:8000/api/settings/bulk-update \
-H "Content-Type: application/json" \
-d '{
"settings": {
"worker_cpu_count": "2",
"worker_gpu_count": "1"
}
}'
```
---
## Settings Categories
| Category | Description |
|----------|-------------|
| `general` | Operation mode, library paths, API server |
| `workers` | CPU/GPU worker configuration |
| `transcription` | Whisper model and transcription options |
| `subtitles` | Subtitle naming and formatting |
| `skip` | Skip conditions for files |
| `scanner` | Library scanner configuration |
| `bazarr` | Bazarr provider integration |
| `advanced` | Advanced options (path mapping, etc.) |
---
## All Settings Reference
### General Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `operation_mode` | string | `standalone` | Operation mode: `standalone`, `provider`, or `standalone,provider` |
| `library_paths` | list | `""` | Comma-separated library paths to scan |
| `api_host` | string | `0.0.0.0` | API server host |
| `api_port` | integer | `8000` | API server port |
| `debug` | boolean | `false` | Enable debug mode |
### Worker Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `worker_cpu_count` | integer | `0` | Number of CPU workers to start on boot |
| `worker_gpu_count` | integer | `0` | Number of GPU workers to start on boot |
| `concurrent_transcriptions` | integer | `2` | Maximum concurrent transcriptions |
| `worker_healthcheck_interval` | integer | `60` | Worker health check interval (seconds) |
| `worker_auto_restart` | boolean | `true` | Auto-restart failed workers |
| `clear_vram_on_complete` | boolean | `true` | Clear VRAM after job completion |
### Transcription Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `whisper_model` | string | `medium` | Whisper model: `tiny`, `base`, `small`, `medium`, `large-v3`, `large-v3-turbo` |
| `model_path` | string | `./models` | Path to store Whisper models |
| `transcribe_device` | string | `cpu` | Device: `cpu`, `cuda`, `gpu` |
| `cpu_compute_type` | string | `auto` | CPU compute type: `auto`, `int8`, `float32` |
| `gpu_compute_type` | string | `auto` | GPU compute type: `auto`, `float16`, `float32`, `int8_float16`, `int8` |
| `whisper_threads` | integer | `4` | Number of CPU threads for Whisper |
| `transcribe_or_translate` | string | `transcribe` | Default mode: `transcribe` or `translate` |
| `word_level_highlight` | boolean | `false` | Enable word-level highlighting |
| `detect_language_length` | integer | `30` | Seconds of audio for language detection |
| `detect_language_offset` | integer | `0` | Offset for language detection sample |
### Whisper Models
| Model | Size | Speed | Quality | VRAM |
|-------|------|-------|---------|------|
| `tiny` | 39M | Fastest | Basic | ~1GB |
| `base` | 74M | Very Fast | Fair | ~1GB |
| `small` | 244M | Fast | Good | ~2GB |
| `medium` | 769M | Medium | Great | ~5GB |
| `large-v3` | 1550M | Slow | Excellent | ~10GB |
| `large-v3-turbo` | 809M | Fast | Excellent | ~6GB |
### Subtitle Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `subtitle_language_name` | string | `""` | Custom subtitle language name |
| `subtitle_language_naming_type` | string | `ISO_639_2_B` | Naming type: `ISO_639_1`, `ISO_639_2_T`, `ISO_639_2_B`, `NAME`, `NATIVE` |
| `custom_regroup` | string | `cm_sl=84_sl=42++++++1` | Custom regrouping algorithm |
**Language Naming Types:**
| Type | Example (Spanish) |
|------|-------------------|
| ISO_639_1 | `es` |
| ISO_639_2_T | `spa` |
| ISO_639_2_B | `spa` |
| NAME | `Spanish` |
| NATIVE | `Español` |
### Skip Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `skip_if_external_subtitles_exist` | boolean | `false` | Skip if any external subtitle exists |
| `skip_if_target_subtitles_exist` | boolean | `true` | Skip if target language subtitle exists |
| `skip_if_internal_subtitles_language` | string | `""` | Skip if internal subtitle in this language |
| `skip_subtitle_languages` | list | `""` | Pipe-separated language codes to skip |
| `skip_if_audio_languages` | list | `""` | Skip if audio track is in these languages |
| `skip_unknown_language` | boolean | `false` | Skip files with unknown audio language |
| `skip_only_subgen_subtitles` | boolean | `false` | Only skip SubGen-generated subtitles |
### Scanner Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `scanner_enabled` | boolean | `true` | Enable library scanner |
| `scanner_cron` | string | `0 2 * * *` | Cron expression for scheduled scans |
| `scanner_schedule_interval_minutes` | integer | `360` | Scan interval in minutes (6 hours) |
| `watcher_enabled` | boolean | `false` | Enable real-time file watcher |
| `auto_scan_enabled` | boolean | `false` | Enable automatic scheduled scanning |
### Bazarr Provider Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `bazarr_provider_enabled` | boolean | `false` | Enable Bazarr provider mode |
| `bazarr_url` | string | `http://bazarr:6767` | Bazarr server URL |
| `bazarr_api_key` | string | `""` | Bazarr API key (auto-generated) |
| `provider_timeout_seconds` | integer | `600` | Provider request timeout |
| `provider_callback_enabled` | boolean | `true` | Enable callback on completion |
| `provider_polling_interval` | integer | `30` | Polling interval for jobs |
### Advanced Settings
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `force_detected_language_to` | string | `""` | Force detected language to specific code |
| `preferred_audio_languages` | list | `eng` | Pipe-separated preferred audio languages |
| `use_path_mapping` | boolean | `false` | Enable path mapping for network shares |
| `path_mapping_from` | string | `/tv` | Path mapping source |
| `path_mapping_to` | string | `/Volumes/TV` | Path mapping destination |
| `lrc_for_audio_files` | boolean | `true` | Generate LRC files for audio-only files |
---
## Environment Variables
The **only** environment variable required is `DATABASE_URL` in the `.env` file:
```bash
# SQLite (default, good for single-user)
DATABASE_URL=sqlite:///./transcriptarr.db
# PostgreSQL (recommended for production)
DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr
# MariaDB/MySQL
DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr
```
**All other configuration** is stored in the database and managed through:
- Setup Wizard (first run)
- Web UI Settings page
- Settings API endpoints
This design ensures:
- No `.env` file bloat
- Runtime configuration changes without restart
- Centralized configuration management
- Easy backup (configuration is in the database)
---
## Setup Wizard
### Standalone Mode
For independent operation with local library scanning.
**Configuration Flow:**
1. Select library paths (e.g., `/media/anime`, `/media/movies`)
2. Create initial scan rules (e.g., "Japanese audio → Spanish subtitles")
3. Configure workers (CPU count, GPU count)
4. Set scanner interval (default: 6 hours)
**API Endpoint:** `POST /api/setup/standalone`
```json
{
"library_paths": ["/media/anime", "/media/movies"],
"scan_rules": [
{
"name": "Japanese to Spanish",
"audio_language_is": "jpn",
"missing_external_subtitle_lang": "spa",
"target_language": "spa",
"action_type": "transcribe"
}
],
"worker_config": {
"count": 1,
"type": "cpu"
},
"scanner_config": {
"interval_minutes": 360
}
}
```
### Bazarr Slave Mode
For integration with Bazarr as a subtitle provider.
**Configuration Flow:**
1. Select Bazarr mode
2. System auto-generates API key
3. Displays connection info for Bazarr configuration
**API Endpoint:** `POST /api/setup/bazarr-slave`
**Response:**
```json
{
"success": true,
"message": "Bazarr slave mode configured successfully",
"bazarr_info": {
"mode": "bazarr_slave",
"host": "127.0.0.1",
"port": 8000,
"api_key": "generated_api_key_here",
"provider_url": "http://127.0.0.1:8000"
}
}
```
---
## API Configuration
### Get All Settings
```bash
curl http://localhost:8000/api/settings
```
### Get by Category
```bash
curl "http://localhost:8000/api/settings?category=workers"
```
### Get Single Setting
```bash
curl http://localhost:8000/api/settings/worker_cpu_count
```
### Update Setting
```bash
curl -X PUT http://localhost:8000/api/settings/worker_cpu_count \
-H "Content-Type: application/json" \
-d '{"value": "2"}'
```
### Bulk Update
```bash
curl -X POST http://localhost:8000/api/settings/bulk-update \
-H "Content-Type: application/json" \
-d '{
"settings": {
"worker_cpu_count": "2",
"worker_gpu_count": "1",
"scanner_enabled": "true"
}
}'
```
### Create Custom Setting
```bash
curl -X POST http://localhost:8000/api/settings \
-H "Content-Type: application/json" \
-d '{
"key": "my_custom_setting",
"value": "custom_value",
"description": "My custom setting",
"category": "advanced",
"value_type": "string"
}'
```
### Delete Setting
```bash
curl -X DELETE http://localhost:8000/api/settings/my_custom_setting
```
### Initialize Defaults
```bash
curl -X POST http://localhost:8000/api/settings/init-defaults
```
---
## Python Usage
```python
from backend.core.settings_service import settings_service
# Get setting with default
cpu_count = settings_service.get("worker_cpu_count", default=1)
# Set setting
settings_service.set("worker_cpu_count", 2)
# Bulk update
settings_service.bulk_update({
"worker_cpu_count": "2",
"scanner_enabled": "true"
})
# Get all settings in category
worker_settings = settings_service.get_by_category("workers")
# Initialize defaults (safe to call multiple times)
settings_service.init_default_settings()
```

666
docs/FRONTEND.md Normal file
View File

@@ -0,0 +1,666 @@
# TranscriptorIO Frontend
Technical documentation for the Vue 3 frontend application.
## Table of Contents
- [Overview](#overview)
- [Technology Stack](#technology-stack)
- [Directory Structure](#directory-structure)
- [Development Setup](#development-setup)
- [Views](#views)
- [Components](#components)
- [State Management](#state-management)
- [API Service](#api-service)
- [Routing](#routing)
- [Styling](#styling)
- [Build and Deployment](#build-and-deployment)
- [TypeScript Interfaces](#typescript-interfaces)
---
## Overview
The TranscriptorIO frontend is a Single Page Application (SPA) built with Vue 3, featuring:
- **6 Complete Views**: Dashboard, Queue, Scanner, Rules, Workers, Settings
- **Real-time Updates**: Polling-based status updates
- **Dark Theme**: Tdarr-inspired dark UI
- **Type Safety**: Full TypeScript support
- **State Management**: Pinia stores for shared state
---
## Technology Stack
| Technology | Version | Purpose |
|------------|---------|---------|
| Vue.js | 3.4+ | UI Framework |
| Vue Router | 4.2+ | Client-side routing |
| Pinia | 2.1+ | State management |
| Axios | 1.6+ | HTTP client |
| TypeScript | 5.3+ | Type safety |
| Vite | 5.0+ | Build tool / dev server |
---
## Directory Structure
```
frontend/
├── public/ # Static assets (favicon, etc.)
├── src/
│ ├── main.ts # Application entry point
│ ├── App.vue # Root component + navigation
│ │
│ ├── views/ # Page components (routed)
│ │ ├── DashboardView.vue # System overview + resources
│ │ ├── QueueView.vue # Job management
│ │ ├── ScannerView.vue # Scanner control
│ │ ├── RulesView.vue # Scan rules CRUD
│ │ ├── WorkersView.vue # Worker pool management
│ │ └── SettingsView.vue # Settings management
│ │
│ ├── components/ # Reusable components
│ │ ├── ConnectionWarning.vue # Backend connection status
│ │ ├── PathBrowser.vue # Filesystem browser modal
│ │ └── SetupWizard.vue # First-run setup wizard
│ │
│ ├── stores/ # Pinia state stores
│ │ ├── config.ts # Configuration store
│ │ ├── system.ts # System status store
│ │ ├── workers.ts # Workers store
│ │ └── jobs.ts # Jobs store
│ │
│ ├── services/
│ │ └── api.ts # Axios API client
│ │
│ ├── router/
│ │ └── index.ts # Vue Router configuration
│ │
│ ├── types/
│ │ └── api.ts # TypeScript interfaces
│ │
│ └── assets/
│ └── css/
│ └── main.css # Global styles (dark theme)
├── index.html # HTML template
├── vite.config.ts # Vite configuration
├── tsconfig.json # TypeScript configuration
└── package.json # Dependencies
```
---
## Development Setup
### Prerequisites
- Node.js 18+ and npm
- Backend server running on port 8000
### Installation
```bash
cd frontend
# Install dependencies
npm install
# Start development server (with proxy to backend)
npm run dev
```
### Development URLs
| URL | Description |
|-----|-------------|
| http://localhost:3000 | Frontend dev server |
| http://localhost:8000 | Backend API |
| http://localhost:8000/docs | Swagger API docs |
### Scripts
```bash
npm run dev # Start dev server with HMR
npm run build # Build for production
npm run preview # Preview production build
npm run lint # Run ESLint
```
---
## Views
### DashboardView
**Path**: `/`
System overview with real-time resource monitoring.
**Features**:
- System status (running/stopped)
- CPU usage gauge
- RAM usage gauge
- GPU usage gauges (per device)
- Recent jobs list
- Worker pool summary
- Scanner status
**Data Sources**:
- `GET /api/status`
- `GET /api/system/resources`
- `GET /api/jobs?page_size=10`
### QueueView
**Path**: `/queue`
Job queue management with filtering and pagination.
**Features**:
- Job list with status icons
- Status filter (All/Queued/Processing/Completed/Failed)
- Pagination controls
- Retry failed jobs
- Cancel queued/processing jobs
- Clear completed jobs
- Job progress display
- Processing time display
**Data Sources**:
- `GET /api/jobs`
- `GET /api/jobs/stats`
- `POST /api/jobs/{id}/retry`
- `DELETE /api/jobs/{id}`
- `POST /api/jobs/queue/clear`
### ScannerView
**Path**: `/scanner`
Library scanner control and configuration.
**Features**:
- Scanner status display
- Start/stop scheduler
- Start/stop file watcher
- Manual scan trigger
- Scan results display
- Next scan time
- Total files scanned counter
**Data Sources**:
- `GET /api/scanner/status`
- `POST /api/scanner/scan`
- `POST /api/scanner/scheduler/start`
- `POST /api/scanner/scheduler/stop`
- `POST /api/scanner/watcher/start`
- `POST /api/scanner/watcher/stop`
### RulesView
**Path**: `/rules`
Scan rules CRUD management.
**Features**:
- Rules list with priority ordering
- Create new rule (modal)
- Edit existing rule (modal)
- Delete rule (with confirmation)
- Toggle rule enabled/disabled
- Condition configuration
- Action configuration
**Data Sources**:
- `GET /api/scan-rules`
- `POST /api/scan-rules`
- `PUT /api/scan-rules/{id}`
- `DELETE /api/scan-rules/{id}`
- `POST /api/scan-rules/{id}/toggle`
### WorkersView
**Path**: `/workers`
Worker pool management.
**Features**:
- Worker list with status
- Add CPU worker
- Add GPU worker (with device selection)
- Remove worker
- Start/stop pool
- Worker statistics
- Current job display per worker
- Progress and ETA display
**Data Sources**:
- `GET /api/workers`
- `GET /api/workers/stats`
- `POST /api/workers`
- `DELETE /api/workers/{id}`
- `POST /api/workers/pool/start`
- `POST /api/workers/pool/stop`
### SettingsView
**Path**: `/settings`
Database-backed settings management.
**Features**:
- Settings grouped by category
- Category tabs (General, Workers, Transcription, Scanner, Bazarr)
- Edit settings in-place
- Save changes button
- Change detection (unsaved changes warning)
- Setting descriptions
**Data Sources**:
- `GET /api/settings`
- `PUT /api/settings/{key}`
- `POST /api/settings/bulk-update`
---
## Components
### ConnectionWarning
Displays warning banner when backend is unreachable.
**Props**: None
**State**: Uses `systemStore.isConnected`
### PathBrowser
Modal component for browsing filesystem paths.
**Props**:
- `show: boolean` - Show/hide modal
- `initialPath: string` - Starting path
**Emits**:
- `select(path: string)` - Path selected
- `close()` - Modal closed
**API Calls**:
- `GET /api/filesystem/browse?path={path}`
- `GET /api/filesystem/common-paths`
### SetupWizard
First-run setup wizard component.
**Props**: None
**Features**:
- Mode selection (Standalone/Bazarr)
- Library path configuration
- Scan rule creation
- Worker configuration
- Scanner interval setting
**API Calls**:
- `GET /api/setup/status`
- `POST /api/setup/standalone`
- `POST /api/setup/bazarr-slave`
- `POST /api/setup/skip`
---
## State Management
### Pinia Stores
#### systemStore (`stores/system.ts`)
Global system state.
```typescript
interface SystemState {
isConnected: boolean
status: SystemStatus | null
resources: SystemResources | null
loading: boolean
error: string | null
}
// Actions
fetchStatus() // Fetch /api/status
fetchResources() // Fetch /api/system/resources
startPolling() // Start auto-refresh
stopPolling() // Stop auto-refresh
```
#### workersStore (`stores/workers.ts`)
Worker pool state.
```typescript
interface WorkersState {
workers: Worker[]
stats: WorkerStats | null
loading: boolean
error: string | null
}
// Actions
fetchWorkers() // Fetch all workers
fetchStats() // Fetch pool stats
addWorker(type, deviceId?) // Add worker
removeWorker(id) // Remove worker
startPool(cpuCount, gpuCount) // Start pool
stopPool() // Stop pool
```
#### jobsStore (`stores/jobs.ts`)
Job queue state.
```typescript
interface JobsState {
jobs: Job[]
stats: QueueStats | null
total: number
page: number
pageSize: number
statusFilter: string | null
loading: boolean
error: string | null
}
// Actions
fetchJobs() // Fetch with current filters
fetchStats() // Fetch queue stats
retryJob(id) // Retry failed job
cancelJob(id) // Cancel job
clearCompleted() // Clear completed jobs
setStatusFilter(status) // Update filter
setPage(page) // Change page
```
#### configStore (`stores/config.ts`)
Settings configuration state.
```typescript
interface ConfigState {
settings: Setting[]
loading: boolean
error: string | null
pendingChanges: Record<string, string>
}
// Actions
fetchSettings(category?) // Fetch settings
updateSetting(key, value) // Queue update
saveChanges() // Save all pending
discardChanges() // Discard pending
```
---
## API Service
### Configuration (`services/api.ts`)
```typescript
import axios from 'axios'
const api = axios.create({
baseURL: '/api',
timeout: 30000,
headers: {
'Content-Type': 'application/json'
}
})
// Response interceptor for error handling
api.interceptors.response.use(
response => response,
error => {
console.error('API Error:', error)
return Promise.reject(error)
}
)
export default api
```
### Usage Example
```typescript
import api from '@/services/api'
// GET request
const response = await api.get('/jobs', {
params: { status_filter: 'queued', page: 1 }
})
// POST request
const job = await api.post('/jobs', {
file_path: '/media/video.mkv',
target_lang: 'spa'
})
// PUT request
await api.put('/settings/worker_cpu_count', {
value: '2'
})
// DELETE request
await api.delete(`/jobs/${jobId}`)
```
---
## Routing
### Route Configuration
```typescript
const routes = [
{ path: '/', name: 'Dashboard', component: DashboardView },
{ path: '/workers', name: 'Workers', component: WorkersView },
{ path: '/queue', name: 'Queue', component: QueueView },
{ path: '/scanner', name: 'Scanner', component: ScannerView },
{ path: '/rules', name: 'Rules', component: RulesView },
{ path: '/settings', name: 'Settings', component: SettingsView }
]
```
### Navigation
Navigation is handled in `App.vue` with a sidebar menu.
```vue
<nav class="sidebar">
<router-link to="/">Dashboard</router-link>
<router-link to="/workers">Workers</router-link>
<router-link to="/queue">Queue</router-link>
<router-link to="/scanner">Scanner</router-link>
<router-link to="/rules">Rules</router-link>
<router-link to="/settings">Settings</router-link>
</nav>
<main class="content">
<router-view />
</main>
```
---
## Styling
### Dark Theme
The application uses a Tdarr-inspired dark theme defined in `assets/css/main.css`.
**Color Palette**:
| Variable | Value | Usage |
|----------|-------|-------|
| --bg-primary | #1a1a2e | Main background |
| --bg-secondary | #16213e | Card background |
| --bg-tertiary | #0f3460 | Hover states |
| --text-primary | #eaeaea | Primary text |
| --text-secondary | #a0a0a0 | Secondary text |
| --accent-primary | #e94560 | Buttons, links |
| --accent-success | #4ade80 | Success states |
| --accent-warning | #fbbf24 | Warning states |
| --accent-error | #ef4444 | Error states |
### Component Styling
Components use scoped CSS with CSS variables:
```vue
<style scoped>
.card {
background: var(--bg-secondary);
border-radius: 8px;
padding: 1.5rem;
}
.btn-primary {
background: var(--accent-primary);
color: white;
border: none;
padding: 0.5rem 1rem;
border-radius: 4px;
cursor: pointer;
}
.btn-primary:hover {
opacity: 0.9;
}
</style>
```
---
## Build and Deployment
### Production Build
```bash
cd frontend
npm run build
```
This creates a `dist/` folder with:
- `index.html` - Entry HTML
- `assets/` - JS, CSS bundles (hashed filenames)
### Deployment Options
#### Option 1: Served by Backend (Recommended)
The FastAPI backend automatically serves the frontend from `frontend/dist/`:
```python
# backend/app.py
frontend_path = Path(__file__).parent.parent / "frontend" / "dist"
if frontend_path.exists():
app.mount("/assets", StaticFiles(directory=str(frontend_path / "assets")))
@app.get("/{full_path:path}")
async def serve_frontend(full_path: str = ""):
return FileResponse(str(frontend_path / "index.html"))
```
**Access**: http://localhost:8000
#### Option 2: Nginx Reverse Proxy
```nginx
server {
listen 80;
server_name transcriptorio.local;
# Frontend
location / {
root /var/www/transcriptorio/frontend/dist;
try_files $uri $uri/ /index.html;
}
# Backend API
location /api {
proxy_pass http://localhost:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
}
```
#### Option 3: Docker
```dockerfile
# Build frontend
FROM node:18-alpine AS frontend-builder
WORKDIR /app/frontend
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build
# Final image
FROM python:3.12-slim
COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
# ... rest of backend setup
```
---
## TypeScript Interfaces
### Key Types (`types/api.ts`)
```typescript
// Job
interface Job {
id: string
file_path: string
file_name: string
status: 'queued' | 'processing' | 'completed' | 'failed' | 'cancelled'
priority: number
progress: number
// ... more fields
}
// Worker
interface Worker {
worker_id: string
worker_type: 'cpu' | 'gpu'
device_id: number | null
status: 'idle' | 'busy' | 'stopped' | 'error'
current_job_id: string | null
jobs_completed: number
jobs_failed: number
}
// Setting
interface Setting {
id: number
key: string
value: string | null
description: string | null
category: string | null
value_type: string | null
}
// ScanRule
interface ScanRule {
id: number
name: string
enabled: boolean
priority: number
conditions: ScanRuleConditions
action: ScanRuleAction
}
```

25
frontend/.gitignore vendored Normal file
View File

@@ -0,0 +1,25 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

245
frontend/README.md Normal file
View File

@@ -0,0 +1,245 @@
# TranscriptorIO Frontend
Vue 3 + TypeScript + Vite frontend for TranscriptorIO.
## 🚀 Quick Start
### Prerequisites
- Node.js 18+ (use nvm for easy management)
- npm or yarn
### Install nvm (if not installed)
```bash
# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
# Reload shell
source ~/.bashrc # or ~/.zshrc
# Install Node.js 18
nvm install 18
nvm use 18
```
### Install Dependencies
```bash
cd frontend
npm install
```
### Development
```bash
# Start dev server (with hot-reload)
npm run dev
# Backend proxy is configured to http://localhost:8000
# Frontend runs on http://localhost:3000
```
### Build for Production
```bash
npm run build
# Output in dist/ directory
```
### Preview Production Build
```bash
npm run preview
```
## 📁 Project Structure
```
frontend/
├── src/
│ ├── assets/
│ │ └── css/
│ │ └── main.css # Global styles (Tdarr-inspired dark theme)
│ ├── components/ # Reusable Vue components
│ ├── views/ # Page components
│ │ ├── DashboardView.vue # Main dashboard
│ │ ├── WorkersView.vue # Worker management
│ │ ├── QueueView.vue # Job queue
│ │ ├── ScannerView.vue # Library scanner
│ │ ├── RulesView.vue # Scan rules
│ │ └── SettingsView.vue # Settings
│ ├── stores/ # Pinia state management
│ │ ├── system.ts # System status store
│ │ ├── workers.ts # Workers store
│ │ └── jobs.ts # Jobs store
│ ├── services/
│ │ └── api.ts # Axios API client
│ ├── types/
│ │ └── api.ts # TypeScript interfaces
│ ├── router/
│ │ └── index.ts # Vue Router configuration
│ ├── App.vue # Root component
│ └── main.ts # App entry point
├── index.html
├── vite.config.ts # Vite configuration
├── tsconfig.json # TypeScript configuration
└── package.json
```
## 🎨 Design
### Theme
- Dark theme inspired by Tdarr
- Color palette optimized for monitoring and data visualization
- Fully responsive design
### Features Implemented
- ✅ Dashboard with system overview
- ✅ Worker management with real-time updates
- ✅ Auto-refresh every 3-5 seconds
- ✅ Modal dialogs for actions
- ✅ Status badges and progress bars
- ⏳ Job queue view (placeholder)
- ⏳ Scanner control (placeholder)
- ⏳ Rules editor (placeholder)
- ⏳ Settings (placeholder)
## 🔌 API Integration
The frontend communicates with the backend API via Axios:
```typescript
// Example usage
import { workersApi } from '@/services/api'
// Get all workers
const workers = await workersApi.getAll()
// Add a GPU worker
await workersApi.add({
worker_type: 'gpu',
device_id: 0
})
```
### API Proxy Configuration
Vite dev server proxies API requests to the backend:
```typescript
// vite.config.ts
server: {
proxy: {
'/api': 'http://localhost:8000',
'/health': 'http://localhost:8000'
}
}
```
## 🧩 State Management
Uses Pinia for state management:
```typescript
// Example store usage
import { useWorkersStore } from '@/stores/workers'
const workersStore = useWorkersStore()
await workersStore.fetchWorkers()
```
## 🔧 Development
### Recommended IDE Setup
- VS Code with extensions:
- Volar (Vue 3 support)
- TypeScript Vue Plugin
- ESLint
### Type Checking
```bash
npm run build # Includes type checking with vue-tsc
```
### Linting
```bash
npm run lint
```
## 📦 Dependencies
### Core
- **Vue 3** - Progressive JavaScript framework
- **Vite** - Fast build tool
- **TypeScript** - Type safety
- **Vue Router** - Client-side routing
- **Pinia** - State management
- **Axios** - HTTP client
### Dev Dependencies
- vue-tsc - Vue TypeScript compiler
- ESLint - Code linting
- TypeScript ESLint - TypeScript linting rules
## 🚀 Deployment
### Standalone Deployment
```bash
# Build
npm run build
# Serve with any static file server
npx serve dist
```
### Integration with Backend
The built frontend can be served by FastAPI:
```python
# backend/app.py
from fastapi.staticfiles import StaticFiles
app.mount("/", StaticFiles(directory="frontend/dist", html=True), name="static")
```
## 📱 Responsive Design
- Desktop-first design
- Breakpoint: 768px for mobile
- Touch-friendly controls
- Optimized for tablets and phones
## 🎯 Roadmap
### Phase 1 (Current)
- ✅ Dashboard
- ✅ Worker management
- ⏳ Job queue view
### Phase 2
- ⏳ Scanner controls
- ⏳ Rules editor
- ⏳ Settings page
### Phase 3
- ⏳ WebSocket support for real-time updates
- ⏳ Advanced filtering and search
- ⏳ Job logs viewer
- ⏳ Dark/light theme toggle
## 🐛 Known Issues
- Auto-refresh uses polling (will migrate to WebSocket)
- Some views are placeholders
- No authentication yet
## 📄 License
MIT License - Same as backend

8
frontend/env.d.ts vendored Normal file
View File

@@ -0,0 +1,8 @@
/// <reference types="vite/client" />
// Ambient module declaration: lets TypeScript resolve imports that end in
// `.vue`, typing each default export as a generic Vue component.
declare module '*.vue' {
import type { DefineComponent } from 'vue'
// Deliberately loose typing (`{}`, `{}`, `any`) so any component shape is accepted.
const component: DefineComponent<{}, {}, any>
export default component
}

14
frontend/index.html Normal file
View File

@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TranscriptorIO</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.ts"></script>
</body>
</html>

3313
frontend/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

29
frontend/package.json Normal file
View File

@@ -0,0 +1,29 @@
{
"name": "transcriptorio-ui",
"version": "1.0.0",
"description": "TranscriptorIO Web UI - Vue 3 Frontend",
"type": "module",
"scripts": {
"dev": "vite",
"build": "vue-tsc && vite build",
"preview": "vite preview",
"lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore"
},
"dependencies": {
"vue": "^3.4.0",
"vue-router": "^4.2.5",
"pinia": "^2.1.7",
"axios": "^1.6.5"
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.0.3",
"@vue/tsconfig": "^0.5.1",
"typescript": "~5.3.0",
"vite": "^5.0.11",
"vue-tsc": "^1.8.27",
"@typescript-eslint/eslint-plugin": "^6.19.0",
"@typescript-eslint/parser": "^6.19.0",
"eslint": "^8.56.0",
"eslint-plugin-vue": "^9.20.1"
}
}

79
frontend/setup.sh Executable file
View File

@@ -0,0 +1,79 @@
#!/bin/bash
# TranscriptorIO frontend bootstrap.
#
# Verifies that Node.js (18+) and npm are available, installs the npm
# dependencies of the directory containing this script, and prints the next
# steps. Exits with status 1 on any failed precondition or failed install.

echo "🎬 TranscriptorIO Frontend - Setup Script"
echo "=========================================="
echo ""

# Check if Node.js is installed
if ! command -v node &> /dev/null; then
    echo "❌ Node.js is not installed"
    echo ""
    echo "Please install Node.js 18+ using one of these methods:"
    echo ""
    echo "Method 1: Using nvm (recommended)"
    echo " curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash"
    echo " source ~/.bashrc # or ~/.zshrc"
    echo " nvm install 18"
    echo " nvm use 18"
    echo ""
    echo "Method 2: Using package manager"
    echo " Ubuntu/Debian: sudo apt install nodejs npm"
    echo " Fedora: sudo dnf install nodejs npm"
    echo " Arch: sudo pacman -S nodejs npm"
    echo ""
    exit 1
fi

NODE_VERSION=$(node --version)
echo "✅ Node.js detected: $NODE_VERSION"

# Enforce the documented minimum (Node.js 18+). `node --version` prints
# e.g. "v18.19.0"; strip the leading "v" and everything after the first dot.
# The check is skipped if the version string cannot be parsed as a number.
NODE_MAJOR=${NODE_VERSION#v}
NODE_MAJOR=${NODE_MAJOR%%.*}
if [[ "$NODE_MAJOR" =~ ^[0-9]+$ ]] && [ "$NODE_MAJOR" -lt 18 ]; then
    echo "❌ Node.js 18+ is required (found $NODE_VERSION)"
    exit 1
fi

# Check if npm is installed
if ! command -v npm &> /dev/null; then
    echo "❌ npm is not installed"
    exit 1
fi

NPM_VERSION=$(npm --version)
echo "✅ npm detected: v$NPM_VERSION"
echo ""

# Navigate to the directory containing this script (the frontend directory).
# Abort if that fails so npm never runs in an unrelated working directory.
cd "$(dirname "$0")" || {
    echo "❌ Could not change to the script directory"
    exit 1
}

# Check if package.json exists
if [ ! -f "package.json" ]; then
    echo "❌ package.json not found. Are you in the frontend directory?"
    exit 1
fi

# Install dependencies
echo "📦 Installing dependencies..."
echo ""

# Test the command directly instead of inspecting $? afterwards.
if npm install; then
    echo ""
    echo "✅ Dependencies installed successfully!"
    echo ""
    echo "=========================================="
    echo "🚀 Next Steps"
    echo "=========================================="
    echo ""
    echo "1. Make sure the backend is running:"
    echo " cd ../backend"
    echo " python cli.py server"
    echo ""
    echo "2. Start the frontend dev server:"
    echo " cd frontend"
    echo " npm run dev"
    echo ""
    echo "3. Open your browser:"
    echo " http://localhost:3000"
    echo ""
    echo "=========================================="
else
    echo ""
    echo "❌ Failed to install dependencies"
    exit 1
fi

108
frontend/src/App.vue Normal file
View File

@@ -0,0 +1,108 @@
<template>
<div id="app">
<!-- Connection Warning (shows when backend is offline) -->
<ConnectionWarning />
<!-- Setup Wizard (first run only) -->
<SetupWizard v-if="showSetupWizard" @complete="onSetupComplete" />
<header class="app-header">
<div class="container">
<div class="header-content">
<div class="logo">
<h1>🎬 TranscriptorIO</h1>
<span class="subtitle">AI-Powered Subtitle Transcription</span>
</div>
<nav class="main-nav">
<router-link to="/" class="nav-link">Dashboard</router-link>
<router-link to="/workers" class="nav-link">Workers</router-link>
<router-link to="/queue" class="nav-link">Queue</router-link>
<router-link v-if="configStore.isStandalone" to="/scanner" class="nav-link">Scanner</router-link>
<router-link v-if="configStore.isStandalone" to="/rules" class="nav-link">Rules</router-link>
<router-link to="/settings" class="nav-link">Settings</router-link>
</nav>
<div class="status-indicator" :class="{ 'online': systemStore.isOnline }">
<span class="status-dot"></span>
<span class="status-text">{{ systemStore.isOnline ? 'Online' : 'Offline' }}</span>
</div>
</div>
</div>
</header>
<main class="app-main">
<div class="container">
<router-view v-slot="{ Component }">
<transition name="fade" mode="out-in">
<component :is="Component" />
</transition>
</router-view>
</div>
</main>
<footer class="app-footer">
<div class="container">
<p>&copy; 2026 TranscriptorIO | Powered by Whisper AI</p>
</div>
</footer>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted, onUnmounted } from 'vue'
import { useSystemStore } from '@/stores/system'
import { useConfigStore } from '@/stores/config'
import SetupWizard from '@/components/SetupWizard.vue'
import ConnectionWarning from '@/components/ConnectionWarning.vue'
import axios from 'axios'
const systemStore = useSystemStore()
const configStore = useConfigStore()
const showSetupWizard = ref(false)
let statusInterval: number | null = null
/**
 * Poll the backend status through the system store.
 * Failures are intentionally ignored here: the store's `fetchStatus`
 * records the error and the offline flag before it re-throws.
 */
const checkStatus = async () => {
  await systemStore.fetchStatus().catch(() => undefined)
}
/**
 * Ask the backend whether this is a first run that has not completed setup
 * and, if so, show the setup wizard. Request failures are only logged.
 */
const checkSetupStatus = async () => {
  try {
    const { data } = await axios.get('/api/setup/status')
    const needsSetup = data.is_first_run && !data.setup_completed
    if (needsSetup) {
      showSetupWizard.value = true
    }
  } catch (error) {
    console.error('Failed to check setup status:', error)
  }
}
/**
 * Handler for the wizard's `complete` event: hide the wizard, then reload
 * the page so the freshly saved settings are picked up.
 */
const onSetupComplete = (): void => {
  showSetupWizard.value = false
  window.location.reload()
}
// On mount: kick off the one-time checks, then poll the status every 10 seconds.
onMounted(() => {
  const STATUS_POLL_MS = 10000

  checkSetupStatus()
  checkStatus()
  configStore.fetchConfig()
  configStore.detectGPU()

  statusInterval = window.setInterval(checkStatus, STATUS_POLL_MS)
})

// On unmount: stop the status poll if one was started.
onUnmounted(() => {
  if (!statusInterval) {
    return
  }
  clearInterval(statusInterval)
})
</script>
<style>
/* Global styles in main.css */
</style>

View File

@@ -0,0 +1,429 @@
:root {
/* Colors - Tdarr-inspired dark theme */
--primary-bg: #1a1d29;
--secondary-bg: #23283a;
--tertiary-bg: #2d3448;
--accent-color: #4a9eff;
--accent-hover: #357abd;
--success-color: #28a745;
--warning-color: #ffc107;
--danger-color: #dc3545;
--text-primary: #e4e6eb;
--text-secondary: #b8bcc8;
--text-muted: #8b92a6;
--border-color: #3a3f55;
/* Spacing */
--spacing-xs: 0.25rem;
--spacing-sm: 0.5rem;
--spacing-md: 1rem;
--spacing-lg: 1.5rem;
--spacing-xl: 2rem;
/* Border radius */
--radius-sm: 4px;
--radius-md: 8px;
--radius-lg: 12px;
/* Transitions */
--transition-fast: 0.15s ease;
--transition-normal: 0.3s ease;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background-color: var(--primary-bg);
color: var(--text-primary);
line-height: 1.6;
}
#app {
min-height: 100vh;
display: flex;
flex-direction: column;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 0 var(--spacing-lg);
width: 100%;
}
/* Header */
.app-header {
background-color: var(--secondary-bg);
border-bottom: 2px solid var(--border-color);
padding: var(--spacing-md) 0;
position: sticky;
top: 0;
z-index: 1000;
}
.header-content {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing-xl);
}
.logo h1 {
font-size: 1.5rem;
font-weight: 700;
color: var(--accent-color);
margin-bottom: var(--spacing-xs);
}
.logo .subtitle {
font-size: 0.75rem;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: 1px;
}
.main-nav {
display: flex;
gap: var(--spacing-sm);
flex: 1;
}
.nav-link {
padding: var(--spacing-sm) var(--spacing-md);
color: var(--text-secondary);
text-decoration: none;
border-radius: var(--radius-sm);
transition: all var(--transition-fast);
font-weight: 500;
}
.nav-link:hover {
background-color: var(--tertiary-bg);
color: var(--text-primary);
}
.nav-link.router-link-active {
background-color: var(--accent-color);
color: white;
}
.status-indicator {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm) var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-md);
}
.status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background-color: var(--danger-color);
animation: pulse 2s infinite;
}
.status-indicator.online .status-dot {
background-color: var(--success-color);
}
.status-text {
font-size: 0.875rem;
color: var(--text-secondary);
font-weight: 500;
}
/* Main content */
.app-main {
flex: 1;
padding: var(--spacing-xl) 0;
}
/* Footer */
.app-footer {
background-color: var(--secondary-bg);
border-top: 1px solid var(--border-color);
padding: var(--spacing-md) 0;
text-align: center;
color: var(--text-muted);
font-size: 0.875rem;
}
/* Cards */
.card {
background-color: var(--secondary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-lg);
padding: var(--spacing-lg);
margin-bottom: var(--spacing-lg);
}
.card-header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: var(--spacing-lg);
padding-bottom: var(--spacing-md);
border-bottom: 1px solid var(--border-color);
}
.card-title {
font-size: 1.25rem;
font-weight: 600;
color: var(--text-primary);
}
.card-body {
color: var(--text-secondary);
}
/* Buttons */
.btn {
display: inline-flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm) var(--spacing-md);
border: none;
border-radius: var(--radius-sm);
font-size: 0.875rem;
font-weight: 500;
cursor: pointer;
transition: all var(--transition-fast);
text-decoration: none;
}
.btn-primary {
background-color: var(--accent-color);
color: white;
}
.btn-primary:hover {
background-color: var(--accent-hover);
}
.btn-success {
background-color: var(--success-color);
color: white;
}
.btn-success:hover {
opacity: 0.9;
}
.btn-danger {
background-color: var(--danger-color);
color: white;
}
.btn-danger:hover {
opacity: 0.9;
}
.btn-secondary {
background-color: var(--tertiary-bg);
color: var(--text-primary);
}
.btn-secondary:hover {
background-color: var(--border-color);
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
/* Tables */
.table {
width: 100%;
border-collapse: collapse;
margin-top: var(--spacing-md);
}
.table th,
.table td {
padding: var(--spacing-md);
text-align: left;
border-bottom: 1px solid var(--border-color);
}
.table th {
background-color: var(--tertiary-bg);
color: var(--text-secondary);
font-weight: 600;
font-size: 0.875rem;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.table tbody tr:hover {
background-color: var(--tertiary-bg);
}
/* Status badges */
.badge {
display: inline-block;
padding: var(--spacing-xs) var(--spacing-sm);
border-radius: var(--radius-sm);
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.badge-queued {
background-color: rgba(255, 193, 7, 0.2);
color: var(--warning-color);
}
.badge-processing {
background-color: rgba(74, 158, 255, 0.2);
color: var(--accent-color);
}
.badge-completed {
background-color: rgba(40, 167, 69, 0.2);
color: var(--success-color);
}
.badge-failed {
background-color: rgba(220, 53, 69, 0.2);
color: var(--danger-color);
}
.badge-cancelled {
background-color: rgba(139, 146, 166, 0.2);
color: var(--text-muted);
}
/* Progress bar */
.progress {
width: 100%;
height: 8px;
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
overflow: hidden;
}
.progress-bar {
height: 100%;
background-color: var(--accent-color);
transition: width var(--transition-normal);
}
/* Animations */
@keyframes pulse {
0%, 100% {
opacity: 1;
}
50% {
opacity: 0.5;
}
}
.fade-enter-active,
.fade-leave-active {
transition: opacity var(--transition-normal);
}
.fade-enter-from,
.fade-leave-to {
opacity: 0;
}
/* Loading spinner */
.spinner {
border: 3px solid var(--tertiary-bg);
border-top: 3px solid var(--accent-color);
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: var(--spacing-xl) auto;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Grid system */
.grid {
display: grid;
gap: var(--spacing-lg);
}
.grid-2 {
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
}
.grid-3 {
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
}
.grid-4 {
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
}
/* Utility classes */
.text-center {
text-align: center;
}
.text-right {
text-align: right;
}
.mt-lg {
margin-top: var(--spacing-lg);
}
.mb-lg {
margin-bottom: var(--spacing-lg);
}
.flex {
display: flex;
}
.flex-between {
display: flex;
justify-content: space-between;
align-items: center;
}
.flex-center {
display: flex;
justify-content: center;
align-items: center;
}
.gap-md {
gap: var(--spacing-md);
}
/* Responsive */
@media (max-width: 768px) {
.header-content {
flex-direction: column;
gap: var(--spacing-md);
}
.main-nav {
flex-wrap: wrap;
justify-content: center;
}
.grid-2,
.grid-3,
.grid-4 {
grid-template-columns: 1fr;
}
}

View File

@@ -0,0 +1,166 @@
<template>
<Transition name="slide-down">
<div v-if="!isOnline" class="connection-overlay">
<div class="connection-banner">
<div class="banner-icon"></div>
<div class="banner-content">
<h2 class="banner-title">No Connection to Backend</h2>
<p class="banner-message">
The backend server is not responding. Please check that the server is running and try again.
</p>
<p class="banner-status">
Attempting to reconnect...
<span class="reconnect-indicator"></span>
</p>
</div>
</div>
<div class="overlay-backdrop"></div>
</div>
</Transition>
</template>
<script setup lang="ts">
import { computed } from 'vue'
import { useSystemStore } from '@/stores/system'
// Shared system store; its `isOnline` flag is updated by the status/health calls.
const systemStore = useSystemStore()
// Read-only reactive view of the store flag; the template shows the overlay when it is false.
const isOnline = computed(() => systemStore.isOnline)
</script>
<style scoped>
.connection-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
z-index: 99999;
display: flex;
justify-content: center;
padding-top: var(--spacing-xl);
}
.overlay-backdrop {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: rgba(0, 0, 0, 0.85);
backdrop-filter: blur(4px);
z-index: 1;
}
.connection-banner {
position: relative;
z-index: 2;
max-width: 600px;
width: calc(100% - 2 * var(--spacing-xl));
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%);
border: 3px solid #ff4444;
border-radius: var(--radius-lg);
padding: var(--spacing-xl);
box-shadow: 0 20px 60px rgba(255, 68, 68, 0.5);
animation: shake 0.5s ease-in-out;
height: fit-content;
}
@keyframes shake {
0%, 100% { transform: translateX(0); }
10%, 30%, 50%, 70%, 90% { transform: translateX(-10px); }
20%, 40%, 60%, 80% { transform: translateX(10px); }
}
.banner-icon {
font-size: 4rem;
text-align: center;
margin-bottom: var(--spacing-md);
animation: pulse 2s infinite;
}
@keyframes pulse {
0%, 100% { transform: scale(1); opacity: 1; }
50% { transform: scale(1.1); opacity: 0.8; }
}
.banner-content {
text-align: center;
}
.banner-title {
font-size: 2rem;
font-weight: 700;
color: white;
margin-bottom: var(--spacing-md);
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
}
.banner-message {
font-size: 1.125rem;
color: rgba(255, 255, 255, 0.95);
margin-bottom: var(--spacing-lg);
line-height: 1.6;
}
.banner-status {
font-size: 1rem;
color: rgba(255, 255, 255, 0.85);
display: flex;
align-items: center;
justify-content: center;
gap: var(--spacing-sm);
font-weight: 500;
}
.reconnect-indicator {
display: inline-block;
animation: blink 1.5s infinite;
color: white;
font-size: 1.5rem;
}
@keyframes blink {
0%, 49% { opacity: 1; }
50%, 100% { opacity: 0; }
}
/* Transition animations */
.slide-down-enter-active {
transition: all 0.4s ease-out;
}
.slide-down-leave-active {
transition: all 0.3s ease-in;
}
.slide-down-enter-from {
transform: translateY(-100%);
opacity: 0;
}
.slide-down-leave-to {
transform: translateY(-100%);
opacity: 0;
}
@media (max-width: 768px) {
.connection-banner {
width: calc(100% - 2 * var(--spacing-md));
padding: var(--spacing-lg);
}
.banner-title {
font-size: 1.5rem;
}
.banner-message {
font-size: 1rem;
}
.banner-icon {
font-size: 3rem;
}
}
</style>

View File

@@ -0,0 +1,293 @@
<template>
<div class="path-browser">
<div class="browser-header">
<button @click="emit('close')" class="btn-close"></button>
<h3>Select Directory</h3>
</div>
<div class="current-path">
<span class="path-label">Current:</span>
<code>{{ currentPath || '/' }}</code>
</div>
<div class="browser-body">
<!-- Error message -->
<div v-if="error" class="error-message">
{{ error }}
</div>
<!-- Parent directory button -->
<div v-if="currentPath !== '/'" class="dir-item" @click="goUp">
<span class="dir-icon">📁</span>
<span class="dir-name">..</span>
</div>
<!-- Directory list -->
<div
v-for="item in directories"
:key="item.path"
class="dir-item"
:class="{ 'dir-item-disabled': !item.is_readable }"
@click="openDirectory(item)"
>
<span class="dir-icon">{{ item.is_readable ? '📁' : '🔒' }}</span>
<span class="dir-name">{{ item.name }}</span>
</div>
<div v-if="loading" class="loading-state">
<span class="spinner-small"></span>
Loading...
</div>
<div v-if="!loading && !error && directories.length === 0" class="empty-dirs">
No subdirectories found
</div>
</div>
<div class="browser-footer">
<button @click="emit('close')" class="btn btn-secondary">Cancel</button>
<button @click="selectPath" class="btn btn-primary">
Select This Path
</button>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted } from 'vue'
import axios from 'axios'
const emit = defineEmits(['select', 'close'])
// One entry returned by the filesystem browse / common-paths endpoints.
interface DirectoryItem {
// Label rendered in the directory list.
name: string
// Path sent back to the browse endpoint when the entry is opened; also the list key.
path: string
// Not read anywhere in this component — presumably false for plain files; verify against the API.
is_directory: boolean
// Entries that are not readable are filtered out on load and cannot be opened.
is_readable: boolean
}
const currentPath = ref('/')
const directories = ref<DirectoryItem[]>([])
const loading = ref(false)
const error = ref<string | null>(null)
/**
 * Load the readable entries of `path` from the browse endpoint and make it
 * the current path. On failure the list is emptied and `error` is set to the
 * server-provided detail, or a generic message when none is available.
 */
async function loadDirectories(path: string) {
  error.value = null
  loading.value = true

  try {
    const { data } = await axios.get('/api/filesystem/browse', {
      params: { path }
    })
    currentPath.value = data.current_path
    directories.value = data.items.filter(({ is_readable }: DirectoryItem) => is_readable)
  } catch (err: any) {
    console.error('Failed to load directories:', err)
    const detail = err.response?.data?.detail
    error.value = detail || 'Failed to load directories'
    directories.value = []
  } finally {
    loading.value = false
  }
}
/**
 * Populate the list with the backend's "common paths" shortcuts.
 * If that request fails, fall back to browsing the filesystem root.
 */
async function loadCommonPaths() {
  loading.value = true
  // Clear any stale message, matching loadDirectories.
  error.value = null
  try {
    const response = await axios.get('/api/filesystem/common-paths')
    directories.value = response.data.filter((item: DirectoryItem) => item.is_readable)
  } catch (err) {
    console.error('Failed to load common paths:', err)
    // Fallback to root. Await it so the `finally` below does not clear the
    // loading flag while the fallback request is still in flight.
    await loadDirectories('/')
  } finally {
    loading.value = false
  }
}
/**
 * Navigate into `item` when it is readable; otherwise surface a
 * permission error instead of issuing a request.
 */
function openDirectory(item: DirectoryItem) {
  if (item.is_readable) {
    loadDirectories(item.path)
    return
  }
  error.value = 'Permission denied'
}
/**
 * Navigate to the parent of the current path. The path is split on `/`,
 * the last segment is dropped, and the remainder is re-joined under the root
 * (so the parent of a top-level directory is `/`).
 */
function goUp() {
  const segments = currentPath.value.split('/').filter(Boolean)
  const parentPath = '/' + segments.slice(0, -1).join('/')
  loadDirectories(parentPath)
}
/** Report the current path to the parent via `select`, then request `close`. */
function selectPath() {
  const chosenPath = currentPath.value
  emit('select', chosenPath)
  emit('close')
}
// The browser opens on the backend's common-path shortcuts rather than on "/".
onMounted(() => {
  void loadCommonPaths()
})
</script>
<style scoped>
.path-browser {
background: var(--tertiary-bg);
border: 2px solid var(--border-color);
border-radius: var(--radius-md);
max-width: 600px;
max-height: 70vh;
display: flex;
flex-direction: column;
}
.browser-header {
display: flex;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-md);
border-bottom: 1px solid var(--border-color);
}
.browser-header h3 {
flex: 1;
margin: 0;
font-size: 1.125rem;
color: var(--text-primary);
}
.btn-close {
background: none;
border: none;
color: var(--text-secondary);
font-size: 1.5rem;
cursor: pointer;
padding: 0;
width: 32px;
height: 32px;
display: flex;
align-items: center;
justify-content: center;
border-radius: var(--radius-sm);
transition: all var(--transition-fast);
}
.btn-close:hover {
background-color: var(--secondary-bg);
color: var(--text-primary);
}
.current-path {
padding: var(--spacing-md);
background: var(--secondary-bg);
border-bottom: 1px solid var(--border-color);
display: flex;
align-items: center;
gap: var(--spacing-sm);
}
.path-label {
color: var(--text-secondary);
font-size: 0.875rem;
font-weight: 600;
}
.current-path code {
flex: 1;
background: var(--primary-bg);
padding: var(--spacing-xs) var(--spacing-sm);
border-radius: var(--radius-sm);
color: var(--accent-color);
font-family: monospace;
font-size: 0.875rem;
}
.browser-body {
flex: 1;
overflow-y: auto;
padding: var(--spacing-sm);
min-height: 300px;
max-height: 400px;
}
.dir-item {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm) var(--spacing-md);
border-radius: var(--radius-sm);
cursor: pointer;
transition: background-color var(--transition-fast);
margin-bottom: var(--spacing-xs);
}
.dir-item:hover {
background-color: var(--secondary-bg);
}
.dir-item-disabled {
opacity: 0.5;
cursor: not-allowed;
}
.dir-item-disabled:hover {
background-color: transparent;
}
.error-message {
background-color: rgba(255, 68, 68, 0.1);
border: 1px solid rgba(255, 68, 68, 0.3);
border-radius: var(--radius-sm);
padding: var(--spacing-md);
margin-bottom: var(--spacing-md);
color: #ff6b6b;
font-size: 0.875rem;
}
.dir-icon {
font-size: 1.25rem;
}
.dir-name {
color: var(--text-primary);
font-weight: 500;
}
.loading-state {
display: flex;
align-items: center;
justify-content: center;
gap: var(--spacing-sm);
padding: var(--spacing-xl);
color: var(--text-secondary);
}
.spinner-small {
border: 2px solid var(--tertiary-bg);
border-top: 2px solid var(--accent-color);
border-radius: 50%;
width: 16px;
height: 16px;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.empty-dirs {
text-align: center;
padding: var(--spacing-xl);
color: var(--text-muted);
}
.browser-footer {
display: flex;
justify-content: flex-end;
gap: var(--spacing-md);
padding: var(--spacing-md);
border-top: 1px solid var(--border-color);
}
</style>

File diff suppressed because it is too large Load Diff

13
frontend/src/main.ts Normal file
View File

@@ -0,0 +1,13 @@
import { createApp } from 'vue'
import { createPinia } from 'pinia'
import App from './App.vue'
import router from './router'
import './assets/css/main.css'
// Build the root application, install the Pinia store and the router
// (in that order), then mount it on the #app element from index.html.
const app = createApp(App)
  .use(createPinia())
  .use(router)

app.mount('#app')

View File

@@ -0,0 +1,54 @@
import { createRouter, createWebHistory } from 'vue-router'
import type { RouteRecordRaw } from 'vue-router'
// Route table. Every view is loaded lazily through a dynamic import, and
// `meta.title` is the text the navigation guard puts into document.title.
const routes: RouteRecordRaw[] = [
{
path: '/',
name: 'Dashboard',
component: () => import('@/views/DashboardView.vue'),
meta: { title: 'Dashboard' }
},
{
path: '/workers',
name: 'Workers',
component: () => import('@/views/WorkersView.vue'),
meta: { title: 'Workers' }
},
{
path: '/queue',
name: 'Queue',
component: () => import('@/views/QueueView.vue'),
meta: { title: 'Job Queue' }
},
// NOTE(review): App.vue hides the Scanner and Rules nav links outside standalone
// mode, but these two routes have no guard and stay reachable by URL — confirm intended.
{
path: '/scanner',
name: 'Scanner',
component: () => import('@/views/ScannerView.vue'),
meta: { title: 'Library Scanner' }
},
{
path: '/rules',
name: 'Rules',
component: () => import('@/views/RulesView.vue'),
meta: { title: 'Scan Rules' }
},
{
path: '/settings',
name: 'Settings',
component: () => import('@/views/SettingsView.vue'),
meta: { title: 'Settings' }
}
]
// HTML5 history-mode router rooted at the build's base URL.
const router = createRouter({
  history: createWebHistory(import.meta.env.BASE_URL),
  routes
})

// Keep the browser tab title in sync with the target route.
// Routes with a `meta.title` render as "<title> - TranscriptorIO"; routes
// without one fall back to the bare application name instead of the
// duplicated "TranscriptorIO - TranscriptorIO".
router.beforeEach((to, _from, next) => {
  const appName = 'TranscriptorIO'
  const pageTitle = to.meta.title
  document.title = pageTitle ? `${pageTitle} - ${appName}` : appName
  next()
})
export default router

View File

@@ -0,0 +1,101 @@
import axios from 'axios'
import type {
SystemStatus,
Worker,
WorkerPoolStats,
AddWorkerRequest,
Job,
JobList,
QueueStats,
CreateJobRequest,
ScanRule,
CreateScanRuleRequest,
ScannerStatus,
ScanRequest,
ScanResult
} from '@/types/api'
// Shared Axios client: all calls are relative to /api, send JSON,
// and time out after 30 seconds.
const api = axios.create({
  baseURL: '/api',
  timeout: 30000,
  headers: { 'Content-Type': 'application/json' }
})

// Request interceptor: currently a pass-through hook.
api.interceptors.request.use(
  (config) => config,
  (error) => Promise.reject(error)
)

// Response interceptor: log the server payload (or the error message when
// there is no response body) and propagate the rejection to the caller.
api.interceptors.response.use(
  (response) => response,
  (error) => {
    const detail = error.response?.data || error.message
    console.error('API Error:', detail)
    return Promise.reject(error)
  }
)
// System API
export const systemApi = {
  /** Fetch the system status from `/api/status`. */
  getStatus: () => api.get<SystemStatus>('/status'),

  /**
   * Probe the backend health endpoint.
   *
   * The dev-server proxy exposes the health check at the server root
   * (`/health`) alongside `/api`, so the client's `/api` base URL is cleared
   * for this one request; otherwise it would resolve to `/api/health`.
   * NOTE(review): confirm the backend serves `/health` at the root in production too.
   */
  getHealth: () => api.get('/health', { baseURL: '' })
}
// Workers API
export const workersApi = {
  /** List all workers. */
  getAll() {
    return api.get<Worker[]>('/workers')
  },
  /** Fetch pool-level statistics. */
  getStats() {
    return api.get<WorkerPoolStats>('/workers/stats')
  },
  /** Fetch one worker by id. */
  getById(id: string) {
    return api.get<Worker>(`/workers/${id}`)
  },
  /** Add a worker described by `data`. */
  add(data: AddWorkerRequest) {
    return api.post<Worker>('/workers', data)
  },
  /** Remove a worker; `timeout` is forwarded as a query parameter. */
  remove(id: string, timeout = 30) {
    const params = { timeout }
    return api.delete(`/workers/${id}`, { params })
  },
  /** Start the pool with the requested CPU/GPU worker counts (query parameters, no body). */
  startPool(cpuWorkers = 0, gpuWorkers = 0) {
    const params = { cpu_workers: cpuWorkers, gpu_workers: gpuWorkers }
    return api.post('/workers/pool/start', null, { params })
  },
  /** Stop the pool; `timeout` is forwarded as a query parameter. */
  stopPool(timeout = 30) {
    const params = { timeout }
    return api.post('/workers/pool/stop', null, { params })
  }
}
// Jobs API
export const jobsApi = {
  /** List jobs, optionally filtered by status, one page at a time. */
  getAll(statusFilter?: string, page = 1, pageSize = 50) {
    const params = { status_filter: statusFilter, page, page_size: pageSize }
    return api.get<JobList>('/jobs', { params })
  },
  /** Fetch queue statistics. */
  getStats() {
    return api.get<QueueStats>('/jobs/stats')
  },
  /** Fetch one job by id. */
  getById(id: string) {
    return api.get<Job>(`/jobs/${id}`)
  },
  /** Create a job from `data`. */
  create(data: CreateJobRequest) {
    return api.post<Job>('/jobs', data)
  },
  /** Request a retry of the given job. */
  retry(id: string) {
    return api.post<Job>(`/jobs/${id}/retry`)
  },
  /** Cancel a job (issued as a DELETE on the job resource). */
  cancel(id: string) {
    return api.delete(`/jobs/${id}`)
  },
  /** Ask the backend to clear the queue of completed jobs. */
  clearCompleted() {
    return api.post('/jobs/queue/clear')
  }
}
// Scan Rules API
export const scanRulesApi = {
  /** List scan rules; `enabledOnly` is forwarded as the `enabled_only` query parameter. */
  getAll(enabledOnly = false) {
    const params = { enabled_only: enabledOnly }
    return api.get<ScanRule[]>('/scan-rules', { params })
  },
  /** Fetch one rule by id. */
  getById(id: number) {
    return api.get<ScanRule>(`/scan-rules/${id}`)
  },
  /** Create a rule from `data`. */
  create(data: CreateScanRuleRequest) {
    return api.post<ScanRule>('/scan-rules', data)
  },
  /** Update a rule with a partial payload. */
  update(id: number, data: Partial<CreateScanRuleRequest>) {
    return api.put<ScanRule>(`/scan-rules/${id}`, data)
  },
  /** Delete a rule. */
  delete(id: number) {
    return api.delete(`/scan-rules/${id}`)
  },
  /** Toggle a rule via its `/toggle` endpoint. */
  toggle(id: number) {
    return api.post<ScanRule>(`/scan-rules/${id}/toggle`)
  }
}
// Scanner API
export const scannerApi = {
  /** Fetch the scanner status. */
  getStatus() {
    return api.get<ScannerStatus>('/scanner/status')
  },
  /** Run a scan described by `data`. */
  scan(data: ScanRequest) {
    return api.post<ScanResult>('/scanner/scan', data)
  },
  /** Start the scheduler for `paths` with the given cron expression. */
  startScheduler(cronExpression: string, paths: string[], recursive = true) {
    const body = { enabled: true, cron_expression: cronExpression, paths, recursive }
    return api.post('/scanner/scheduler/start', body)
  },
  /** Stop the scheduler. */
  stopScheduler() {
    return api.post('/scanner/scheduler/stop')
  },
  /** Start the filesystem watcher for `paths`. */
  startWatcher(paths: string[], recursive = true) {
    const body = { enabled: true, paths, recursive }
    return api.post('/scanner/watcher/start', body)
  },
  /** Stop the filesystem watcher. */
  stopWatcher() {
    return api.post('/scanner/watcher/stop')
  },
  /** Analyze a single file; the path travels as the `file_path` query parameter, not in the body. */
  analyzeFile(filePath: string) {
    const params = { file_path: filePath }
    return api.post('/scanner/analyze', null, { params })
  }
}
export default api

View File

@@ -0,0 +1,47 @@
import { defineStore } from 'pinia'
import { ref, computed } from 'vue'
import axios from 'axios'
/**
 * Application configuration store: the backend operation mode and whether
 * the system-resources endpoint reports a GPU.
 */
export const useConfigStore = defineStore('config', () => {
  const operationMode = ref<'standalone' | 'bazarr_slave'>('standalone')
  const hasGPU = ref(false)
  const loading = ref(false)

  const isStandalone = computed(() => operationMode.value === 'standalone')
  const isBazarrSlave = computed(() => operationMode.value === 'bazarr_slave')

  /**
   * Load the operation mode from the settings endpoint. Any value other
   * than 'bazarr_slave' is treated as 'standalone'. Failures are logged and
   * leave the current mode untouched.
   */
  async function fetchConfig() {
    loading.value = true
    try {
      // Get operation mode from settings
      const response = await axios.get('/api/settings/operation_mode')
      operationMode.value = response.data.value === 'bazarr_slave' ? 'bazarr_slave' : 'standalone'
    } catch (error) {
      console.error('Failed to fetch operation mode:', error)
    } finally {
      loading.value = false
    }
  }

  /**
   * Detect GPU availability from the system resources endpoint.
   * `hasGPU` is always set to a real boolean: true only when the response
   * carries a non-empty `gpus` array, false for a missing or malformed field
   * and for any request failure.
   */
  async function detectGPU() {
    try {
      // Try to get system resources to detect GPU
      const response = await axios.get('/api/system/resources')
      const gpus = response.data.gpus
      // `gpus && gpus.length > 0` would store `undefined` when the field is absent.
      hasGPU.value = Array.isArray(gpus) && gpus.length > 0
    } catch (error) {
      // If endpoint doesn't exist, assume no GPU detection available
      hasGPU.value = false
    }
  }

  return {
    operationMode,
    hasGPU,
    loading,
    isStandalone,
    isBazarrSlave,
    fetchConfig,
    detectGPU
  }
})

125
frontend/src/stores/jobs.ts Normal file
View File

@@ -0,0 +1,125 @@
import { defineStore } from 'pinia'
import { ref } from 'vue'
import { jobsApi } from '@/services/api'
import type { Job, JobList, QueueStats, CreateJobRequest } from '@/types/api'
/**
 * Job queue store: the current page of jobs, queue statistics, and the
 * actions that mutate them through the jobs API.
 */
export const useJobsStore = defineStore('jobs', () => {
  const jobs = ref<Job[]>([])
  const stats = ref<QueueStats | null>(null)
  const totalJobs = ref(0)
  const currentPage = ref(1)
  const pageSize = ref(50)
  const loading = ref(false)
  const error = ref<string | null>(null)

  /**
   * Run `action`; on failure store the error message (or `fallbackMessage`)
   * in `error` and re-throw. With `trackLoading`, the loading flag is raised
   * and `error` is cleared up front, and the flag is lowered when done.
   */
  async function guarded<T>(
    fallbackMessage: string,
    action: () => Promise<T>,
    trackLoading = true
  ): Promise<T> {
    if (trackLoading) {
      loading.value = true
      error.value = null
    }
    try {
      return await action()
    } catch (err: any) {
      error.value = err.message || fallbackMessage
      throw err
    } finally {
      if (trackLoading) {
        loading.value = false
      }
    }
  }

  /** Load one page of jobs, optionally filtered by status. */
  async function fetchJobs(statusFilter?: string, page = 1) {
    await guarded('Failed to fetch jobs', async () => {
      currentPage.value = page
      const { data } = await jobsApi.getAll(statusFilter, page, pageSize.value)
      jobs.value = data.jobs
      totalJobs.value = data.total
    })
  }

  /** Refresh queue statistics (does not touch the loading flag). */
  async function fetchStats() {
    await guarded(
      'Failed to fetch job stats',
      async () => {
        const { data } = await jobsApi.getStats()
        stats.value = data
      },
      false
    )
  }

  /** Create a job, put it at the top of the local list, and refresh the stats. */
  async function createJob(data: CreateJobRequest) {
    return guarded('Failed to create job', async () => {
      const { data: created } = await jobsApi.create(data)
      jobs.value.unshift(created)
      await fetchStats()
      return created
    })
  }

  /** Retry a job and replace its local copy with the server's response. */
  async function retryJob(id: string) {
    return guarded('Failed to retry job', async () => {
      const { data: retried } = await jobsApi.retry(id)
      const position = jobs.value.findIndex(job => job.id === id)
      if (position !== -1) {
        jobs.value[position] = retried
      }
      await fetchStats()
      return retried
    })
  }

  /** Cancel a job and mark its local copy as cancelled. */
  async function cancelJob(id: string) {
    await guarded('Failed to cancel job', async () => {
      await jobsApi.cancel(id)
      const target = jobs.value.find(job => job.id === id)
      if (target) {
        target.status = 'cancelled'
      }
      await fetchStats()
    })
  }

  /** Clear completed jobs on the server and drop them from the local list. */
  async function clearCompleted() {
    await guarded('Failed to clear completed jobs', async () => {
      await jobsApi.clearCompleted()
      jobs.value = jobs.value.filter(job => job.status !== 'completed')
      await fetchStats()
    })
  }

  return {
    jobs,
    stats,
    totalJobs,
    currentPage,
    pageSize,
    loading,
    error,
    fetchJobs,
    fetchStats,
    createJob,
    retryJob,
    cancelJob,
    clearCompleted
  }
})

View File

@@ -0,0 +1,48 @@
import { defineStore } from 'pinia'
import { ref } from 'vue'
import { systemApi } from '@/services/api'
import type { SystemStatus } from '@/types/api'
/**
 * System store: the latest backend status plus an `isOnline` flag that
 * reflects whether the most recent status/health request succeeded.
 */
export const useSystemStore = defineStore('system', () => {
  const status = ref<SystemStatus | null>(null)
  const loading = ref(false)
  const error = ref<string | null>(null)
  const isOnline = ref(true)

  /**
   * Fetch the system status. Success marks the backend online; failure
   * records the message, marks it offline, and re-throws to the caller.
   */
  async function fetchStatus() {
    loading.value = true
    error.value = null
    try {
      const { data } = await systemApi.getStatus()
      status.value = data
      isOnline.value = true
    } catch (err: any) {
      error.value = err.message || 'Failed to fetch system status'
      isOnline.value = false
      throw err
    } finally {
      loading.value = false
    }
  }

  /**
   * Probe the health endpoint without throwing.
   * Updates `isOnline` and returns the same boolean.
   */
  async function checkHealth() {
    let reachable = true
    try {
      await systemApi.getHealth()
    } catch (err) {
      reachable = false
    }
    isOnline.value = reachable
    return reachable
  }

  return {
    status,
    loading,
    error,
    isOnline,
    fetchStatus,
    checkHealth
  }
})

View File

@@ -0,0 +1,110 @@
import { defineStore } from 'pinia'
import { ref } from 'vue'
import { workersApi } from '@/services/api'
import type { Worker, WorkerPoolStats, AddWorkerRequest } from '@/types/api'
/**
 * Pinia store for the worker pool: the list of workers, aggregate pool
 * statistics, and the actions that mutate the pool through `workersApi`.
 */
export const useWorkersStore = defineStore('workers', () => {
  const workers = ref<Worker[]>([])
  const stats = ref<WorkerPoolStats | null>(null)
  const loading = ref(false)
  const error = ref<string | null>(null)

  /**
   * Run `task` with the shared loading/error bookkeeping used by every
   * mutating action: set `loading`, clear `error`, store the failure
   * message (or `fallbackMessage`) and rethrow, and always reset `loading`.
   */
  async function tracked<T>(fallbackMessage: string, task: () => Promise<T>): Promise<T> {
    loading.value = true
    error.value = null
    try {
      return await task()
    } catch (failure: any) {
      error.value = failure.message || fallbackMessage
      throw failure
    } finally {
      loading.value = false
    }
  }

  /** Replace the local worker list with the server's list. */
  function fetchWorkers() {
    return tracked('Failed to fetch workers', async () => {
      const { data } = await workersApi.getAll()
      workers.value = data
    })
  }

  /**
   * Refresh the pool statistics. Does not touch `loading` and does not
   * clear a previous `error`; failures are recorded and rethrown.
   */
  async function fetchStats() {
    try {
      const { data } = await workersApi.getStats()
      stats.value = data
    } catch (failure: any) {
      error.value = failure.message || 'Failed to fetch worker stats'
      throw failure
    }
  }

  /** Add a worker, append it to the local list and return it. */
  function addWorker(data: AddWorkerRequest) {
    return tracked('Failed to add worker', async () => {
      const response = await workersApi.add(data)
      workers.value.push(response.data)
      await fetchStats()
      return response.data
    })
  }

  /** Remove a worker by id and drop it from the local list. */
  function removeWorker(id: string) {
    return tracked('Failed to remove worker', async () => {
      await workersApi.remove(id)
      workers.value = workers.value.filter(worker => worker.worker_id !== id)
      await fetchStats()
    })
  }

  /** Start the pool with the requested worker counts, then reload state. */
  function startPool(cpuWorkers: number, gpuWorkers: number) {
    return tracked('Failed to start pool', async () => {
      await workersApi.startPool(cpuWorkers, gpuWorkers)
      await fetchWorkers()
      await fetchStats()
    })
  }

  /** Stop the pool, empty the local worker list and refresh the stats. */
  function stopPool() {
    return tracked('Failed to stop pool', async () => {
      await workersApi.stopPool()
      workers.value = []
      await fetchStats()
    })
  }

  return {
    workers,
    stats,
    loading,
    error,
    fetchWorkers,
    fetchStats,
    addWorker,
    removeWorker,
    startPool,
    stopPool
  }
})

159
frontend/src/types/api.ts Normal file
View File

@@ -0,0 +1,159 @@
// API Types matching backend models
/**
 * Aggregate status payload; the system store caches the response of
 * `systemApi.getStatus()` under this type.
 *
 * NOTE(review): the Dashboard view reads `system.mode`, `workers.pool.*`,
 * `workers.jobs.completed` and `queue.total`, none of which are declared
 * here — confirm the real backend response shape and align one side.
 */
export interface SystemStatus {
system: {
status: string
// Nullable; presumably null until the backend has an uptime to report.
uptime_seconds: number | null
}
workers: WorkerPoolStats
queue: QueueStats
scanner: ScannerStatus
}
/**
 * Aggregate counters for the worker pool; the workers store caches the
 * response of `workersApi.getStats()` under this type.
 */
export interface WorkerPoolStats {
total_workers: number
cpu_workers: number
gpu_workers: number
// Per-status worker counts (same status names as Worker.status).
idle_workers: number
busy_workers: number
stopped_workers: number
error_workers: number
total_jobs_completed: number
total_jobs_failed: number
uptime_seconds: number | null
is_running: boolean
}
/** A single worker as listed by the workers API. */
export interface Worker {
// Identifier used by the workers store to remove a worker from its list.
worker_id: string
worker_type: 'cpu' | 'gpu'
// Nullable; presumably only set for GPU workers — TODO confirm.
device_id: number | null
status: 'idle' | 'busy' | 'stopped' | 'error'
current_job_id: string | null
jobs_completed: number
jobs_failed: number
uptime_seconds: number
// NOTE(review): scale (0-1 vs 0-100) is not visible here — confirm.
current_job_progress: number
current_job_eta: number | null
}
/** A queue job as returned by the jobs API. */
export interface Job {
id: string
file_path: string
file_name: string
status: 'queued' | 'processing' | 'completed' | 'failed' | 'cancelled'
priority: number
source_lang: string | null
target_lang: string | null
quality_preset: 'fast' | 'balanced' | 'best'
transcribe_or_translate: string
// The Dashboard renders this value directly followed by '%'.
progress: number
current_stage: string | null
eta_seconds: number | null
// Timestamps are strings; the Dashboard parses created_at with `new Date(...)`.
created_at: string | null
started_at: string | null
completed_at: string | null
output_path: string | null
segments_count: number | null
error: string | null
retry_count: number
worker_id: string | null
vram_used_mb: number | null
processing_time_seconds: number | null
model_used: string | null
device_used: string | null
}
/** One page of jobs plus paging metadata. */
export interface JobList {
jobs: Job[]
// NOTE(review): presumably the total across all pages, not this page — confirm.
total: number
page: number
page_size: number
}
/**
 * Job counts per status.
 *
 * NOTE(review): the Dashboard view reads `queue.total`, while this type
 * declares `total_jobs` — confirm which name the backend actually sends.
 */
export interface QueueStats {
total_jobs: number
queued: number
processing: number
completed: number
failed: number
cancelled: number
}
/** A stored scan rule: matching conditions plus the action to take. */
export interface ScanRule {
id: number
name: string
enabled: boolean
// The rules form labels this "higher = first".
priority: number
conditions: ScanRuleConditions
action: ScanRuleAction
created_at: string | null
updated_at: string | null
}
/**
 * Conditions of a scan rule. Every field is nullable; the rules form sends
 * null for a condition the user left empty.
 */
export interface ScanRuleConditions {
audio_language_is: string | null
audio_language_not: string | null
audio_track_count_min: number | null
has_embedded_subtitle_lang: string | null
missing_embedded_subtitle_lang: string | null
missing_external_subtitle_lang: string | null
file_extension: string | null
}
/** What to do with a file that matches a scan rule. */
export interface ScanRuleAction {
action_type: 'transcribe' | 'translate'
// The rules form always submits 'en' when action_type is 'transcribe'.
target_language: string
quality_preset: 'fast' | 'balanced' | 'best'
job_priority: number
}
/** Scanner state: scheduler and file-watcher flags plus scan history. */
export interface ScannerStatus {
scheduler_enabled: boolean
scheduler_running: boolean
next_scan_time: string | null
watcher_enabled: boolean
watcher_running: boolean
watched_paths: string[]
// Nullable; the Dashboard shows a dash when this is empty.
last_scan_time: string | null
total_scans: number
}
/** Counters describing the outcome of a scan. */
export interface ScanResult {
scanned_files: number
matched_files: number
jobs_created: number
skipped_files: number
paths_scanned: string[]
}
// Request types
/**
 * Request body for creating a job. Only file_path, file_name and
 * target_lang are required; the remaining fields are optional.
 */
export interface CreateJobRequest {
file_path: string
file_name: string
source_lang?: string
target_lang: string
quality_preset?: 'fast' | 'balanced' | 'best'
transcribe_or_translate?: string
priority?: number
is_manual_request?: boolean
}
/** Request body passed to `workersApi.add` by the workers store. */
export interface AddWorkerRequest {
worker_type: 'cpu' | 'gpu'
// Optional; presumably selects the GPU for 'gpu' workers — TODO confirm.
device_id?: number
}
/** Request body for creating a scan rule (a ScanRule without id/timestamps). */
export interface CreateScanRuleRequest {
name: string
enabled: boolean
priority: number
conditions: ScanRuleConditions
action: ScanRuleAction
}
/** Request body for a scan: the paths to scan and a recursive flag. */
export interface ScanRequest {
paths: string[]
recursive: boolean
}

View File

@@ -0,0 +1,907 @@
<template>
<div class="dashboard">
<div class="page-header">
<h1 class="page-title">Dashboard</h1>
<div class="header-actions">
<span class="refresh-indicator" v-if="!loading">
Auto-refresh: <span class="text-success">{{ countdown }}s</span>
</span>
<button @click="loadData" class="btn btn-secondary" :disabled="loading">
<span v-if="loading">Loading...</span>
<span v-else> Refresh Now</span>
</button>
</div>
</div>
<div v-if="loading && !systemStatus" class="spinner"></div>
<div v-else-if="systemStatus" class="dashboard-content">
<!-- Top Row: System Overview Cards -->
<div class="dashboard-grid">
<!-- System Overview -->
<div class="card highlight-card">
<div class="card-header">
<div class="header-icon">🖥</div>
<h2 class="card-title">System Status</h2>
<span :class="['badge', systemStatus.system.status === 'running' ? 'badge-completed' : 'badge-failed']">
{{ systemStatus.system.status }}
</span>
</div>
<div class="card-body">
<div class="stat-row">
<span class="stat-label">Uptime:</span>
<span class="stat-value">{{ formatUptime(systemStatus.system.uptime_seconds) }}</span>
</div>
<div class="stat-row">
<span class="stat-label">Version:</span>
<span class="stat-value">v1.0.0</span>
</div>
<div class="stat-row">
<span class="stat-label">Mode:</span>
<span class="stat-value badge badge-info">
{{ systemStatus.system.mode || 'Standalone' }}
</span>
</div>
</div>
</div>
<!-- Workers Overview -->
<div class="card">
<div class="card-header">
<div class="header-icon"></div>
<h2 class="card-title">Workers</h2>
<router-link to="/workers" class="btn btn-secondary btn-sm">Manage</router-link>
</div>
<div class="card-body">
<div class="stats-grid">
<div class="stat-item">
<div class="stat-number">{{ systemStatus.workers?.pool?.total_workers || 0 }}</div>
<div class="stat-label">Total</div>
</div>
<div class="stat-item">
<div class="stat-number text-success">{{ systemStatus.workers?.pool?.idle_workers || 0 }}</div>
<div class="stat-label">Idle</div>
</div>
<div class="stat-item">
<div class="stat-number text-primary">{{ systemStatus.workers?.pool?.busy_workers || 0 }}</div>
<div class="stat-label">Busy</div>
</div>
<div class="stat-item">
<div class="stat-number">{{ systemStatus.workers?.jobs?.completed || 0 }}</div>
<div class="stat-label">Completed</div>
</div>
</div>
<div class="progress-section">
<div class="progress-label">
<span>Worker Utilization</span>
<span>{{ workerUtilization }}%</span>
</div>
<div class="progress-bar">
<div
class="progress-fill"
:style="{ width: workerUtilization + '%', backgroundColor: getUsageColor(workerUtilization) }"
></div>
</div>
</div>
</div>
</div>
<!-- Queue Overview -->
<div class="card">
<div class="card-header">
<div class="header-icon">📋</div>
<h2 class="card-title">Job Queue</h2>
<router-link to="/queue" class="btn btn-secondary btn-sm">View All</router-link>
</div>
<div class="card-body">
<div class="stats-grid">
<div class="stat-item">
<div class="stat-number">{{ systemStatus.queue?.total || 0 }}</div>
<div class="stat-label">Total</div>
</div>
<div class="stat-item">
<div class="stat-number text-warning">{{ systemStatus.queue?.queued || 0 }}</div>
<div class="stat-label">Queued</div>
</div>
<div class="stat-item">
<div class="stat-number text-primary">{{ systemStatus.queue?.processing || 0 }}</div>
<div class="stat-label">Processing</div>
</div>
<div class="stat-item">
<div class="stat-number text-success">{{ systemStatus.queue?.completed || 0 }}</div>
<div class="stat-label">Completed</div>
</div>
</div>
<!--
  Stacked bar: each segment's width is that status' share of all jobs.
  The tooltips use the same null-safe access (`queue?.x || 0`) as the counters
  above so a status payload without `queue` cannot throw during render.
-->
<div class="queue-chart">
  <div
    class="queue-bar queue-completed"
    :style="{ width: queuePercentage('completed') + '%' }"
    :title="`Completed: ${systemStatus.queue?.completed || 0}`"
  ></div>
  <div
    class="queue-bar queue-processing"
    :style="{ width: queuePercentage('processing') + '%' }"
    :title="`Processing: ${systemStatus.queue?.processing || 0}`"
  ></div>
  <div
    class="queue-bar queue-queued"
    :style="{ width: queuePercentage('queued') + '%' }"
    :title="`Queued: ${systemStatus.queue?.queued || 0}`"
  ></div>
  <div
    class="queue-bar queue-failed"
    :style="{ width: queuePercentage('failed') + '%' }"
    :title="`Failed: ${systemStatus.queue?.failed || 0}`"
  ></div>
</div>
</div>
</div>
<!-- Scanner Overview -->
<div class="card">
<div class="card-header">
<div class="header-icon">📁</div>
<h2 class="card-title">Library Scanner</h2>
<router-link to="/scanner" class="btn btn-secondary btn-sm">Configure</router-link>
</div>
<div class="card-body">
<div class="stat-row">
<span class="stat-label">Scheduler:</span>
<span :class="['badge', systemStatus.scanner.scheduler_running ? 'badge-completed' : 'badge-cancelled']">
{{ systemStatus.scanner.scheduler_running ? 'Running' : 'Stopped' }}
</span>
</div>
<div class="stat-row">
<span class="stat-label">File Watcher:</span>
<span :class="['badge', systemStatus.scanner.watcher_running ? 'badge-completed' : 'badge-cancelled']">
{{ systemStatus.scanner.watcher_running ? 'Active' : 'Inactive' }}
</span>
</div>
<div class="stat-row">
<span class="stat-label">Last Scan:</span>
<span class="stat-value">{{ formatDate(systemStatus.scanner.last_scan_time) }}</span>
</div>
<div class="stat-row">
<span class="stat-label">Total Scans:</span>
<span class="stat-value">{{ systemStatus.scanner.total_scans || 0 }}</span>
</div>
</div>
</div>
</div>
<!-- System Resources Section -->
<div class="resources-section">
<h2 class="section-title">
<span class="section-icon">💻</span>
System Resources
</h2>
<div class="resources-grid">
<!-- CPU Card -->
<div class="card resource-card">
<div class="card-header">
<h3 class="card-title">CPU Usage</h3>
<span class="resource-value">{{ systemResources.cpu?.usage_percent?.toFixed(1) || 0 }}%</span>
</div>
<div class="card-body">
<div class="progress-bar large">
<div
class="progress-fill"
:style="{
width: (systemResources.cpu?.usage_percent || 0) + '%',
backgroundColor: getUsageColor(systemResources.cpu?.usage_percent || 0)
}"
></div>
</div>
<div class="resource-details">
<div class="detail-item">
<span class="detail-label">Cores:</span>
<span class="detail-value">{{ systemResources.cpu?.count_logical || 0 }} ({{ systemResources.cpu?.count_physical || 0 }} physical)</span>
</div>
<div class="detail-item">
<span class="detail-label">Frequency:</span>
<span class="detail-value">{{ (systemResources.cpu?.frequency_mhz || 0).toFixed(0) }} MHz</span>
</div>
</div>
</div>
</div>
<!-- RAM Card -->
<div class="card resource-card">
<div class="card-header">
<h3 class="card-title">RAM Usage</h3>
<span class="resource-value">{{ systemResources.memory?.usage_percent?.toFixed(1) || 0 }}%</span>
</div>
<div class="card-body">
<div class="progress-bar large">
<div
class="progress-fill"
:style="{
width: (systemResources.memory?.usage_percent || 0) + '%',
backgroundColor: getUsageColor(systemResources.memory?.usage_percent || 0)
}"
></div>
</div>
<div class="resource-details">
<div class="detail-item">
<span class="detail-label">Used:</span>
<span class="detail-value">{{ (systemResources.memory?.used_gb || 0).toFixed(2) }} GB</span>
</div>
<div class="detail-item">
<span class="detail-label">Total:</span>
<span class="detail-value">{{ (systemResources.memory?.total_gb || 0).toFixed(2) }} GB</span>
</div>
<div class="detail-item">
<span class="detail-label">Free:</span>
<span class="detail-value">{{ (systemResources.memory?.free_gb || 0).toFixed(2) }} GB</span>
</div>
</div>
</div>
</div>
<!-- GPU Cards -->
<!--
  One card per GPU reported by /system/resources.
  VRAM values fall back to 0 and the usage ratio is only computed when
  memory_total_mb is non-zero, so missing fields render as 0 instead of NaN.
-->
<div
  v-for="(gpu, index) in systemResources.gpus"
  :key="index"
  class="card resource-card"
>
  <div class="card-header">
    <h3 class="card-title">{{ gpu.name || `GPU ${index}` }}</h3>
    <span class="resource-value">{{ gpu.utilization_percent?.toFixed(1) || 0 }}%</span>
  </div>
  <div class="card-body">
    <div class="progress-bar large">
      <div
        class="progress-fill"
        :style="{
          width: (gpu.utilization_percent || 0) + '%',
          backgroundColor: getUsageColor(gpu.utilization_percent || 0)
        }"
      ></div>
    </div>
    <div class="resource-details">
      <div class="detail-item">
        <span class="detail-label">VRAM Used:</span>
        <span class="detail-value">
          {{ ((gpu.memory_used_mb || 0) / 1024).toFixed(2) }} GB
        </span>
      </div>
      <div class="detail-item">
        <span class="detail-label">VRAM Total:</span>
        <span class="detail-value">
          {{ ((gpu.memory_total_mb || 0) / 1024).toFixed(2) }} GB
        </span>
      </div>
      <div class="detail-item">
        <span class="detail-label">VRAM Usage:</span>
        <span class="detail-value">
          {{ gpu.memory_total_mb ? (((gpu.memory_used_mb || 0) / gpu.memory_total_mb) * 100).toFixed(1) : '0.0' }}%
        </span>
      </div>
    </div>
  </div>
</div>
<!-- No GPUs Message -->
<div v-if="!systemResources.gpus || systemResources.gpus.length === 0" class="card resource-card empty-gpu">
<div class="card-body">
<div class="empty-state">
<p>No GPUs detected</p>
<small>CPU-only mode active</small>
</div>
</div>
</div>
</div>
</div>
<!-- Recent Jobs Section -->
<div class="recent-jobs-section">
<div class="section-header">
<h2 class="section-title">
<span class="section-icon"></span>
Recent Jobs
</h2>
<router-link to="/queue" class="btn btn-secondary">View All Jobs </router-link>
</div>
<div v-if="recentJobs.length === 0" class="empty-state">
<p>No jobs yet</p>
</div>
<div v-else class="table-container">
<table class="jobs-table">
<thead>
<tr>
<th>File Name</th>
<th>Status</th>
<th>Languages</th>
<th>Progress</th>
<th>Worker</th>
<th>Created</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
<tr v-for="job in recentJobs" :key="job.id" :class="'row-' + job.status">
<td class="file-name">
<span class="file-icon">📄</span>
{{ job.file_name }}
</td>
<td>
<span :class="['badge', `badge-${job.status}`]">
{{ job.status }}
</span>
</td>
<td class="languages">
<span class="lang-badge">{{ job.source_lang }}</span>
<span class="arrow"></span>
<span class="lang-badge">{{ job.target_lang }}</span>
</td>
<td>
<div class="progress-cell">
<div class="progress-bar small">
<div
class="progress-fill"
:style="{ width: job.progress + '%' }"
></div>
</div>
<span class="progress-text">{{ job.progress }}%</span>
</div>
</td>
<td>
<span class="worker-badge" v-if="job.worker_id">
{{ job.worker_id }}
</span>
<span v-else class="text-muted"></span>
</td>
<td class="created-date">{{ formatDate(job.created_at) }}</td>
<td class="actions">
<router-link :to="`/queue?job=${job.id}`" class="btn-action" title="View Details">
👁
</router-link>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div v-else class="empty-state">
<p>Unable to load system status</p>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted, onUnmounted } from 'vue'
import { useSystemStore } from '@/stores/system'
import api from '@/services/api'
// Store used by loadData() to fetch the system status.
const systemStore = useSystemStore()
// Local copies of the three payloads rendered by the template. They are
// typed `any`, so template access is not checked against types/api.ts.
const systemStatus = ref<any>(null)
const systemResources = ref<any>({})
const recentJobs = ref<any[]>([])
// Starts true so the spinner is shown until the first load finishes.
const loading = ref(true)
// Seconds shown in the "Auto-refresh" indicator.
const countdown = ref(5)
// Timer handles created in onMounted and cleared in onUnmounted.
let refreshInterval: number | null = null
let countdownInterval: number | null = null
/**
 * Percentage of workers that are currently busy, rounded to an integer.
 *
 * Returns 0 when the pool data is missing or the total is absent or zero,
 * and treats a missing busy count as 0, so the template never shows "NaN%".
 */
const workerUtilization = computed(() => {
  const pool = systemStatus.value?.workers?.pool
  const total = pool?.total_workers
  // `!total` covers 0, undefined, null and NaN in one check.
  if (!pool || !total) return 0
  const busy = pool.busy_workers || 0
  return Math.round((busy / total) * 100)
})
/**
 * Share (0-100) of queue jobs that are in the given status.
 *
 * @param status Key of the counter to read from the queue stats
 *               (e.g. 'completed', 'processing', 'queued', 'failed').
 * @returns The percentage, or 0 when the queue stats or a positive total
 *          are not available (avoids "NaN%" / "Infinity%" widths).
 *
 * NOTE(review): this reads `queue.total`, while the QueueStats type declares
 * `total_jobs` — confirm which field the backend sends.
 */
function queuePercentage(status: string): number {
  const queue = systemStatus.value?.queue
  if (!queue) return 0
  const total = queue.total
  // Rejects 0, undefined, null, NaN and negative totals.
  if (!total || total < 0) return 0
  const value = queue[status] || 0
  return (value / total) * 100
}
/**
 * Refresh everything the dashboard shows: system status, system resources
 * and the five most recent jobs.
 *
 * The three requests run concurrently and are handled independently, so a
 * failure of one (e.g. the status endpoint) no longer prevents the other
 * panels from refreshing. Failures are logged and leave the previously
 * loaded data in place; this function never throws.
 */
async function loadData() {
  loading.value = true
  try {
    const [statusResult, resourcesResult, jobsResult] = await Promise.allSettled([
      systemStore.fetchStatus(),
      api.get('/system/resources'),
      api.get('/jobs?limit=5')
    ])

    if (statusResult.status === 'fulfilled') {
      systemStatus.value = systemStore.status
    } else {
      console.error('Failed to load dashboard data:', statusResult.reason)
    }

    if (resourcesResult.status === 'fulfilled') {
      systemResources.value = resourcesResult.value.data
    } else {
      console.error('Failed to load dashboard data:', resourcesResult.reason)
    }

    if (jobsResult.status === 'fulfilled') {
      recentJobs.value = jobsResult.value.data.jobs || []
    } else {
      console.error('Failed to load dashboard data:', jobsResult.reason)
    }
  } catch (error: any) {
    // Only reachable if a request throws synchronously or a response is malformed.
    console.error('Failed to load dashboard data:', error)
  } finally {
    loading.value = false
  }
}
/**
 * Format an uptime in seconds as a short human-readable string.
 *
 * Shows the two most significant units: "1d 1h", "1h 1m", "5m", and for
 * uptimes below one minute the whole seconds ("45s") instead of "0m".
 *
 * @param seconds Uptime in seconds; null/undefined, zero, negative and NaN
 *                values are rendered as "0s".
 */
function formatUptime(seconds: number | null | undefined): string {
  if (seconds == null || !(seconds > 0)) return '0s'
  const days = Math.floor(seconds / 86400)
  const hours = Math.floor((seconds % 86400) / 3600)
  const mins = Math.floor((seconds % 3600) / 60)
  if (days > 0) return `${days}d ${hours}h`
  if (hours > 0) return `${hours}h ${mins}m`
  if (mins > 0) return `${mins}m`
  return `${Math.floor(seconds)}s`
}
/**
 * Render a date string as a relative label.
 *
 * Empty input gives a dash, unparseable input gives 'Invalid date'. Ages
 * under a minute give 'Just now', under an hour 'Nm ago', under a day
 * 'Nh ago'; anything older falls back to the locale date string.
 */
function formatDate(dateStr: string): string {
  if (!dateStr) return '—'
  // The string is handed to the Date constructor as-is.
  const parsed = new Date(dateStr)
  const timestamp = parsed.getTime()
  if (Number.isNaN(timestamp)) return 'Invalid date'
  const elapsedMinutes = Math.floor((Date.now() - timestamp) / 60000)
  if (elapsedMinutes < 1) return 'Just now'
  if (elapsedMinutes < 60) return `${elapsedMinutes}m ago`
  if (elapsedMinutes < 1440) return `${Math.floor(elapsedMinutes / 60)}h ago`
  return parsed.toLocaleDateString()
}
/**
 * Map a usage percentage to a theme colour variable:
 * below 50 is success, below 80 is warning, everything else is danger.
 */
function getUsageColor(percent: number): string {
  return percent < 50
    ? 'var(--success-color)'
    : percent < 80
      ? 'var(--warning-color)'
      : 'var(--danger-color)'
}
/**
 * Start the one-second ticker behind the "Auto-refresh" indicator.
 * The counter starts at 5, decreases every second and wraps back to 5
 * once it reaches 0. The timer handle is kept in `countdownInterval`.
 */
function startCountdown() {
  const tick = () => {
    const next = countdown.value - 1
    countdown.value = next <= 0 ? 5 : next
  }
  countdown.value = 5
  countdownInterval = window.setInterval(tick, 1000)
}
// On mount: load once immediately, then poll every 5000 ms and start the
// on-screen countdown (an independent 1000 ms timer).
onMounted(() => {
loadData()
refreshInterval = window.setInterval(loadData, 5000)
startCountdown()
})
// On unmount: stop both timers so they do not keep firing after the view is gone.
onUnmounted(() => {
if (refreshInterval) clearInterval(refreshInterval)
if (countdownInterval) clearInterval(countdownInterval)
})
</script>
<style scoped>
.page-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-xl);
}
.page-title {
font-size: 2.5rem;
font-weight: 700;
color: var(--text-primary);
margin: 0;
background: linear-gradient(135deg, var(--accent-color), var(--primary-color));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.header-actions {
display: flex;
align-items: center;
gap: var(--spacing-md);
}
.refresh-indicator {
font-size: 0.875rem;
color: var(--text-secondary);
}
.dashboard-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: var(--spacing-lg);
margin-bottom: var(--spacing-xl);
}
.card {
background-color: var(--secondary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-md);
transition: transform 0.2s, box-shadow 0.2s;
}
.card:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.highlight-card {
background: linear-gradient(135deg, var(--secondary-bg) 0%, rgba(79, 70, 229, 0.1) 100%);
border-color: var(--accent-color);
}
.card-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-md);
border-bottom: 1px solid var(--border-color);
}
.header-icon {
font-size: 1.5rem;
margin-right: var(--spacing-sm);
}
.card-title {
font-size: 1.125rem;
font-weight: 600;
color: var(--text-primary);
margin: 0;
flex: 1;
}
.card-body {
padding: var(--spacing-md);
}
.stat-row {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-sm) 0;
}
.stat-label {
color: var(--text-secondary);
font-size: 0.875rem;
}
.stat-value {
color: var(--text-primary);
font-weight: 600;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 1.75rem;
font-weight: 700;
color: var(--text-primary);
margin-bottom: var(--spacing-xs);
}
.progress-section {
margin-top: var(--spacing-md);
padding-top: var(--spacing-md);
border-top: 1px solid var(--border-color);
}
.progress-label {
display: flex;
justify-content: space-between;
margin-bottom: var(--spacing-xs);
font-size: 0.875rem;
color: var(--text-secondary);
}
.progress-bar {
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
overflow: hidden;
height: 8px;
}
.progress-bar.large {
height: 12px;
border-radius: var(--radius-md);
}
.progress-bar.small {
height: 6px;
}
.progress-fill {
height: 100%;
background-color: var(--accent-color);
transition: width 0.3s ease, background-color 0.3s ease;
}
.queue-chart {
margin-top: var(--spacing-md);
display: flex;
height: 24px;
border-radius: var(--radius-sm);
overflow: hidden;
background-color: var(--tertiary-bg);
}
.queue-bar {
transition: width 0.3s ease;
}
.queue-completed {
background-color: var(--success-color);
}
.queue-processing {
background-color: var(--accent-color);
}
.queue-queued {
background-color: var(--warning-color);
}
.queue-failed {
background-color: var(--danger-color);
}
.resources-section,
.recent-jobs-section {
margin-bottom: var(--spacing-xl);
}
.section-title {
font-size: 1.5rem;
font-weight: 600;
color: var(--text-primary);
margin-bottom: var(--spacing-lg);
display: flex;
align-items: center;
gap: var(--spacing-sm);
}
.section-icon {
font-size: 1.75rem;
}
.section-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-lg);
}
.resources-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: var(--spacing-lg);
}
.resource-card .card-header {
flex-direction: column;
align-items: flex-start;
gap: var(--spacing-xs);
}
.resource-card .card-title {
font-size: 1rem;
}
.resource-value {
font-size: 1.5rem;
font-weight: 700;
color: var(--accent-color);
align-self: flex-end;
}
.resource-details {
margin-top: var(--spacing-md);
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
.detail-item {
display: flex;
justify-content: space-between;
font-size: 0.875rem;
}
.detail-label {
color: var(--text-secondary);
}
.detail-value {
color: var(--text-primary);
font-weight: 600;
}
.empty-gpu {
display: flex;
align-items: center;
justify-content: center;
}
.empty-state {
text-align: center;
padding: var(--spacing-xl);
color: var(--text-muted);
background-color: var(--secondary-bg);
border-radius: var(--radius-md);
border: 1px solid var(--border-color);
}
.table-container {
overflow-x: auto;
background-color: var(--secondary-bg);
border-radius: var(--radius-md);
border: 1px solid var(--border-color);
}
.jobs-table {
width: 100%;
border-collapse: collapse;
}
.jobs-table th,
.jobs-table td {
padding: var(--spacing-md);
text-align: left;
border-bottom: 1px solid var(--border-color);
}
.jobs-table th {
background-color: var(--tertiary-bg);
font-weight: 600;
color: var(--text-secondary);
text-transform: uppercase;
font-size: 0.75rem;
}
.jobs-table tbody tr {
transition: background-color 0.2s;
}
.jobs-table tbody tr:hover {
background-color: var(--tertiary-bg);
}
.file-name {
display: flex;
align-items: center;
gap: var(--spacing-sm);
font-family: monospace;
font-size: 0.875rem;
}
.file-icon {
font-size: 1.25rem;
}
.languages {
display: flex;
align-items: center;
gap: var(--spacing-xs);
font-family: monospace;
}
.lang-badge {
padding: 2px 6px;
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
font-size: 0.75rem;
}
.arrow {
color: var(--text-muted);
}
.progress-cell {
display: flex;
align-items: center;
gap: var(--spacing-sm);
}
.progress-text {
font-size: 0.75rem;
color: var(--text-secondary);
min-width: 40px;
}
.worker-badge {
padding: 2px 8px;
background-color: var(--accent-color);
border-radius: var(--radius-sm);
font-size: 0.75rem;
font-family: monospace;
}
.created-date {
color: var(--text-secondary);
font-size: 0.875rem;
}
.actions {
text-align: center;
}
.btn-action {
padding: var(--spacing-xs);
border-radius: var(--radius-sm);
transition: background-color 0.2s;
cursor: pointer;
font-size: 1.25rem;
text-decoration: none;
}
.btn-action:hover {
background-color: var(--tertiary-bg);
}
.badge {
padding: 4px 8px;
border-radius: var(--radius-sm);
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
}
.badge-completed {
background-color: var(--success-color);
color: var(--primary-bg);
}
.badge-processing {
background-color: var(--accent-color);
color: var(--primary-bg);
}
.badge-queued {
background-color: var(--warning-color);
color: var(--primary-bg);
}
.badge-failed,
.badge-cancelled {
background-color: var(--danger-color);
color: var(--primary-bg);
}
.badge-info {
background-color: var(--accent-color);
color: var(--primary-bg);
}
.text-success {
color: var(--success-color);
}
.text-primary {
color: var(--accent-color);
}
.text-warning {
color: var(--warning-color);
}
.text-muted {
color: var(--text-muted);
}
</style>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,452 @@
<template>
<div class="rules-view">
<div class="page-header">
<h1 class="page-title">Scan Rules</h1>
<button @click="showCreateModal = true" class="btn btn-primary">Create Rule</button>
</div>
<div v-if="loading" class="spinner"></div>
<div v-else-if="rules.length === 0" class="empty-state">
<p>No scan rules configured yet. Create your first rule to start automatic scanning.</p>
</div>
<div v-else class="rules-grid">
<div v-for="rule in rules" :key="rule.id" class="rule-card">
<div class="rule-header">
<h3 class="rule-name">{{ rule.name }}</h3>
<div class="rule-actions">
<button
@click="toggleRule(rule)"
:class="['btn-toggle', rule.enabled ? 'enabled' : 'disabled']"
:title="rule.enabled ? 'Disable' : 'Enable'"
>
{{ rule.enabled ? '✓' : '✕' }}
</button>
<button @click="editRule(rule)" class="btn-edit" title="Edit"></button>
<button @click="deleteRule(rule.id)" class="btn-delete" title="Delete">🗑</button>
</div>
</div>
<div class="rule-body">
<div class="rule-detail">
<span class="detail-label">Priority:</span>
<span class="detail-value">{{ rule.priority }}</span>
</div>
<div class="rule-detail">
<span class="detail-label">Audio:</span>
<span class="detail-value">{{ rule.conditions?.audio_language_is || 'Any' }}</span>
</div>
<div class="rule-detail">
<span class="detail-label">Action:</span>
<span class="detail-value">{{ rule.action?.action_type }} {{ rule.action?.target_language }}</span>
</div>
<div v-if="rule.conditions?.missing_external_subtitle_lang" class="rule-detail">
<span class="detail-label">Check missing:</span>
<span class="detail-value">{{ rule.conditions.missing_external_subtitle_lang }}</span>
</div>
</div>
</div>
</div>
<!-- Create/Edit Rule Modal -->
<div v-if="showCreateModal || editingRule" class="modal-overlay" @click="closeModal">
<div class="modal-content" @click.stop>
<div class="modal-header">
<h2>{{ editingRule ? 'Edit Rule' : 'Create Rule' }}</h2>
<button @click="closeModal" class="btn-close"></button>
</div>
<div class="modal-body">
<div class="form-group">
<label>Rule Name</label>
<input v-model="formData.name" type="text" class="form-input" placeholder="e.g., Japanese anime to Spanish" />
</div>
<div class="form-group">
<label>Priority (higher = first)</label>
<input v-model.number="formData.priority" type="number" class="form-input" />
</div>
<div class="form-group">
<label>Audio Language (empty = any)</label>
<input v-model="formData.audio_language_is" type="text" class="form-input" placeholder="ja, en, es..." />
</div>
<div class="form-group">
<label>Action Type</label>
<select v-model="formData.action_type" class="form-select" @change="onActionTypeChange">
<option value="transcribe">Transcribe (audio English)</option>
<option value="translate">Translate (audio English target language)</option>
</select>
</div>
<div class="form-group">
<label>
Target Language
<span v-if="formData.action_type === 'transcribe'" class="setting-description">
(Fixed: en - transcribe mode only creates English subtitles)
</span>
</label>
<input
v-if="formData.action_type === 'translate'"
v-model="formData.target_language"
type="text"
class="form-input"
placeholder="es, fr, de, it..."
required
/>
<input
v-else
value="en"
type="text"
class="form-input"
disabled
readonly
/>
</div>
<div class="form-group">
<label>Check Missing Subtitle</label>
<input v-model="formData.missing_external_subtitle_lang" type="text" class="form-input" placeholder="es, en..." />
</div>
<div class="form-group">
<label class="checkbox-label">
<input v-model="formData.enabled" type="checkbox" />
<span>Enabled</span>
</label>
</div>
</div>
<div class="modal-footer">
<button @click="saveRule" class="btn btn-primary">{{ editingRule ? 'Update' : 'Create' }}</button>
<button @click="closeModal" class="btn btn-secondary">Cancel</button>
</div>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted } from 'vue'
import api from '@/services/api'
// Local shape of a scan rule as loaded from GET /scan-rules.
// NOTE(review): this overlaps with ScanRule in types/api.ts but is looser
// (optional fields, plain string action_type) — consider reusing that type.
interface Rule {
id: number
name: string
enabled: boolean
priority: number
// Only audio_language_is and missing_external_subtitle_lang are editable in the form.
conditions: {
audio_language_is?: string | null
audio_language_not?: string | null
audio_track_count_min?: number | null
has_embedded_subtitle_lang?: string | null
missing_embedded_subtitle_lang?: string | null
missing_external_subtitle_lang?: string | null
file_extension?: string | null
}
action: {
action_type: string
target_language: string
quality_preset?: string
job_priority?: number
}
created_at?: string
updated_at?: string
}
// Rules shown in the grid; replaced wholesale by loadRules().
const rules = ref<Rule[]>([])
// Starts true so the spinner is shown until the first load finishes.
const loading = ref(true)
// The modal is visible while either showCreateModal is true or editingRule is set.
const showCreateModal = ref(false)
const editingRule = ref<Rule | null>(null)
// Flat form model; saveRule() maps it onto the nested conditions/action payload.
const formData = ref({
name: '',
priority: 10,
audio_language_is: '',
target_language: 'en', // Default to 'en' for transcribe mode
action_type: 'transcribe',
missing_external_subtitle_lang: '',
enabled: true
})
/**
 * Load all scan rules from GET /scan-rules into `rules`.
 * On failure the error is logged and the list is emptied; never throws.
 */
async function loadRules() {
  loading.value = true
  try {
    const { data } = await api.get('/scan-rules')
    rules.value = data || []
  } catch (error: any) {
    console.error('Failed to load rules:', error)
    rules.value = []
  } finally {
    loading.value = false
  }
}
/**
 * Toggle a rule through POST /scan-rules/{id}/toggle and reload the list.
 * Failures are reported with an alert; never throws.
 */
async function toggleRule(rule: Rule) {
  try {
    const endpoint = `/scan-rules/${rule.id}/toggle`
    await api.post(endpoint)
    await loadRules()
  } catch (error: any) {
    // Prefer the `detail` field of the error response over the generic message.
    const reason = error.response?.data?.detail || error.message
    alert('Failed to toggle rule: ' + reason)
  }
}
/**
 * Change handler of the action-type select: transcribe mode pins the
 * target language to 'en'; other modes leave it untouched.
 */
function onActionTypeChange() {
  const form = formData.value
  if (form.action_type !== 'transcribe') return
  form.target_language = 'en'
}
/**
 * Open the modal in edit mode for `rule` and copy its editable fields into
 * the form, falling back to the form defaults for missing values.
 */
function editRule(rule: Rule) {
  const { conditions, action } = rule
  editingRule.value = rule
  formData.value = {
    name: rule.name,
    priority: rule.priority,
    audio_language_is: conditions?.audio_language_is || '',
    target_language: action?.target_language || 'en',
    action_type: action?.action_type || 'transcribe',
    missing_external_subtitle_lang: conditions?.missing_external_subtitle_lang || '',
    enabled: rule.enabled
  }
}
/**
 * Create a new rule, or update the rule being edited, from the form data.
 *
 * - The name and (for translate rules) the target language must be
 *   non-empty; otherwise an alert is shown and nothing is sent.
 * - Transcribe rules always use 'en' as target language.
 * - When editing, conditions the form does not expose and the existing
 *   quality_preset / job_priority are carried over instead of being
 *   dropped or reset to the creation defaults ('fast' / 0).
 *
 * API failures are reported with an alert; never throws.
 */
async function saveRule() {
  const form = formData.value
  const existing = editingRule.value

  const name = form.name.trim()
  if (!name) {
    alert('Failed to save rule: rule name is required')
    return
  }

  // Force target_language to 'en' if action_type is 'transcribe'
  const targetLanguage = form.action_type === 'transcribe'
    ? 'en'
    : form.target_language.trim()
  if (!targetLanguage) {
    alert('Failed to save rule: target language is required for translate rules')
    return
  }

  try {
    const payload = {
      name,
      enabled: form.enabled,
      priority: form.priority,
      conditions: {
        // Keep conditions that cannot be edited in this form.
        ...(existing?.conditions ?? {}),
        audio_language_is: form.audio_language_is.trim() || null,
        missing_external_subtitle_lang: form.missing_external_subtitle_lang.trim() || null
      },
      action: {
        action_type: form.action_type,
        target_language: targetLanguage,
        quality_preset: existing?.action?.quality_preset ?? 'fast',
        job_priority: existing?.action?.job_priority ?? 0
      }
    }
    if (existing) {
      await api.put(`/scan-rules/${existing.id}`, payload)
    } else {
      await api.post('/scan-rules', payload)
    }
    closeModal()
    await loadRules()
  } catch (error: any) {
    alert('Failed to save rule: ' + (error.response?.data?.detail || error.message))
  }
}
/**
 * Delete the rule with the given id after the user confirms, then reload the
 * list. Failures are reported with an alert.
 */
async function deleteRule(id: number) {
  const confirmed = confirm('Delete this rule?')
  if (!confirmed) {
    return
  }
  try {
    await api.delete(`/scan-rules/${id}`)
    await loadRules()
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    alert('Failed to delete rule: ' + reason)
  }
}
/**
 * Close the create/edit modal and return the form to its pristine defaults.
 */
function closeModal() {
  // Same defaults the form starts with.
  const blankForm = {
    name: '',
    priority: 10,
    audio_language_is: '',
    target_language: 'en',
    action_type: 'transcribe',
    missing_external_subtitle_lang: '',
    enabled: true
  }

  showCreateModal.value = false
  editingRule.value = null
  formData.value = blankForm
}
// Load the rules as soon as the view is mounted (fire-and-forget).
onMounted(() => void loadRules())
</script>
<style scoped>
.page-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-xl);
}
.page-title {
font-size: 2rem;
color: var(--text-primary);
margin: 0;
}
.rules-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
gap: var(--spacing-lg);
}
.rule-card {
background-color: var(--secondary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-md);
padding: var(--spacing-lg);
}
.rule-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-md);
padding-bottom: var(--spacing-md);
border-bottom: 1px solid var(--border-color);
}
.rule-name {
margin: 0;
font-size: 1.125rem;
color: var(--text-primary);
}
.rule-actions {
display: flex;
gap: var(--spacing-xs);
}
.btn-toggle, .btn-edit, .btn-delete {
padding: 4px 8px;
border: 1px solid var(--border-color);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
cursor: pointer;
font-size: 0.875rem;
}
.btn-toggle.enabled {
background-color: var(--success-color);
color: white;
}
.btn-toggle.disabled {
background-color: var(--text-muted);
color: white;
}
.rule-body {
display: flex;
flex-direction: column;
gap: var(--spacing-sm);
}
.rule-detail {
display: flex;
justify-content: space-between;
}
.detail-label {
font-weight: 600;
color: var(--text-secondary);
}
.detail-value {
color: var(--text-primary);
font-family: monospace;
}
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.7);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
}
.modal-content {
background-color: var(--secondary-bg);
border-radius: var(--radius-md);
border: 1px solid var(--border-color);
max-width: 500px;
width: 90%;
max-height: 80vh;
overflow: auto;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-lg);
border-bottom: 1px solid var(--border-color);
}
.modal-header h2 {
margin: 0;
font-size: 1.5rem;
color: var(--text-primary);
}
.btn-close {
background: none;
border: none;
font-size: 1.5rem;
color: var(--text-muted);
cursor: pointer;
}
.modal-body {
padding: var(--spacing-lg);
}
.form-group {
margin-bottom: var(--spacing-md);
}
.form-group label {
display: block;
margin-bottom: var(--spacing-xs);
font-weight: 600;
color: var(--text-secondary);
}
.form-input, .form-select {
width: 100%;
padding: var(--spacing-sm);
background-color: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
color: var(--text-primary);
}
.checkbox-label {
display: flex;
align-items: center;
gap: var(--spacing-sm);
cursor: pointer;
}
.modal-footer {
display: flex;
justify-content: flex-end;
gap: var(--spacing-md);
padding: var(--spacing-lg);
border-top: 1px solid var(--border-color);
}
.empty-state {
text-align: center;
padding: var(--spacing-xl);
color: var(--text-muted);
background-color: var(--secondary-bg);
border-radius: var(--radius-md);
border: 1px solid var(--border-color);
}
</style>

View File

@@ -0,0 +1,803 @@
<template>
<div class="scanner-view">
<h1 class="page-title">Library Scanner</h1>
<!-- Notification Toast -->
<div v-if="notification.show" :class="['notification-toast', `notification-${notification.type}`]">
<span class="notification-icon">
<span v-if="notification.type === 'success'"></span>
<span v-else-if="notification.type === 'error'"></span>
<span v-else></span>
</span>
<span class="notification-message">{{ notification.message }}</span>
<button @click="notification.show = false" class="notification-close">×</button>
</div>
<div v-if="loading" class="spinner"></div>
<div v-else>
<!-- Scanner Status Card -->
<div class="card status-card">
<div class="card-header">
<h2 class="card-title">Scanner Status</h2>
<span :class="['badge', scannerStatus?.is_scanning ? 'badge-processing' : 'badge-queued']">
{{ scannerStatus?.is_scanning ? 'Scanning' : 'Idle' }}
</span>
</div>
<div class="card-body">
<div class="status-grid">
<div class="status-item">
<span class="status-label">Scheduler:</span>
<span :class="['badge', scannerStatus?.scheduler_running ? 'badge-completed' : 'badge-cancelled']">
{{ scannerStatus?.scheduler_running ? 'Running' : 'Stopped' }}
</span>
</div>
<div class="status-item">
<span class="status-label">File Watcher:</span>
<span :class="['badge', scannerStatus?.watcher_running ? 'badge-completed' : 'badge-cancelled']">
{{ scannerStatus?.watcher_running ? 'Active' : 'Inactive' }}
</span>
</div>
<div class="status-item">
<span class="status-label">Last Scan:</span>
<span class="status-value">{{ formatDate(scannerStatus?.last_scan_time) }}</span>
</div>
<div class="status-item">
<span class="status-label">Files Scanned:</span>
<span class="status-value">{{ scannerStatus?.total_files_scanned || 0 }}</span>
</div>
</div>
</div>
</div>
<!-- Scanner Controls -->
<div class="card controls-card">
<div class="card-header">
<h2 class="card-title">Scanner Controls</h2>
</div>
<div class="card-body">
<div class="controls-grid">
<!-- Scheduled Scanning -->
<div class="control-section">
<h3 class="control-title">Scheduled Scanning</h3>
<p class="control-description">Scan library periodically at set intervals</p>
<div class="control-actions">
<button
v-if="!scannerStatus?.scheduler_running"
@click="startScheduler"
class="btn btn-primary"
:disabled="actionLoading"
>
Start Scheduler
</button>
<button
v-else
@click="stopScheduler"
class="btn btn-danger"
:disabled="actionLoading"
>
Stop Scheduler
</button>
</div>
</div>
<!-- File Watcher -->
<div class="control-section">
<h3 class="control-title">Real-time File Watcher</h3>
<p class="control-description">Monitor filesystem for new files</p>
<div class="control-actions">
<button
v-if="!scannerStatus?.watcher_running"
@click="startWatcher"
class="btn btn-primary"
:disabled="actionLoading"
>
Start Watcher
</button>
<button
v-else
@click="stopWatcher"
class="btn btn-danger"
:disabled="actionLoading"
>
Stop Watcher
</button>
</div>
</div>
<!-- Manual Scan -->
<div class="control-section">
<h3 class="control-title">Manual Scan</h3>
<p class="control-description">Scan library immediately</p>
<div class="control-actions">
<button
@click="showManualScanModal = true"
class="btn btn-accent"
:disabled="actionLoading || scannerStatus?.is_scanning"
>
Run Manual Scan
</button>
</div>
</div>
</div>
</div>
</div>
<!-- Library Paths -->
<div class="card">
<div class="card-header">
<h2 class="card-title">Library Paths</h2>
</div>
<div class="card-body">
<div v-if="libraryPaths.length === 0" class="empty-state">
<p>No library paths configured. Add paths in Settings.</p>
</div>
<div v-else class="paths-list">
<div v-for="(path, index) in libraryPaths" :key="index" class="path-item">
<span class="path-icon">📁</span>
<span class="path-text">{{ path }}</span>
</div>
</div>
</div>
</div>
<!-- Scan Results -->
<div v-if="scanResults.length > 0" class="card">
<div class="card-header">
<h2 class="card-title">Recent Scan Results</h2>
</div>
<div class="card-body">
<div class="results-table-container">
<table class="results-table">
<thead>
<tr>
<th>Date</th>
<th>Files Scanned</th>
<th>Matched</th>
<th>Jobs Created</th>
<th>Skipped</th>
<th>Duration</th>
</tr>
</thead>
<tbody>
<tr v-for="result in scanResults" :key="result.id">
<td>{{ formatDate(result.timestamp) }}</td>
<td>{{ result.files_scanned }}</td>
<td class="text-success">{{ result.matched }}</td>
<td class="text-primary">{{ result.jobs_created }}</td>
<td class="text-muted">{{ result.skipped }}</td>
<td>{{ formatDuration(result.duration) }}</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
<!-- Manual Scan Modal -->
<div v-if="showManualScanModal" class="modal-overlay" @click="showManualScanModal = false">
<div class="modal-content" @click.stop>
<div class="modal-header">
<h2>Manual Library Scan</h2>
<button @click="showManualScanModal = false" class="btn-close"></button>
</div>
<div class="modal-body">
<p>Start a manual scan of all configured library paths?</p>
<div v-if="libraryPaths.length > 0" class="paths-preview">
<p class="preview-label">Paths to scan:</p>
<ul>
<li v-for="(path, index) in libraryPaths" :key="index">{{ path }}</li>
</ul>
</div>
</div>
<div class="modal-footer">
<button @click="runManualScan" class="btn btn-primary" :disabled="actionLoading">
<span v-if="actionLoading">Scanning...</span>
<span v-else>Start Scan</span>
</button>
<button @click="showManualScanModal = false" class="btn btn-secondary">Cancel</button>
</div>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted, onUnmounted } from 'vue'
import api from '@/services/api'
// Scanner state as stored from the `/scanner/status` response.
interface ScannerStatus {
// Drives the "Scanning"/"Idle" badge and disables the manual scan button.
is_scanning: boolean
// Drives the scheduler badge and which scheduler button (start/stop) is shown.
scheduler_running: boolean
// Drives the file watcher badge and which watcher button (start/stop) is shown.
watcher_running: boolean
// Rendered through formatDate(); null is displayed as "Never".
last_scan_time: string | null
// Shown as "Files Scanned" in the status card.
total_files_scanned: number
}
// One row of the "Recent Scan Results" table.
interface ScanResult {
// Used as the row key.
id: number
// Rendered through formatDate().
timestamp: string
files_scanned: number
matched: number
jobs_created: number
skipped: number
// Rendered through formatDuration(), which treats the value as seconds.
duration: number
}
// --- Reactive state for the scanner view ---

// Raised while scanner data is being (re)loaded; the template shows a spinner.
const loading = ref<boolean>(true)
// Raised while a start/stop/scan request is in flight; disables the buttons.
const actionLoading = ref<boolean>(false)
// Last status payload from the backend, or null before the first load.
const scannerStatus = ref<ScannerStatus | null>(null)
// Library paths read from the settings endpoint.
const libraryPaths = ref<string[]>([])
// Recent scan history rows.
const scanResults = ref<ScanResult[]>([])
// Whether the manual scan confirmation modal is open.
const showManualScanModal = ref<boolean>(false)

// Notification system: a single toast, hidden by default.
const notification = ref({
  show: false,
  type: 'info' as 'success' | 'error' | 'info',
  message: ''
})
// Handle of the pending auto-dismiss timer for the toast, if any.
let notificationTimer: ReturnType<typeof setTimeout> | null = null

/**
 * Show a toast with the given message and hide it again after 5 seconds.
 *
 * Any auto-dismiss timer left over from a previous toast is cancelled first;
 * otherwise the older timer would fire early and hide the new toast before
 * its own 5 seconds have elapsed.
 *
 * @param message Text to display.
 * @param type    Visual style of the toast; defaults to 'info'.
 */
function showNotification(message: string, type: 'success' | 'error' | 'info' = 'info') {
  if (notificationTimer !== null) {
    clearTimeout(notificationTimer)
  }
  notification.value = { show: true, type, message }
  notificationTimer = setTimeout(() => {
    notification.value.show = false
    notificationTimer = null
  }, 5000)
}
let refreshInterval: number | null = null

/**
 * Normalise the `library_paths` setting into a list of non-empty, trimmed paths.
 *
 * Accepts an array, a JSON-encoded array, or a comma-separated string; anything
 * else yields an empty list. Non-string array entries are discarded.
 */
function parseLibraryPaths(raw: unknown): string[] {
  const clean = (items: unknown[]): string[] =>
    items
      .filter((item): item is string => typeof item === 'string')
      .map((item) => item.trim())
      .filter((item) => item.length > 0)

  if (Array.isArray(raw)) return clean(raw)
  if (typeof raw !== 'string' || !raw.trim()) return []

  try {
    const parsed = JSON.parse(raw)
    if (Array.isArray(parsed)) return clean(parsed)
  } catch {
    // Not JSON: fall through and treat the value as comma-separated.
  }
  return clean(raw.split(','))
}

/**
 * Load scanner status and library paths from the backend.
 *
 * The full-page spinner is only shown until a status has been loaded once.
 * This function also runs as the periodic background refresh, and raising
 * `loading` on every poll would replace the whole view with the spinner each
 * time.
 *
 * A failure to load the library paths is logged and leaves the list empty
 * without failing the whole refresh; a failure to load the status is logged.
 */
async function loadData() {
  if (scannerStatus.value === null) {
    loading.value = true
  }
  try {
    // Load scanner status
    const statusRes = await api.get('/scanner/status')
    scannerStatus.value = statusRes.data

    // Load library paths from settings
    try {
      const settingsRes = await api.get('/settings/library_paths')
      libraryPaths.value = parseLibraryPaths(settingsRes.data.value)
    } catch (err) {
      console.error('Failed to load library paths:', err)
      libraryPaths.value = []
    }

    // Load recent scan results (if available)
    // TODO: Implement scan history endpoint
    scanResults.value = []
  } catch (error: any) {
    console.error('Failed to load scanner data:', error)
  } finally {
    loading.value = false
  }
}
/**
 * Start the scan scheduler, refresh the view and report the outcome in a toast.
 * `actionLoading` is raised for the duration of the request.
 */
async function startScheduler() {
  actionLoading.value = true
  try {
    await api.post('/scanner/scheduler/start')
    await loadData()
    showNotification('Scheduler started successfully', 'success')
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    showNotification('Failed to start scheduler: ' + reason, 'error')
  } finally {
    actionLoading.value = false
  }
}
/**
 * Stop the scan scheduler, refresh the view and report the outcome in a toast.
 * `actionLoading` is raised for the duration of the request.
 */
async function stopScheduler() {
  actionLoading.value = true
  try {
    await api.post('/scanner/scheduler/stop')
    await loadData()
    showNotification('Scheduler stopped', 'success')
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    showNotification('Failed to stop scheduler: ' + reason, 'error')
  } finally {
    actionLoading.value = false
  }
}
/**
 * Start the file watcher, refresh the view and report the outcome in a toast.
 * `actionLoading` is raised for the duration of the request.
 */
async function startWatcher() {
  actionLoading.value = true
  try {
    await api.post('/scanner/watcher/start')
    await loadData()
    showNotification('File watcher started successfully', 'success')
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    showNotification('Failed to start watcher: ' + reason, 'error')
  } finally {
    actionLoading.value = false
  }
}
/**
 * Stop the file watcher, refresh the view and report the outcome in a toast.
 * `actionLoading` is raised for the duration of the request.
 */
async function stopWatcher() {
  actionLoading.value = true
  try {
    await api.post('/scanner/watcher/stop')
    await loadData()
    showNotification('File watcher stopped', 'success')
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    showNotification('Failed to stop watcher: ' + reason, 'error')
  } finally {
    actionLoading.value = false
  }
}
/**
 * Trigger a manual library scan. Once the request is accepted the
 * confirmation modal is closed, the view refreshed and a success toast shown;
 * on failure the modal stays open and an error toast is shown.
 * `actionLoading` is raised for the duration of the request.
 */
async function runManualScan() {
  actionLoading.value = true
  try {
    await api.post('/scanner/scan')
    showManualScanModal.value = false
    await loadData()
    showNotification('Manual scan started successfully!', 'success')
  } catch (error: any) {
    const reason = error.response?.data?.detail || error.message
    showNotification('Failed to start manual scan: ' + reason, 'error')
  } finally {
    actionLoading.value = false
  }
}
/**
 * Format a date string for display in the user's locale.
 *
 * @param dateStr Date string to format (presumably an ISO timestamp from the
 *                backend); empty, null or undefined means "no date".
 * @returns 'Never' when no date is given, 'Invalid date' when the string
 *          cannot be parsed, otherwise `Date.prototype.toLocaleString()`.
 */
function formatDate(dateStr: string | null | undefined): string {
  if (!dateStr) {
    return 'Never'
  }
  const parsed = new Date(dateStr)
  return Number.isNaN(parsed.getTime()) ? 'Invalid date' : parsed.toLocaleString()
}
/**
 * Format a duration given in seconds as `"<s>s"` or `"<m>m <s>s"`.
 *
 * Durations under a minute are shown with at most one decimal place; longer
 * durations are rounded to whole seconds. This avoids floating-point noise
 * such as "1m 15.400000000000006s" when the value is fractional. Whole-number
 * inputs produce the same output as before.
 *
 * @param seconds Duration in seconds; non-finite or negative values are
 *                rendered as "0s".
 */
function formatDuration(seconds: number): string {
  if (!Number.isFinite(seconds) || seconds < 0) return '0s'

  // Round to tenths first so e.g. 59.96 rolls over into the minutes branch.
  const tenths = Math.round(seconds * 10) / 10
  if (tenths < 60) return `${tenths}s`

  const whole = Math.round(seconds)
  const mins = Math.floor(whole / 60)
  const secs = whole % 60
  return `${mins}m ${secs}s`
}
// On mount: load once immediately, then keep refreshing every 5 seconds.
onMounted(() => {
  const REFRESH_MS = 5000
  loadData()
  refreshInterval = window.setInterval(loadData, REFRESH_MS)
})
// On unmount: stop the periodic refresh started in onMounted, if one is active.
onUnmounted(() => {
  if (!refreshInterval) {
    return
  }
  clearInterval(refreshInterval)
})
</script>
<style scoped>
.page-title {
font-size: 2rem;
margin-bottom: var(--spacing-xl);
color: var(--text-primary);
}
.status-card {
margin-bottom: var(--spacing-lg);
}
.status-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: var(--spacing-md);
}
.status-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-sm);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
}
.status-label {
font-weight: 600;
color: var(--text-secondary);
}
.status-value {
color: var(--text-primary);
}
.controls-card {
margin-bottom: var(--spacing-lg);
}
.controls-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: var(--spacing-lg);
}
.control-section {
padding: var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-md);
}
.control-title {
font-size: 1.125rem;
margin-bottom: var(--spacing-xs);
color: var(--text-primary);
}
.control-description {
font-size: 0.875rem;
color: var(--text-muted);
margin-bottom: var(--spacing-md);
}
.control-actions {
display: flex;
gap: var(--spacing-sm);
}
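/* Schedule Configuration Styles
   NOTE(review): none of the selectors in this section (.schedule-*,
   .custom-interval, .help-text, .preview-value, .save-indicator, fadeIn)
   appear in this component's template, and .preview-label is declared a
   second time further down for the modal's paths preview. Confirm whether
   this section is a leftover that can be removed. */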
.schedule-config {
display: flex;
flex-direction: column;
gap: var(--spacing-lg);
margin-top: var(--spacing-md);
}
.schedule-option {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
.schedule-label {
font-size: 0.875rem;
font-weight: 600;
color: var(--text-secondary);
}
.schedule-select,
.schedule-input {
padding: var(--spacing-sm) var(--spacing-md);
background-color: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
color: var(--text-primary);
font-size: 0.875rem;
max-width: 300px;
}
.schedule-select:focus,
.schedule-input:focus {
outline: none;
border-color: var(--accent-color);
}
.custom-interval {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
padding: var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
border-left: 3px solid var(--accent-color);
}
.help-text {
font-size: 0.75rem;
color: var(--text-muted);
font-style: italic;
}
.schedule-preview {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
}
.preview-label {
font-size: 0.875rem;
color: var(--text-secondary);
margin: 0;
}
.preview-value {
font-size: 1rem;
font-weight: 600;
color: var(--accent-color);
}
.schedule-actions {
display: flex;
align-items: center;
gap: var(--spacing-md);
}
.save-indicator {
color: var(--success-color);
font-weight: 600;
animation: fadeIn 0.3s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.paths-list {
display: flex;
flex-direction: column;
gap: var(--spacing-sm);
}
.path-item {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
font-family: monospace;
}
.path-icon {
font-size: 1.25rem;
}
.path-text {
color: var(--text-primary);
}
.results-table-container {
overflow-x: auto;
}
.results-table {
width: 100%;
border-collapse: collapse;
}
.results-table th,
.results-table td {
padding: var(--spacing-md);
text-align: left;
border-bottom: 1px solid var(--border-color);
}
.results-table th {
background-color: var(--tertiary-bg);
font-weight: 600;
color: var(--text-secondary);
text-transform: uppercase;
font-size: 0.75rem;
}
.text-success {
color: var(--success-color);
}
.text-primary {
color: var(--accent-color);
}
.text-muted {
color: var(--text-muted);
}
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.7);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
}
.modal-content {
background-color: var(--secondary-bg);
border-radius: var(--radius-md);
border: 1px solid var(--border-color);
max-width: 500px;
width: 90%;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-lg);
border-bottom: 1px solid var(--border-color);
}
.modal-header h2 {
margin: 0;
font-size: 1.5rem;
color: var(--text-primary);
}
.btn-close {
background: none;
border: none;
font-size: 1.5rem;
color: var(--text-muted);
cursor: pointer;
padding: 0;
width: 30px;
height: 30px;
}
.modal-body {
padding: var(--spacing-lg);
}
.paths-preview {
margin-top: var(--spacing-md);
padding: var(--spacing-md);
background-color: var(--tertiary-bg);
border-radius: var(--radius-sm);
}
.preview-label {
font-weight: 600;
margin-bottom: var(--spacing-sm);
color: var(--text-secondary);
}
.paths-preview ul {
margin: 0;
padding-left: var(--spacing-lg);
}
.paths-preview li {
font-family: monospace;
color: var(--text-primary);
margin-bottom: var(--spacing-xs);
}
.modal-footer {
display: flex;
justify-content: flex-end;
gap: var(--spacing-md);
padding: var(--spacing-lg);
border-top: 1px solid var(--border-color);
}
.empty-state {
text-align: center;
padding: var(--spacing-xl);
color: var(--text-muted);
}
.badge {
display: inline-block;
padding: 4px 8px;
border-radius: var(--radius-sm);
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
}
.badge-completed {
background-color: var(--success-color);
color: var(--primary-bg);
}
.badge-cancelled {
background-color: var(--text-muted);
color: var(--primary-bg);
}
.badge-processing {
background-color: var(--accent-color);
color: var(--primary-bg);
}
.badge-queued {
background-color: var(--warning-color);
color: var(--primary-bg);
}
/* Notification Toast */
.notification-toast {
position: fixed;
top: 80px;
right: var(--spacing-lg);
min-width: 300px;
max-width: 500px;
padding: var(--spacing-md);
border-radius: var(--radius-md);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
display: flex;
align-items: center;
gap: var(--spacing-md);
z-index: 9999;
animation: slideIn 0.3s ease-out;
}
@keyframes slideIn {
from {
transform: translateX(400px);
opacity: 0;
}
to {
transform: translateX(0);
opacity: 1;
}
}
.notification-success {
background-color: var(--success-color);
color: white;
}
.notification-error {
background-color: var(--danger-color);
color: white;
}
.notification-info {
background-color: var(--accent-color);
color: white;
}
.notification-icon {
font-size: 1.5rem;
font-weight: bold;
}
.notification-message {
flex: 1;
font-size: 0.95rem;
}
.notification-close {
background: none;
border: none;
color: white;
font-size: 1.5rem;
cursor: pointer;
padding: 0;
width: 24px;
height: 24px;
display: flex;
align-items: center;
justify-content: center;
opacity: 0.8;
transition: opacity 0.2s;
}
.notification-close:hover {
opacity: 1;
}
</style>

View File

@@ -0,0 +1,787 @@
<template>
<div class="settings-view">
<div class="page-header">
<h1 class="page-title">Settings</h1>
<div class="header-actions">
<button @click="loadSettings" class="btn btn-secondary" :disabled="loading">
<span v-if="loading">Loading...</span>
<span v-else> Refresh</span>
</button>
<button @click="saveSettings" class="btn btn-primary" :disabled="saving || !hasChanges">
<span v-if="saving">Saving...</span>
<span v-else>💾 Save Changes</span>
</button>
</div>
</div>
<div v-if="loading" class="spinner"></div>
<div v-else class="settings-container">
<!-- General Settings -->
<div class="card settings-card">
<div class="card-header">
<h2 class="card-title">🔧 General Settings</h2>
</div>
<div class="card-body">
<div class="settings-grid">
<div class="setting-item full-width">
<label class="setting-label">
Operation Mode
<span class="setting-description">Standalone or Bazarr provider mode (requires restart)</span>
</label>
<select v-model="settings.operation_mode" class="setting-input" @change="markChanged">
<option value="standalone">Standalone</option>
<option value="bazarr_slave">Bazarr Provider</option>
</select>
</div>
<!-- Library Paths - standalone mode only -->
<div v-if="isStandalone" class="setting-item full-width">
<label class="setting-label">
Library Paths
<span class="setting-description">Media library folders to scan</span>
</label>
<div class="paths-list">
<div v-for="(path, index) in libraryPaths" :key="index" class="path-display">
<code class="path-code">{{ path }}</code>
<button @click="removePath(index)" class="btn-icon">🗑</button>
</div>
<button @click="showPathBrowser = true" class="btn btn-secondary btn-sm">
📁 Browse for Path
</button>
</div>
</div>
<div class="setting-item">
<label class="setting-label">
Log Level
<span class="setting-description">Application logging level</span>
</label>
<select v-model="settings.log_level" class="setting-input" @change="markChanged">
<option value="DEBUG">DEBUG</option>
<option value="INFO">INFO</option>
<option value="WARNING">WARNING</option>
<option value="ERROR">ERROR</option>
</select>
</div>
</div>
</div>
</div>
<!-- Worker Settings -->
<div class="card settings-card">
<div class="card-header">
<h2 class="card-title"> Worker Settings</h2>
</div>
<div class="card-body">
<div class="settings-grid">
<div class="setting-item">
<label class="setting-label">
CPU Workers on Startup
<span class="setting-description">Number of CPU workers to start automatically</span>
</label>
<input
type="number"
v-model.number="settings.worker_cpu_count"
class="setting-input"
min="0"
max="16"
@input="markChanged"
/>
</div>
<div class="setting-item">
<label class="setting-label">
GPU Workers on Startup
<span class="setting-description">Number of GPU workers to start automatically</span>
</label>
<input
type="number"
v-model.number="settings.worker_gpu_count"
class="setting-input"
min="0"
max="8"
:disabled="!hasGPU"
:placeholder="hasGPU ? '0' : 'No GPU detected'"
@input="markChanged"
/>
<span v-if="!hasGPU" class="warning-message">
No GPU detected - GPU workers will not start
</span>
</div>
<div class="setting-item">
<label class="setting-label">
Health Check Interval
<span class="setting-description">Worker health check interval (seconds)</span>
</label>
<input
type="number"
v-model.number="settings.worker_healthcheck_interval"
class="setting-input"
min="10"
max="300"
@input="markChanged"
/>
</div>
<div class="setting-item">
<label class="setting-label">
Auto-Restart Failed Workers
<span class="setting-description">Automatically restart workers that crash</span>
</label>
<label class="toggle-switch">
<input
type="checkbox"
v-model="settings.worker_auto_restart"
@change="markChanged"
/>
<span class="toggle-slider"></span>
</label>
</div>
</div>
</div>
</div>
<!-- Transcription Settings -->
<div class="card settings-card">
<div class="card-header">
<h2 class="card-title">🎤 Transcription Settings</h2>
</div>
<div class="card-body">
<div class="settings-grid">
<div class="setting-item">
<label class="setting-label">
Whisper Model
<span class="setting-description">AI model size (larger = better quality, slower)</span>
</label>
<select v-model="settings.whisper_model" class="setting-input" @change="markChanged">
<option value="tiny">Tiny (fastest)</option>
<option value="base">Base</option>
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="large-v2">Large v2</option>
<option value="large-v3">Large v3 (best)</option>
</select>
</div>
<div class="setting-item">
<label class="setting-label">
CPU Compute Type
<span class="setting-description">Precision for CPU workers</span>
</label>
<select v-model="settings.cpu_compute_type" class="setting-input" @change="markChanged">
<option value="auto">Auto (recommended)</option>
<option value="int8">Int8 (faster, lower quality)</option>
<option value="float32">Float32 (slower, better quality)</option>
</select>
</div>
<div class="setting-item" v-if="hasGPU">
<label class="setting-label">
GPU Compute Type
<span class="setting-description">Precision for GPU workers</span>
</label>
<select v-model="settings.gpu_compute_type" class="setting-input" @change="markChanged">
<option value="auto">Auto (recommended)</option>
<option value="float16">Float16 (fast, recommended)</option>
<option value="float32">Float32 (slower, more precise)</option>
<option value="int8_float16">Int8 + Float16 (fastest, lower quality)</option>
<option value="int8">Int8 (very fast, lowest quality)</option>
</select>
</div>
<div class="setting-item full-width">
<label class="setting-label">
Skip if Subtitle Exists
<span class="setting-description">Skip transcription if subtitle file already exists</span>
</label>
<label class="toggle-switch">
<input
type="checkbox"
v-model="settings.skip_if_exists"
@change="markChanged"
/>
<span class="toggle-slider"></span>
</label>
</div>
</div>
</div>
</div>
<!-- Scanner Settings - standalone mode only -->
<div v-if="isStandalone" class="card settings-card">
<div class="card-header">
<h2 class="card-title">🔍 Scanner Settings</h2>
</div>
<div class="card-body">
<div class="settings-grid">
<div class="setting-item full-width">
<label class="setting-label">
Enable Library Scanner
<span class="setting-description">Automatically scan libraries for new media</span>
</label>
<label class="toggle-switch">
<input
type="checkbox"
v-model="settings.scanner_enabled"
@change="markChanged"
/>
<span class="toggle-slider"></span>
</label>
</div>
<div class="setting-item full-width">
<label class="setting-label">
Scan Interval
<span class="setting-description">How often should the scanner run automatically</span>
</label>
<div class="interval-config">
<select v-model="scanInterval" class="setting-input" @change="handleIntervalChange">
<option :value="15">Every 15 minutes</option>
<option :value="30">Every 30 minutes</option>
<option :value="60">Every hour</option>
<option :value="120">Every 2 hours</option>
<option :value="180">Every 3 hours</option>
<option :value="360">Every 6 hours (recommended)</option>
<option :value="720">Every 12 hours</option>
<option :value="1440">Every 24 hours (daily)</option>
<option value="custom">Custom...</option>
</select>
<div v-if="scanInterval === 'custom'" class="custom-interval-input">
<input
type="number"
v-model.number="customScanInterval"
class="setting-input"
min="1"
max="10080"
placeholder="Minutes"
@input="handleCustomIntervalChange"
/>
<span class="help-text">Between 1 minute and 7 days (10080 minutes)</span>
</div>
<div class="interval-preview">
<span class="preview-icon">📅</span>
<span class="preview-text">
Scans will run approximately every: <strong>{{ getScanIntervalText() }}</strong>
</span>
</div>
</div>
</div>
<div class="setting-item full-width">
<label class="setting-label">
Enable File Watcher
<span class="setting-description">Watch for new files in real-time</span>
</label>
<label class="toggle-switch">
<input
type="checkbox"
v-model="settings.watcher_enabled"
@change="markChanged"
/>
<span class="toggle-slider"></span>
</label>
</div>
</div>
</div>
</div>
<!-- Bazarr Provider Settings - Bazarr provider mode only -->
<div v-if="!isStandalone" class="card settings-card">
<div class="card-header">
<h2 class="card-title">🔌 Bazarr Provider Settings</h2>
</div>
<div class="card-body">
<div class="settings-grid">
<div class="setting-item full-width">
<label class="setting-label">
Provider Enabled
<span class="setting-description">Enable Bazarr provider API</span>
</label>
<label class="toggle-switch">
<input
type="checkbox"
v-model="settings.bazarr_provider_enabled"
@change="markChanged"
/>
<span class="toggle-slider"></span>
</label>
</div>
<div v-if="bazarrApiKey" class="setting-item full-width">
<label class="setting-label">
API Key
<span class="setting-description">Use this key to configure Bazarr</span>
</label>
<div class="copy-field">
<code>{{ bazarrApiKey }}</code>
<button @click="copyToClipboard(bazarrApiKey)" class="btn-icon">📋</button>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Path Browser Modal -->
<div v-if="showPathBrowser" class="modal-overlay" @click.self="showPathBrowser = false">
<PathBrowser @select="addPath" @close="showPathBrowser = false" />
</div>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { useConfigStore } from '@/stores/config'
import PathBrowser from '@/components/PathBrowser.vue'
import axios from 'axios'
// Shared configuration store; only `hasGPU` is read from it in this part of the view.
const configStore = useConfigStore()
// Raised while settings are being loaded; the template shows a spinner.
const loading = ref(true)
// Disables the Save button in the template while raised.
const saving = ref(false)
// Set by markChanged(), cleared when settings are (re)loaded; gates the Save button.
const hasChanges = ref(false)
// Whether the path browser modal is open.
const showPathBrowser = ref(false)
// Settings
// Form model for the settings page. The values below are the initial defaults
// shown before loadSettings() overwrites them with the backend values.
const settings = ref({
operation_mode: 'standalone',
log_level: 'INFO',
worker_cpu_count: 0,
worker_gpu_count: 0,
worker_healthcheck_interval: 30,
worker_auto_restart: true,
whisper_model: 'large-v3',
cpu_compute_type: 'auto',
gpu_compute_type: 'auto',
skip_if_exists: true,
scanner_enabled: false,
scanner_cron: '0 2 * * *',
watcher_enabled: false,
bazarr_provider_enabled: false
})
// Library folders, kept separately from `settings` as a list.
const libraryPaths = ref<string[]>([])
// Bazarr API key as loaded from the backend; empty string when none exists.
const bazarrApiKey = ref('')
// Scanner interval configuration
// Either one of the preset minute values offered in the dropdown, or 'custom'
// when the interval is taken from `customScanInterval` instead.
const scanInterval = ref<number | 'custom'>(360) // Default: 6 hours
// Custom interval in minutes, used when `scanInterval` is 'custom'.
const customScanInterval = ref(90)
// Whether a GPU is available, as reported by the config store.
const hasGPU = computed(() => configStore.hasGPU)
// True when the selected operation mode is 'standalone'; toggles the
// standalone-only and Bazarr-only sections of the template.
const isStandalone = computed(() => settings.value.operation_mode === 'standalone')
// Flag the form as having unsaved changes; this enables the Save button.
function markChanged() {
hasChanges.value = true
}
/**
 * Load all settings from `/api/settings` into the form model.
 *
 * The backend returns a list of `{ key, value }` entries. Values are coerced
 * defensively because they may arrive as strings or as native JSON types:
 * - integers fall back to their default when missing or unparsable,
 * - booleans accept real booleans as well as 'true'/'false' in any case,
 * - library paths accept an array, a JSON-encoded array, or a
 *   comma-separated string (the same shapes the scanner view handles).
 *
 * Clears the unsaved-changes flag. On failure the error is logged and the
 * user is shown an alert.
 */
async function loadSettings() {
  // Parse a base-10 integer, using `fallback` for missing/unparsable values.
  const asInt = (raw: unknown, fallback: number): number => {
    const parsed = Number.parseInt(String(raw ?? ''), 10)
    return Number.isNaN(parsed) ? fallback : parsed
  }

  // Interpret a boolean or a 'true'/'false' string; anything else -> fallback.
  const asBool = (raw: unknown, fallback: boolean): boolean => {
    if (typeof raw === 'boolean') return raw
    if (typeof raw === 'string') {
      const text = raw.trim().toLowerCase()
      if (text === 'true') return true
      if (text === 'false') return false
    }
    return fallback
  }

  // Normalise the library paths setting into trimmed, non-empty strings.
  const asPathList = (raw: unknown): string[] => {
    const clean = (items: unknown[]): string[] =>
      items
        .filter((item): item is string => typeof item === 'string')
        .map((item) => item.trim())
        .filter((item) => item.length > 0)

    if (Array.isArray(raw)) return clean(raw)
    if (typeof raw !== 'string' || !raw.trim()) return []
    try {
      const parsed = JSON.parse(raw)
      if (Array.isArray(parsed)) return clean(parsed)
    } catch {
      // Not JSON: treat as comma-separated below.
    }
    return clean(raw.split(','))
  }

  loading.value = true
  hasChanges.value = false
  try {
    const response = await axios.get('/api/settings')
    const settingsMap: Record<string, any> = {}
    response.data.forEach((setting: any) => {
      settingsMap[setting.key] = setting.value
    })

    // Parse settings
    settings.value.operation_mode = settingsMap['operation_mode'] || 'standalone'
    settings.value.log_level = settingsMap['log_level'] || 'INFO'
    settings.value.worker_cpu_count = asInt(settingsMap['worker_cpu_count'], 0)
    // Force GPU worker count to 0 if no GPU detected
    settings.value.worker_gpu_count = hasGPU.value ? asInt(settingsMap['worker_gpu_count'], 0) : 0
    settings.value.worker_healthcheck_interval = asInt(settingsMap['worker_healthcheck_interval'], 30)
    settings.value.worker_auto_restart = asBool(settingsMap['worker_auto_restart'], false)
    settings.value.whisper_model = settingsMap['whisper_model'] || 'large-v3'
    // Both compute types fall back to the shared 'compute_type' key when their own key is unset.
    settings.value.cpu_compute_type = settingsMap['cpu_compute_type'] || settingsMap['compute_type'] || 'auto'
    settings.value.gpu_compute_type = settingsMap['gpu_compute_type'] || settingsMap['compute_type'] || 'auto'
    // Enabled unless explicitly turned off.
    settings.value.skip_if_exists = asBool(settingsMap['skip_if_exists'], true)
    settings.value.scanner_enabled = asBool(settingsMap['scanner_enabled'], false)
    settings.value.scanner_cron = settingsMap['scanner_cron'] || '0 2 * * *'
    settings.value.watcher_enabled = asBool(settingsMap['watcher_enabled'], false)
    settings.value.bazarr_provider_enabled = asBool(settingsMap['bazarr_provider_enabled'], false)

    // Parse library paths
    libraryPaths.value = asPathList(settingsMap['library_paths'])

    // Get Bazarr API key if exists
    bazarrApiKey.value = settingsMap['bazarr_api_key'] || ''

    // Load scanner interval: select the matching preset, otherwise switch the
    // dropdown to "custom" and show the stored value there.
    const interval = asInt(settingsMap['scanner_schedule_interval_minutes'], 360)
    const presets = [15, 30, 60, 120, 180, 360, 720, 1440]
    if (presets.includes(interval)) {
      scanInterval.value = interval
    } else {
      scanInterval.value = 'custom'
      customScanInterval.value = interval
    }
  } catch (error) {
    console.error('Failed to load settings:', error)
    alert('Failed to load settings')
  } finally {
    loading.value = false
  }
}
/**
 * Persist the whole form through `/api/settings/bulk-update`.
 *
 * All values are sent as strings. The scan interval is validated first so an
 * empty, non-numeric, zero or negative value is never stored. After a
 * successful save the shared config store is refreshed; a failure of that
 * refresh is logged but not reported as a failed save, because the settings
 * have already been written.
 */
async function saveSettings() {
  // Calculate final scan interval
  const selectedInterval = scanInterval.value === 'custom' ? customScanInterval.value : scanInterval.value
  // Number() turns an empty input into 0 and garbage into NaN; both are rejected below.
  const intervalMinutes = Number(selectedInterval)
  if (!Number.isFinite(intervalMinutes) || intervalMinutes <= 0) {
    alert('Failed to save settings: scan interval must be a positive number of minutes')
    return
  }

  saving.value = true
  try {
    // Force GPU worker count to 0 if no GPU detected
    const gpuWorkerCount = hasGPU.value ? settings.value.worker_gpu_count : 0
    const form = settings.value
    const updates: Record<string, string> = {
      operation_mode: form.operation_mode,
      log_level: form.log_level,
      worker_cpu_count: form.worker_cpu_count.toString(),
      worker_gpu_count: gpuWorkerCount.toString(),
      worker_healthcheck_interval: form.worker_healthcheck_interval.toString(),
      worker_auto_restart: form.worker_auto_restart.toString(),
      whisper_model: form.whisper_model,
      cpu_compute_type: form.cpu_compute_type,
      gpu_compute_type: form.gpu_compute_type,
      skip_if_exists: form.skip_if_exists.toString(),
      scanner_enabled: form.scanner_enabled.toString(),
      scanner_schedule_interval_minutes: intervalMinutes.toString(),
      watcher_enabled: form.watcher_enabled.toString(),
      bazarr_provider_enabled: form.bazarr_provider_enabled.toString(),
      library_paths: libraryPaths.value.join(',')
    }
    await axios.post('/api/settings/bulk-update', { settings: updates })
    hasChanges.value = false
    alert('Settings saved successfully! Some changes may require a restart.')

    // Reload config. Kept in its own try block: the save already succeeded,
    // so a reload error must not surface as "Failed to save settings".
    try {
      await configStore.fetchConfig()
    } catch (reloadError) {
      console.error('Settings saved, but reloading config failed:', reloadError)
    }
  } catch (error: any) {
    console.error('Failed to save settings:', error)
    alert('Failed to save settings: ' + (error.response?.data?.detail || error.message))
  } finally {
    saving.value = false
  }
}
/**
 * Append a library path chosen in the path browser, then close the browser.
 * Empty values and paths already in the list are ignored.
 */
function addPath(path: string) {
  const isNewPath = Boolean(path) && !libraryPaths.value.includes(path)
  if (isNewPath) {
    libraryPaths.value.push(path)
    markChanged()
  }
  // The modal closes whether or not anything was added.
  showPathBrowser.value = false
}
/** Remove the library path at `index` and mark the form as changed. */
function removePath(index: number) {
  const paths = libraryPaths.value
  paths.splice(index, 1)
  markChanged()
}
/**
 * Copy `text` to the system clipboard and tell the user whether it worked.
 *
 * `navigator.clipboard` only exists in secure contexts (HTTPS or localhost),
 * and `writeText` returns a promise that can reject, so the success message is
 * shown only after the write has actually completed.
 */
async function copyToClipboard(text: string) {
  try {
    if (!navigator.clipboard) {
      throw new Error('Clipboard API is not available in this context')
    }
    await navigator.clipboard.writeText(text)
    alert('Copied to clipboard!')
  } catch (error) {
    console.error('Failed to copy to clipboard:', error)
    alert('Failed to copy to clipboard. Please copy the value manually.')
  }
}
/** Change handler for the scan interval preset: records an unsaved edit. */
function handleIntervalChange(): void {
  markChanged()
}
/** Change handler for the custom scan interval: records an unsaved edit. */
function handleCustomIntervalChange(): void {
  markChanged()
}
/**
 * Describe the currently selected scan interval (in minutes) in human-readable form.
 *
 * Returns 'Invalid interval' for empty, non-numeric, zero or negative values.
 * Units are pluralised correctly ("1 hour", "2 days"), and leftover minutes
 * are kept even when the interval spans whole days (1450 -> "1 day 10m").
 */
function getScanIntervalText(): string {
  const selected = scanInterval.value === 'custom' ? customScanInterval.value : scanInterval.value
  const interval = Number(selected)
  if (!Number.isFinite(interval) || interval <= 0) return 'Invalid interval'

  // "1 minute" / "2 minutes" style label.
  const plural = (count: number, unit: string): string => `${count} ${unit}${count === 1 ? '' : 's'}`

  if (interval < 60) {
    return plural(interval, 'minute')
  }

  const days = Math.floor(interval / 1440)
  const hours = Math.floor((interval % 1440) / 60)
  const mins = interval % 60

  if (days === 0) {
    return mins > 0 ? `${hours}h ${mins}m` : plural(hours, 'hour')
  }

  const parts = [plural(days, 'day')]
  if (hours > 0) parts.push(`${hours}h`)
  if (mins > 0) parts.push(`${mins}m`)
  return parts.join(' ')
}
// On mount: detect the GPU first so loadSettings() can decide whether to keep
// the stored GPU worker count, then load the settings.
onMounted(async () => {
  try {
    await configStore.detectGPU()
  } catch (error) {
    // A failed detection must not block loading: `loading` starts as true and
    // is only ever cleared inside loadSettings().
    console.error('GPU detection failed:', error)
  }
  await loadSettings()
})
</script>
<style scoped>
.page-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-xl);
}
.header-actions {
display: flex;
gap: var(--spacing-md);
}
.settings-container {
max-width: 1200px;
}
.settings-card {
margin-bottom: var(--spacing-lg);
}
.settings-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: var(--spacing-lg);
}
.setting-item {
display: flex;
flex-direction: column;
gap: var(--spacing-sm);
}
.setting-item.full-width {
grid-column: 1 / -1;
}
.setting-label {
font-weight: 600;
color: var(--text-primary);
font-size: 0.875rem;
}
.setting-description {
display: block;
font-weight: 400;
color: var(--text-secondary);
font-size: 0.75rem;
margin-top: var(--spacing-xs);
}
.setting-input {
padding: var(--spacing-sm) var(--spacing-md);
background: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
color: var(--text-primary);
font-size: 0.875rem;
}
.setting-input:focus {
outline: none;
border-color: var(--accent-color);
}
.setting-input:disabled {
opacity: 0.5;
cursor: not-allowed;
background: var(--secondary-bg);
}
.warning-message {
color: var(--warning-color);
font-size: 0.75rem;
display: flex;
align-items: center;
gap: var(--spacing-xs);
margin-top: var(--spacing-xs);
}
/* Toggle Switch */
.toggle-switch {
position: relative;
display: inline-block;
width: 50px;
height: 26px;
}
.toggle-switch input {
opacity: 0;
width: 0;
height: 0;
}
.toggle-slider {
position: absolute;
cursor: pointer;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: var(--tertiary-bg);
border: 1px solid var(--border-color);
transition: 0.3s;
border-radius: 26px;
}
.toggle-slider:before {
position: absolute;
content: "";
height: 18px;
width: 18px;
left: 3px;
bottom: 3px;
background-color: var(--text-secondary);
transition: 0.3s;
border-radius: 50%;
}
input:checked + .toggle-slider {
background-color: var(--accent-color);
border-color: var(--accent-color);
}
input:checked + .toggle-slider:before {
transform: translateX(24px);
background-color: white;
}
/* Paths List */
.paths-list {
display: flex;
flex-direction: column;
gap: var(--spacing-sm);
}
.path-display {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm);
background: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
}
.path-code {
flex: 1;
background: var(--primary-bg);
padding: var(--spacing-xs) var(--spacing-sm);
border-radius: var(--radius-sm);
color: var(--accent-color);
font-family: monospace;
font-size: 0.875rem;
}
.btn-icon {
background: none;
border: none;
cursor: pointer;
font-size: 1.25rem;
padding: var(--spacing-xs);
border-radius: var(--radius-sm);
transition: background-color var(--transition-fast);
}
.btn-icon:hover {
background: var(--secondary-bg);
}
.copy-field {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm);
background: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
}
.copy-field code {
flex: 1;
color: var(--accent-color);
font-family: monospace;
word-break: break-all;
}
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: rgba(0, 0, 0, 0.8);
display: flex;
justify-content: center;
align-items: center;
z-index: 9999;
}
/* Scanner Interval Configuration */
.interval-config {
display: flex;
flex-direction: column;
gap: var(--spacing-md);
}
.custom-interval-input {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
padding: var(--spacing-md);
background: var(--tertiary-bg);
border-radius: var(--radius-sm);
border-left: 3px solid var(--accent-color);
}
.help-text {
font-size: 0.75rem;
color: var(--text-muted);
font-style: italic;
}
.interval-preview {
display: flex;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-md);
background: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
}
.preview-icon {
font-size: 1.5rem;
}
.preview-text {
color: var(--text-secondary);
font-size: 0.875rem;
}
.preview-text strong {
color: var(--accent-color);
font-weight: 600;
}
@media (max-width: 768px) {
.settings-grid {
grid-template-columns: 1fr;
}
}
</style>

View File

@@ -0,0 +1,450 @@
<template>
<div class="workers-view">
<div class="page-header">
<h1 class="page-title">Worker Management</h1>
<div class="header-actions">
<button @click="showAddWorkerModal = true" class="btn btn-success">
Add Worker
</button>
<button @click="refreshWorkers" class="btn btn-secondary" :disabled="loading">
🔄 Refresh
</button>
</div>
</div>
<!-- Worker Stats -->
<div v-if="workersStore.stats" class="card">
<div class="card-header">
<h2 class="card-title">Pool Statistics</h2>
</div>
<div class="card-body">
<div class="stats-grid-large">
<div class="stat-card">
<div class="stat-icon">👷</div>
<div class="stat-info">
<div class="stat-number">{{ workersStore.stats.total_workers }}</div>
<div class="stat-label">Total Workers</div>
</div>
</div>
<div class="stat-card">
<div class="stat-icon">💻</div>
<div class="stat-info">
<div class="stat-number">{{ workersStore.stats.cpu_workers }}</div>
<div class="stat-label">CPU Workers</div>
</div>
</div>
<div class="stat-card">
<div class="stat-icon">🎮</div>
<div class="stat-info">
<div class="stat-number">{{ workersStore.stats.gpu_workers }}</div>
<div class="stat-label">GPU Workers</div>
</div>
</div>
<div class="stat-card">
<div class="stat-icon"></div>
<div class="stat-info">
<div class="stat-number text-success">{{ workersStore.stats.total_jobs_completed }}</div>
<div class="stat-label">Jobs Completed</div>
</div>
</div>
</div>
</div>
</div>
<!-- Workers List -->
<div class="card">
<div class="card-header">
<h2 class="card-title">Active Workers</h2>
</div>
<div class="card-body">
<div v-if="loading" class="spinner"></div>
<div v-else-if="workersStore.workers.length === 0" class="empty-state">
<p>No workers running</p>
<button @click="showAddWorkerModal = true" class="btn btn-primary">Add First Worker</button>
</div>
<table v-else class="table">
<thead>
<tr>
<th>Worker ID</th>
<th>Type</th>
<th>Status</th>
<th>Current Job</th>
<th>Progress</th>
<th>Completed</th>
<th>Failed</th>
<th>Uptime</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
<tr v-for="worker in workersStore.workers" :key="worker.worker_id">
<td class="worker-id">{{ worker.worker_id }}</td>
<td>
<span class="badge" :class="worker.worker_type === 'gpu' ? 'badge-processing' : 'badge-queued'">
{{ worker.worker_type.toUpperCase() }}
<span v-if="worker.device_id !== null">:{{ worker.device_id }}</span>
</span>
</td>
<td>
<span class="badge" :class="`badge-${worker.status}`">
{{ worker.status }}
</span>
</td>
<td>
<span v-if="worker.current_job_id" class="job-id">{{ worker.current_job_id.slice(0, 8) }}...</span>
<span v-else class="text-muted"></span>
</td>
<td>
<div v-if="worker.current_job_progress > 0" class="progress-container">
<div class="progress">
<div class="progress-bar" :style="{ width: `${worker.current_job_progress}%` }"></div>
</div>
<span class="progress-text">{{ worker.current_job_progress.toFixed(1) }}%</span>
</div>
<span v-else class="text-muted"></span>
</td>
<td class="text-success">{{ worker.jobs_completed }}</td>
<td class="text-danger">{{ worker.jobs_failed }}</td>
<td>{{ formatUptime(worker.uptime_seconds) }}</td>
<td>
<button
@click="removeWorker(worker.worker_id)"
class="btn btn-danger btn-sm"
:disabled="worker.status === 'busy'"
>
🗑 Remove
</button>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<!-- Add Worker Modal -->
<div v-if="showAddWorkerModal" class="modal-overlay" @click.self="showAddWorkerModal = false">
<div class="modal">
<div class="modal-header">
<h2>Add Worker</h2>
<button @click="showAddWorkerModal = false" class="btn-close"></button>
</div>
<div class="modal-body">
<div class="form-group">
<label>Worker Type</label>
<select v-model="newWorker.worker_type" class="form-control">
<option value="cpu">CPU</option>
<option value="gpu" :disabled="!configStore.hasGPU">
GPU {{ !configStore.hasGPU ? '(Not detected)' : '' }}
</option>
</select>
<span v-if="!configStore.hasGPU" class="warning-text">
No GPU detected on this system
</span>
</div>
<div v-if="newWorker.worker_type === 'gpu'" class="form-group">
<label>GPU Device ID</label>
<input v-model.number="newWorker.device_id" type="number" min="0" class="form-control" />
</div>
</div>
<div class="modal-footer">
<button @click="showAddWorkerModal = false" class="btn btn-secondary">Cancel</button>
<button @click="addWorker" class="btn btn-success" :disabled="addingWorker">
{{ addingWorker ? 'Adding...' : 'Add Worker' }}
</button>
</div>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted, onUnmounted } from 'vue'
import { useWorkersStore } from '@/stores/workers'
import { useConfigStore } from '@/stores/config'
import type { AddWorkerRequest } from '@/types/api'
// Stores: worker list / pool statistics, and app config (GPU availability flag).
const workersStore = useWorkersStore()
const configStore = useConfigStore()
// True while loadWorkers() is fetching; the template shows a spinner instead of the table.
const loading = ref(true)
// Visibility of the "Add Worker" modal.
const showAddWorkerModal = ref(false)
// True while addWorker() is running; disables the modal's submit button.
const addingWorker = ref(false)
// Form model bound to the "Add Worker" modal inputs.
const newWorker = ref<AddWorkerRequest>({
worker_type: 'cpu',
device_id: 0
})
// Handle of the polling timer created in onMounted and cleared in onUnmounted.
let refreshInterval: number | null = null
// Becomes true once the first fetch has settled. Later background polls then
// refresh the data silently instead of replacing the table with the spinner.
let initialLoadDone = false

/**
 * Fetch the worker list and pool statistics into the workers store.
 *
 * The template renders a spinner in place of the table while `loading` is
 * true, and this function is also called by a 3-second polling timer. The
 * loading state is therefore raised only for the very first fetch or when
 * explicitly requested, so periodic polls do not make the view flicker.
 *
 * @param showSpinner force the loading state (used for manual refreshes)
 */
async function loadWorkers(showSpinner = false) {
  if (showSpinner || !initialLoadDone) {
    loading.value = true
  }
  try {
    await workersStore.fetchWorkers()
    await workersStore.fetchStats()
  } catch (error) {
    console.error('Failed to load workers:', error)
  } finally {
    loading.value = false
    initialLoadDone = true
  }
}

/** Manual refresh triggered from the header button; shows the loading state. */
async function refreshWorkers() {
  await loadWorkers(true)
}
/**
 * Submit the "Add Worker" form through the workers store.
 * On success the modal is closed and the form is reset to its defaults;
 * on failure the error message is shown and the modal stays open.
 */
async function addWorker() {
  addingWorker.value = true
  try {
    const request = newWorker.value
    await workersStore.addWorker(request)
    showAddWorkerModal.value = false
    // Reset form
    newWorker.value = { worker_type: 'cpu', device_id: 0 }
  } catch (error: any) {
    const reason = error.message || 'Unknown error'
    alert('Failed to add worker: ' + reason)
  } finally {
    addingWorker.value = false
  }
}
/**
 * Ask for confirmation, then remove the given worker through the workers store.
 * Does nothing when the user cancels; shows the error message on failure.
 */
async function removeWorker(workerId: string) {
  const confirmed = confirm(`Are you sure you want to remove worker ${workerId}?`)
  if (confirmed) {
    try {
      await workersStore.removeWorker(workerId)
    } catch (error: any) {
      const reason = error.message || 'Unknown error'
      alert('Failed to remove worker: ' + reason)
    }
  }
}
/**
 * Format an uptime in seconds as "Xh Ym", or "Ym" when under one hour.
 * Missing, non-finite or negative input is treated as zero so the table never
 * shows "NaNm" or a negative duration.
 */
function formatUptime(seconds: number): string {
  const safeSeconds = Number.isFinite(seconds) && seconds > 0 ? seconds : 0
  const hours = Math.floor(safeSeconds / 3600)
  const minutes = Math.floor((safeSeconds % 3600) / 60)
  return hours > 0 ? `${hours}h ${minutes}m` : `${minutes}m`
}
// While the view is mounted: load once immediately, then poll on a timer.
onMounted(() => {
  loadWorkers()
  // Auto-refresh every 3 seconds
  refreshInterval = window.setInterval(loadWorkers, 3000)
})

// Stop polling when the view is torn down.
onUnmounted(() => {
  if (!refreshInterval) return
  clearInterval(refreshInterval)
})
</script>
<style scoped>
.page-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-xl);
}
.header-actions {
display: flex;
gap: var(--spacing-md);
}
.stats-grid-large {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: var(--spacing-lg);
}
.stat-card {
display: flex;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-lg);
background-color: var(--tertiary-bg);
border-radius: var(--radius-md);
}
.stat-icon {
font-size: 2.5rem;
}
.stat-info {
flex: 1;
}
.stat-number {
font-size: 1.75rem;
font-weight: 700;
color: var(--text-primary);
}
.stat-label {
font-size: 0.875rem;
color: var(--text-muted);
}
.worker-id {
font-family: monospace;
font-size: 0.875rem;
}
.job-id {
font-family: monospace;
font-size: 0.75rem;
color: var(--accent-color);
}
.progress-container {
display: flex;
align-items: center;
gap: var(--spacing-sm);
}
.progress {
flex: 1;
min-width: 100px;
}
.progress-text {
font-size: 0.75rem;
color: var(--text-secondary);
min-width: 45px;
}
.empty-state {
text-align: center;
padding: var(--spacing-xl);
color: var(--text-muted);
}
.empty-state p {
margin-bottom: var(--spacing-md);
font-size: 1.125rem;
}
/* Modal styles */
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.7);
display: flex;
justify-content: center;
align-items: center;
z-index: 2000;
}
.modal {
background-color: var(--secondary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
max-height: 90vh;
overflow: auto;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--spacing-lg);
border-bottom: 1px solid var(--border-color);
}
.modal-header h2 {
font-size: 1.25rem;
color: var(--text-primary);
}
.btn-close {
background: none;
border: none;
color: var(--text-secondary);
font-size: 1.5rem;
cursor: pointer;
padding: 0;
width: 32px;
height: 32px;
display: flex;
align-items: center;
justify-content: center;
border-radius: var(--radius-sm);
transition: all var(--transition-fast);
}
.btn-close:hover {
background-color: var(--tertiary-bg);
color: var(--text-primary);
}
.modal-body {
padding: var(--spacing-lg);
}
.modal-footer {
display: flex;
justify-content: flex-end;
gap: var(--spacing-md);
padding: var(--spacing-lg);
border-top: 1px solid var(--border-color);
}
.form-group {
margin-bottom: var(--spacing-md);
}
.form-group label {
display: block;
margin-bottom: var(--spacing-sm);
color: var(--text-secondary);
font-weight: 500;
}
.form-control {
width: 100%;
padding: var(--spacing-sm) var(--spacing-md);
background-color: var(--tertiary-bg);
border: 1px solid var(--border-color);
border-radius: var(--radius-sm);
color: var(--text-primary);
font-size: 0.875rem;
}
.form-control:focus {
outline: none;
border-color: var(--accent-color);
}
.warning-text {
color: var(--warning-color);
font-size: 0.75rem;
display: block;
margin-top: var(--spacing-xs);
}
@media (max-width: 768px) {
.page-header {
flex-direction: column;
align-items: flex-start;
gap: var(--spacing-md);
}
.header-actions {
width: 100%;
}
.header-actions button {
flex: 1;
}
}
</style>

14
frontend/tsconfig.json Normal file
View File

@@ -0,0 +1,14 @@
{
"extends": "@vue/tsconfig/tsconfig.dom.json",
"include": ["env.d.ts", "src/**/*", "src/**/*.vue"],
"exclude": ["src/**/__tests__/*"],
"compilerOptions": {
"composite": true,
"baseUrl": ".",
"paths": {
"@/*": ["./src/*"]
},
"types": ["vite/client"]
}
}

File diff suppressed because one or more lines are too long

34
frontend/vite.config.ts Normal file
View File

@@ -0,0 +1,34 @@
import { fileURLToPath, URL } from 'node:url'
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'
// https://vitejs.dev/config/
// Address that the dev server forwards `/api` and `/health` requests to.
const BACKEND_URL = 'http://127.0.0.1:8000'

// Build a fresh proxy entry per route so the routes never share one object.
const proxyToBackend = () => ({
  target: BACKEND_URL,
  changeOrigin: true,
})

export default defineConfig({
  plugins: [vue()],
  resolve: {
    // `@/…` imports resolve to the `src` directory next to this file.
    alias: {
      '@': fileURLToPath(new URL('./src', import.meta.url)),
    },
  },
  server: {
    port: 3000,
    proxy: {
      '/api': proxyToBackend(),
      '/health': proxyToBackend(),
    },
  },
  build: {
    outDir: 'dist',
    assetsDir: 'assets',
    sourcemap: false,
  },
})

View File

@@ -1,182 +0,0 @@
import os
import sys
import urllib.request
import subprocess
import argparse
def convert_to_bool(in_bool):
    """Return True when *in_bool*, rendered as lower-case text, is a truthy word.

    Recognised truthy words are 'true', 'on', '1', 'y' and 'yes'. Any other
    value, including None, yields False.
    """
    truthy_words = ('true', 'on', '1', 'y', 'yes')
    normalized = str(in_bool).lower()
    return normalized in truthy_words
def install_packages_from_requirements(requirements_file):
    """Install or upgrade the packages listed in *requirements_file*.

    Tries ``pip3`` first and falls back to ``pip``. A candidate counts as
    failed when it exits with a non-zero status or cannot be started at all
    (for example because the executable is not on PATH). The outcome is
    reported on stdout; nothing is returned or raised.
    """
    for pip_cmd in ('pip3', 'pip'):
        try:
            subprocess.run([pip_cmd, 'install', '-r', requirements_file, '--upgrade'], check=True)
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: pip ran but failed. OSError: pip could not be
            # started. Either way, move on to the next candidate.
            continue
        print(f"Packages installed successfully using {pip_cmd}.")
        return
    print("Failed to install packages using both pip3 and pip.")
def download_from_github(url, output_file, timeout=60):
    """Download *url* and store the response body in *output_file*.

    The body is read completely before the output file is opened, so a failed
    or interrupted download never truncates an existing file. Errors are
    reported on stdout rather than raised.

    Args:
        url: Address to fetch.
        output_file: Path the downloaded bytes are written to.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        True when the file was written, False when the download or write failed.
    """
    # Imported here because the module header only imports urllib.request.
    import urllib.error

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            data = response.read()
        with open(output_file, 'wb') as out_file:
            out_file.write(data)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first.
        print(f"Failed to download file from {url}. HTTP Error Code: {e.code}")
        return False
    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}")
        return False
    except Exception as e:
        print(f"An error occurred: {e}")
        return False
    print(f"File downloaded successfully to {output_file}")
    return True
def prompt_and_save_bazarr_env_variables():
    """Interactively ask for the common Bazarr settings and save them to ``.env``.

    Each prompt shows its default in brackets; an empty answer selects the
    default. All answers are collected before ``.env`` is opened, so an
    interrupted prompt does not leave a truncated or partial file behind.
    """
    instructions = (
        "You will be prompted for several configuration values.\n"
        "If you wish to use the default value for any of them, simply press Enter without typing anything.\n"
        "The default values are shown in brackets [] next to the prompts.\n"
        "Items can be the value of true, on, 1, y, yes, false, off, 0, n, no, or an appropriate text response.\n"
    )
    print(instructions)

    # variable name -> (short description, prompt text, default value)
    env_vars = {
        'WHISPER_MODEL': ('Whisper Model', 'Enter the Whisper model you want to run: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, distil-large-v2, distil-medium.en, distil-small.en', 'medium'),
        'WEBHOOKPORT': ('Webhook Port', 'Default listening port for transcriptarr.py', '9000'),
        'TRANSCRIBE_DEVICE': ('Transcribe Device', 'Set as cpu or gpu', 'gpu'),
        # Defaulting to False here for the prompt, user can change
        'DEBUG': ('Debug', 'Enable debug logging (true/false)', 'False'),
        'CLEAR_VRAM_ON_COMPLETE': ('Clear VRAM', 'Attempt to clear VRAM when complete (Windows users may need to set this to False)', 'False'),
        'APPEND': ('Append', 'Append \'Transcribed by whisper\' to generated subtitle (true/false)', 'False'),
    }

    answers = {}
    for var, (_description, prompt, default) in env_vars.items():
        answers[var] = input(f"{prompt} [{default}]: ") or default

    with open('.env', 'w') as file:
        file.writelines(f"{var}={value}\n" for var, value in answers.items())
    print("Environment variables have been saved to .env")
def load_env_variables(env_filename='.env'):
    """Load ``KEY=VALUE`` lines from *env_filename* into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are skipped.
    Whitespace around the key and the value is removed, and lines with an
    empty key are ignored. Existing environment variables are overwritten. A
    missing file is reported on stdout and otherwise ignored.
    """
    try:
        with open(env_filename, 'r') as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line or line.startswith('#') or '=' not in line:
                    continue
                # Split on the first '=' only so values may themselves contain '='.
                var, _, value = line.partition('=')
                var = var.strip()
                if not var:
                    # An empty name cannot be set in the process environment.
                    continue
                os.environ[var] = value.strip()
        print(f"Environment variables have been loaded from {env_filename}")
    except FileNotFoundError:
        print(f"{env_filename} file not found. Consider running with --setup-bazarr or creating it manually.")
def main():
    """Entry point of the launcher.

    Steps, in order: parse the command line, optionally self-update and
    relaunch, load settings from ``.env``, apply the ``--debug`` / ``--append``
    overrides, optionally install requirements, download the main script when
    it is missing or an update was requested, and finally run it.
    """
    # Launch child scripts with the interpreter that is running this launcher,
    # instead of whatever 'python3' / 'python' happens to resolve to on PATH.
    # This matches the relaunch path below, which already uses sys.executable.
    python_cmd = sys.executable
    if not python_cmd:
        print("Script started with an unknown command")
        sys.exit(1)
    if sys.version_info[0] < 3:
        print(f"This script requires Python 3 or higher, you are running {sys.version}")
        sys.exit(1)

    # Work relative to the launcher's own directory.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser(prog="python launcher.py", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # action='store_true' means the flag is False by default, True if present
    parser.add_argument('-d', '--debug', action='store_true', help="Enable console debugging (overrides .env and external ENV)")
    parser.add_argument('-i', '--install', action='store_true', help="Install/update all necessary packages")
    parser.add_argument('-a', '--append', action='store_true', help="Append 'Transcribed by whisper' (overrides .env and external ENV)")
    parser.add_argument('-u', '--update', action='store_true', help="Update Subgen")
    parser.add_argument('-x', '--exit-early', action='store_true', help="Exit without running transcriptarr.py")
    parser.add_argument('-s', '--setup-bazarr', action='store_true', help="Prompt for common Bazarr setup parameters and save them for future runs")
    parser.add_argument('-b', '--branch', type=str, default='main', help='Specify the branch to download from')
    parser.add_argument('-l', '--launcher-update', action='store_true', help="Update launcher.py and re-launch")
    args = parser.parse_args()

    # An explicit --branch wins; otherwise fall back to the BRANCH environment variable.
    branch_name = args.branch if args.branch != 'main' else os.getenv('BRANCH', 'main')
    script_name_suffix = f"-{branch_name}.py" if branch_name != "main" else ".py"
    subgen_script_to_run = f"subgen{script_name_suffix}"
    language_code_script_to_download = f"language_code{script_name_suffix}"

    if args.launcher_update or convert_to_bool(os.getenv('LAUNCHER_UPDATE')):
        print(f"Updating launcher.py from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/launcher.py", f'launcher{script_name_suffix}')
        excluded_args = ['--launcher-update', '-l']
        new_args = [arg for arg in sys.argv[1:] if arg not in excluded_args]
        # The relaunched process inherits this environment. Clear the flag so it
        # does not trigger another update and relaunch, looping forever.
        os.environ['LAUNCHER_UPDATE'] = 'False'
        print(f"Relaunching updated launcher: launcher{script_name_suffix}")
        os.execl(sys.executable, sys.executable, f"launcher{script_name_suffix}", *new_args)
        # The script will not continue past os.execl

    # --- Environment Variable Handling ---
    # 1. Load from the .env file first. This sets a baseline. Note that
    #    load_env_variables overwrites variables that were already set
    #    externally before the launcher started.
    if args.setup_bazarr:
        prompt_and_save_bazarr_env_variables()
    # Load in both cases: right after saving, or assuming .env might exist.
    load_env_variables()

    # 2. Override with command-line arguments (highest priority for these specific flags)
    if args.debug:  # -d or --debug was passed
        os.environ['DEBUG'] = 'True'
        print("Launcher CLI: DEBUG set to True")
    elif 'DEBUG' not in os.environ:  # not set by CLI, .env or external environment
        os.environ['DEBUG'] = 'False'
        print("Launcher: DEBUG defaulted to False (no prior setting)")

    if args.append:  # -a or --append was passed
        os.environ['APPEND'] = 'True'
        print("Launcher CLI: APPEND set to True")
    elif 'APPEND' not in os.environ:  # not set by CLI, .env or external environment
        os.environ['APPEND'] = 'False'
    # --- End Environment Variable Handling ---

    requirements_url = "https://raw.githubusercontent.com/McCloudS/subgen/main/requirements.txt"
    requirements_file = "requirements.txt"
    if args.install:
        download_from_github(requirements_url, requirements_file)
        install_packages_from_requirements(requirements_file)

    if not os.path.exists(subgen_script_to_run) or args.update or convert_to_bool(os.getenv('UPDATE')):
        print(f"Downloading {subgen_script_to_run} from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/transcriptarr.py", subgen_script_to_run)
        print(f"Downloading {language_code_script_to_download} from GitHub branch {branch_name}...")
        download_from_github(f"https://raw.githubusercontent.com/McCloudS/subgen/{branch_name}/language_code.py", language_code_script_to_download)
    else:
        print(f"{subgen_script_to_run} exists and UPDATE is set to False, skipping download.")

    if not args.exit_early:
        print(f'Launching {subgen_script_to_run}')
        try:
            # -u: run the child with unbuffered stdout/stderr.
            subprocess.run([python_cmd, '-u', subgen_script_to_run], check=True)
        except FileNotFoundError:
            print(f"Error: Could not find {subgen_script_to_run}. Make sure it was downloaded correctly.")
        except subprocess.CalledProcessError as e:
            print(f"Error running {subgen_script_to_run}: {e}")
    else:
        print("Not running transcriptarr.py: -x or --exit-early set")
# Run the launcher only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -4,16 +4,28 @@ uvicorn[standard]
python-multipart
requests
python-dotenv>=1.0.0
psutil>=5.9.0
python-dateutil>=2.8.0
# Database & ORM (SQLite is built-in)
sqlalchemy>=2.0.0
pydantic>=2.0.0
pydantic-settings>=2.0.0
# Media processing (CPU-only by default)
# Media processing
numpy
ffmpeg-python
watchdog
apscheduler>=3.10.0
av>=10.0.0
# Whisper transcription (required)
openai-whisper
faster-whisper
stable-ts
# Translation (required for translate mode)
deep-translator>=1.11.0
# Optional dependencies (install based on configuration):
#
@@ -23,11 +35,6 @@ watchdog
# For MariaDB/MySQL database:
# pip install pymysql
#
# For Whisper transcription:
# pip install openai-whisper faster-whisper stable-ts
#
# For GPU support (NVIDIA):
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#
# For media file handling:
# pip install av>=10.0.0
# pip install nvidia-ml-py3

View File

@@ -1,56 +0,0 @@
<?xml version="1.0"?>
<Container version="2">
<Name>subgen</Name>
<ExtraParams>--gpus all</ExtraParams>
<Beta>false</Beta>
<Category>CATEGORY:</Category>
<Repository>mccloud/subgen</Repository>
<Registry>https://github.com/McCloudS/subgen</Registry>
<DonateText>If you appreciate my work, then please consider donating</DonateText>
<DonateLink>https://www.paypal.com/donate/?hosted_button_id=SU4QQP6LH5PF6</DonateLink>
<DonateImg>https://www.paypal.com/en_US/i/btn/btn_donate_SM.gif</DonateImg>
<Network>bridge</Network>
<Privileged>false</Privileged>
<Support>https://github.com/McCloudS/subgen/issues</Support>
<Shell>bash</Shell>
<GitHub>https://github.com/McCloudS/subgen</GitHub>
<ReadMe>https://github.com/McCloudS/subgen/blob/main/README.md</ReadMe>
<Project>https://github.com/McCloudS/subgen</Project>
<Overview>subgen will transcribe your personal media on a Plex, Emby, or Jellyfin server to create subtitles (.srt) from audio/video files, it can also be used as a Whisper Provider in Bazarr</Overview>
<WebUI>http://[IP]:[PORT:9000]/docs</WebUI>
<TemplateURL>https://github.com/McCloudS/subgen/blob/main/subgen.xml</TemplateURL>
<Icon>https://raw.githubusercontent.com/McCloudS/subgen/main/icon.png</Icon>
<Date>2024-03-23</Date>
<Changes></Changes>
<Config Name="Port: Webhook Port" Target="9000" Default="9000" Mode="tcp" Description="This is the port for the webhook" Type="Port" Display="always" Required="true" Mask="false"/>
<Config Name="Path: /subgen" Target="/subgen" Default="/mnt/user/appdata/subgen" Mode="rw" Description="This is the container path to your configuration files." Type="Path" Display="always" Required="true" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_DEVICE" Target="TRANSCRIBE_DEVICE" Default="gpu" Description="Can transcribe via gpu (Cuda only) or cpu. Takes option of 'cpu', 'gpu', 'cuda'." Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: WHISPER_MODEL" Target="WHISPER_MODEL" Default="medium" Description="Can be:'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1','large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-medium.en', 'distil-small.en'" Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: CONCURRENT_TRANSCRIPTIONS" Target="CONCURRENT_TRANSCRIPTIONS" Default="2" Description="Number of files it will transcribe in parallel" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WHISPER_THREADS" Target="WHISPER_THREADS" Default="4" Description="number of threads to use during computation" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: MODEL_PATH" Target="MODEL_PATH" Default="./models" Description="This is where the WHISPER_MODEL will be stored. This defaults to placing it where you execute the script in the folder 'models'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PROCADDEDMEDIA" Target="PROCADDEDMEDIA" Default="True" Description="will gen subtitles for all media added regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PROCMEDIAONPLAY" Target="PROCMEDIAONPLAY" Default="True" Description="will gen subtitles for all played media regardless of existing external/embedded subtitles (based off of SKIPIFINTERNALSUBLANG)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: NAMESUBLANG" Target="NAMESUBLANG" Default="aa" Description="allows you to pick what it will name the subtitle. Instead of using EN, I'm using AA, so it doesn't mix with existing external EN subs, and AA will populate higher on the list in Plex." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: SKIPIFINTERNALSUBLANG" Target="SKIPIFINTERNALSUBLANG" Default="eng" Description="Will not generate a subtitle if the file has an internal sub matching the 3 letter code of this variable (See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WORD_LEVEL_HIGHLIGHT" Target="WORD_LEVEL_HIGHLIGHT" Default="False" Description="Highlights each word as it's spoken in the subtitle. See example video @ https://github.com/jianfch/stable-ts" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PLEXSERVER" Target="PLEXSERVER" Default="http://plex:32400" Description="This needs to be set to your local plex server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: PLEXTOKEN" Target="PLEXTOKEN" Default="token here" Description="This needs to be set to your plex token found by https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: JELLYFINSERVER" Target="JELLYFINSERVER" Default="http://jellyfin:8096" Description="Set to your Jellyfin server address/port" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: JELLYFINTOKEN" Target="JELLYFINTOKEN" Default="token here" Description="Generate a token inside the Jellyfin interface" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: WEBHOOKPORT" Target="WEBHOOKPORT" Default="9000" Description="Change this if you need a different port for your webhook" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_FOLDERS" Target="TRANSCRIBE_FOLDERS" Default="" Description="Takes a pipe '|' separated list (For example: /tv|/movies|/familyvideos) and iterates through and adds those files to be queued for subtitle generation if they don't have internal subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: TRANSCRIBE_OR_TRANSLATE" Target="TRANSCRIBE_OR_TRANSLATE" Default="transcribe" Description="Takes either 'transcribe' or 'translate'. Transcribe will transcribe the audio in the same language as the input. Translate will transcribe and translate into English." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: COMPUTE_TYPE" Target="COMPUTE_TYPE" Default="auto" Description="Set compute-type using the following information: https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: DEBUG" Target="DEBUG" Default="True" Description="Provides some debug data that can be helpful to troubleshoot path mapping and other issues." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: FORCE_DETECTED_LANGUAGE_TO" Target="FORCE_DETECTED_LANGUAGE_TO" Default="" Description="This is to force the model to a language instead of the detected one, takes a 2 letter language code. For example, your audio is French but keeps detecting as English, you would set it to 'fr'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CLEAR_VRAM_ON_COMPLETE" Target="CLEAR_VRAM_ON_COMPLETE" Default="False" Description="This will delete the model and do garbage collection when queue is empty. Good if you need to use the VRAM for something else." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: UPDATE" Target="UPDATE" Default="True" Description="Will pull latest subgen.py from the repository if True. False will use the original subgen.py built into the Docker image. Standalone users can use this with launcher.py to get updates." Type="Variable" Display="always" Required="false" Mask="false"/>
<Config Name="Variable: APPEND" Target="APPEND" Default="False" Description="Will add the following at the end of a subtitle: 'Transcribed by whisperAI with faster-whisper ({whisper_model}) on {datetime.now()}'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: MONITOR" Target="MONITOR" Default="False" Description="Will monitor TRANSCRIBE_FOLDERS for real-time changes to see if we need to generate subtitles" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: USE_MODEL_PROMPT" Target="USE_MODEL_PROMPT" Default="False" Description="When set to True, will use the default prompt stored in greetings_translations 'Hello, welcome to my lecture.' to try and force the use of punctuation in transcriptions that don't." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CUSTOM_MODEL_PROMPT" Target="CUSTOM_MODEL_PROMPT" Default="" Description="If USE_MODEL_PROMPT is True, you can override the default prompt (See: https://medium.com/axinc-ai/prompt-engineering-in-whisper-6bb18003562d for great examples)." Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: LRC_FOR_AUDIO_FILES" Target="LRC_FOR_AUDIO_FILES" Default="True" Description="Will generate LRC (instead of SRT) files for filetypes: '.mp3', '.flac', '.wav', '.alac', '.ape', '.ogg', '.wma', '.m4a', '.m4b', '.aac', '.aiff'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
<Config Name="Variable: CUSTOM_REGROUP" Target="CUSTOM_REGROUP" Default="cm_sl=84_sl=42++++++1" Description="Attempts to regroup some of the segments to make a cleaner looking subtitle. See Issue #68 for discussion. Set to blank if you want to use Stable-TS default regroups algorithm of cm_sp=,* /_sg=.5_mg=.3+3_sp=.* /。/?/'" Type="Variable" Display="advanced" Required="false" Mask="false"/>
</Container>

View File

@@ -1,163 +0,0 @@
#!/usr/bin/env python3
"""Test script for TranscriptorIO backend components."""
import sys
import logging
# Configure the root logger at INFO level so this script's messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the functions below.
logger = logging.getLogger(__name__)
def test_config():
    """Check that the backend settings can be imported and read.

    Imports ``settings`` from ``backend.config`` and logs its mode,
    database type, Whisper model and transcription device.

    Returns:
        bool: True when the import and every attribute read succeeded,
        False when any of them raised an exception (the error is logged).
    """
    logger.info("Testing configuration...")
    try:
        # Imported here so that an import failure is reported as a failed
        # check instead of aborting the whole script.
        from backend.config import settings

        logger.info("✓ Config loaded successfully")
        logger.info(f" - Mode: {settings.transcriptarr_mode}")
        logger.info(f" - Database: {settings.database_type.value}")
        logger.info(f" - Whisper Model: {settings.whisper_model}")
        logger.info(f" - Device: {settings.transcribe_device}")
    except Exception as exc:
        logger.error(f"✗ Config test failed: {exc}")
        return False
    return True
def test_database():
    """Recreate the database tables and verify the connection.

    Drops any existing tables (best effort), creates them again, runs the
    database health check and logs the ``type`` and ``url`` entries of
    ``database.get_stats()``.

    Returns:
        bool: True when the tables were created; a failed health check is
        logged as an error but still counts as success. False when any
        other step raised an exception (the error and traceback are
        reported).
    """
    logger.info("\nTesting database...")
    try:
        from backend.core.database import database
        # NOTE(review): Base is not referenced below; presumably this import
        # is kept so the models module is loaded before create_tables() runs.
        # Confirm before removing it.
        from backend.core.models import Base  # noqa: F401

        # Clean database for a fresh test. Dropping is best effort: a failure
        # here (e.g. nothing to drop) must not fail the check, but it is
        # logged instead of being silently swallowed, and KeyboardInterrupt /
        # SystemExit are no longer caught.
        try:
            database.drop_tables()
            logger.info(" - Dropped existing tables for clean test")
        except Exception as drop_err:
            logger.warning(f" - Could not drop existing tables (continuing): {drop_err}")

        database.create_tables()
        logger.info("✓ Database initialized with fresh tables")

        # Test connection with health check
        if database.health_check():
            logger.info("✓ Database connection OK")
        else:
            logger.error("✗ Database health check failed (but tables were created)")
            # Don't fail the test if health check fails but tables exist
            return True

        # Get stats
        stats = database.get_stats()
        logger.info(f" - Type: {stats['type']}")
        logger.info(f" - URL: {stats['url']}")
        return True
    except Exception as e:
        logger.error(f"✗ Database test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
def test_queue_manager():
    """Exercise the basic queue manager operations.

    Adds a job, logs the queue statistics, tries to add the same job a
    second time (a duplicate that gets created only produces a warning)
    and finally fetches the next job for the worker ``test-worker-1``.

    Returns:
        bool: True when the job was created and could be fetched again,
        False when either step failed or any exception was raised.
    """
    logger.info("\nTesting queue manager...")
    try:
        from backend.core.queue_manager import queue_manager
        from backend.core.models import QualityPreset

        # Arguments shared by the first submission and the duplicate attempt.
        job_args = dict(
            file_path="/test/anime.mkv",
            file_name="anime.mkv",
            source_lang="ja",
            target_lang="es",
            quality_preset=QualityPreset.FAST,
        )

        # Add a test job
        created = queue_manager.add_job(**job_args, priority=5)
        if not created:
            logger.error("✗ Failed to create job")
            return False
        logger.info(f"✓ Job created: {created.id}")
        logger.info(f" - File: {created.file_name}")
        logger.info(f" - Status: {created.status.value}")
        logger.info(f" - Priority: {created.priority}")

        # Get queue stats
        queue_stats = queue_manager.get_queue_stats()
        logger.info("✓ Queue stats:")
        logger.info(f" - Total: {queue_stats['total']}")
        logger.info(f" - Queued: {queue_stats['queued']}")
        logger.info(f" - Processing: {queue_stats['processing']}")
        logger.info(f" - Completed: {queue_stats['completed']}")

        # Try to add duplicate (same file, default priority)
        second = queue_manager.add_job(**job_args)
        if second is not None:
            logger.warning("⚠ Duplicate job was created (should have been rejected)")
        else:
            logger.info("✓ Duplicate detection working")

        # Get next job
        claimed = queue_manager.get_next_job("test-worker-1")
        if not claimed:
            logger.error("✗ Failed to get next job")
            return False
        logger.info(f"✓ Got next job: {claimed.id} (assigned to test-worker-1)")
        logger.info(f" - Status: {claimed.status.value}")
        return True
    except Exception as exc:
        logger.error(f"✗ Queue manager test failed: {exc}")
        import traceback

        traceback.print_exc()
        return False
def main():
    """Run every backend check and log a pass/fail summary.

    Returns:
        int: 0 when all checks passed, 1 when at least one failed.
    """
    rule = "=" * 60
    logger.info(rule)
    logger.info("TranscriptorIO Backend Test Suite")
    logger.info(rule)

    # Every check runs, in this order, before the summary is printed.
    outcomes = {
        "Config": test_config(),
        "Database": test_database(),
        "Queue Manager": test_queue_manager(),
    }

    logger.info("\n" + rule)
    logger.info("Test Results:")
    logger.info(rule)
    for label, ok in outcomes.items():
        verdict = "✓ PASSED" if ok else "✗ FAILED"
        logger.info(f"{label}: {verdict}")
    logger.info(rule)

    if not all(outcomes.values()):
        logger.error("❌ Some tests failed")
        return 1
    logger.info("🎉 All tests passed!")
    return 0
if __name__ == "__main__":
    # Propagate the summary result as the process exit status.
    exit_code = main()
    sys.exit(exit_code)

File diff suppressed because it is too large Load Diff