From 529af217e92e5f534ab7a820f7411f9e1dc4626e Mon Sep 17 00:00:00 2001 From: Dasemu Date: Sun, 11 Jan 2026 21:23:58 +0100 Subject: [PATCH] docs: add comprehensive documentation and test suite - Add CLAUDE.md with project architecture and operation modes - Add backend/README.md with setup and usage instructions - Add test_backend.py with automated tests for config, database, and queue - Update requirements.txt with optional dependencies structure - Update .env.example with all configuration options --- .env.example | 90 +++++- CLAUDE.md | 747 ++++++++++++++++++++++++++++++++++++++++++++++ backend/README.md | 185 ++++++++++++ requirements.txt | 35 ++- test_backend.py | 163 ++++++++++ 5 files changed, 1211 insertions(+), 9 deletions(-) create mode 100644 CLAUDE.md create mode 100644 backend/README.md create mode 100755 test_backend.py diff --git a/.env.example b/.env.example index 1e0f198..274181f 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,90 @@ +# ============================================ +# TranscriptorIO Configuration +# ============================================ + +# === Application Mode === +# Options: standalone, provider, or standalone,provider (hybrid mode) +TRANSCRIPTARR_MODE=standalone + +# === Database Configuration === +# SQLite (default - no additional driver needed) +DATABASE_URL=sqlite:///./transcriptarr.db + +# PostgreSQL example (requires psycopg2-binary) +# DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr + +# MariaDB/MySQL example (requires pymysql) +# DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr + +# === Worker Configuration === +CONCURRENT_TRANSCRIPTIONS=2 +WHISPER_THREADS=4 +TRANSCRIBE_DEVICE=cpu +CLEAR_VRAM_ON_COMPLETE=True + +# === Whisper Model Configuration === +# Options: tiny, base, small, medium, large-v3, large-v3-turbo, etc. 
WHISPER_MODEL=medium -WEBHOOKPORT=9000 -TRANSCRIBE_DEVICE=gpu +MODEL_PATH=./models +COMPUTE_TYPE=auto + +# === Standalone Mode Configuration === +# Pipe-separated paths to scan +LIBRARY_PATHS=/media/anime|/media/movies +AUTO_SCAN_ENABLED=False +SCAN_INTERVAL_MINUTES=30 + +# Filter rules for standalone mode +REQUIRED_AUDIO_LANGUAGE=ja +REQUIRED_MISSING_SUBTITLE=spa +SKIP_IF_SUBTITLE_EXISTS=True + +# === Provider Mode Configuration === +BAZARR_URL=http://bazarr:6767 +BAZARR_API_KEY=your_api_key_here +PROVIDER_TIMEOUT_SECONDS=600 +PROVIDER_CALLBACK_ENABLED=True +PROVIDER_POLLING_INTERVAL=30 + +# === API Configuration === +WEBHOOK_PORT=9000 +API_HOST=0.0.0.0 DEBUG=True -CLEAR_VRAM_ON_COMPLETE=False + +# === Transcription Settings === +# Options: transcribe, translate +TRANSCRIBE_OR_TRANSLATE=transcribe +SUBTITLE_LANGUAGE_NAME= +# Options: ISO_639_1, ISO_639_2_T, ISO_639_2_B, NAME, NATIVE +SUBTITLE_LANGUAGE_NAMING_TYPE=ISO_639_2_B +WORD_LEVEL_HIGHLIGHT=False +CUSTOM_REGROUP=cm_sl=84_sl=42++++++1 + +# === Skip Configuration === +SKIP_IF_EXTERNAL_SUBTITLES_EXIST=False +SKIP_IF_TARGET_SUBTITLES_EXIST=True +SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE=eng +# Pipe-separated language codes +SKIP_SUBTITLE_LANGUAGES= +SKIP_IF_AUDIO_LANGUAGES= +SKIP_UNKNOWN_LANGUAGE=False +SKIP_ONLY_SUBGEN_SUBTITLES=False + +# === Advanced Settings === +FORCE_DETECTED_LANGUAGE_TO= +DETECT_LANGUAGE_LENGTH=30 +DETECT_LANGUAGE_OFFSET=0 +SHOULD_WHISPER_DETECT_AUDIO_LANGUAGE=False +# Pipe-separated list in order of preference +PREFERRED_AUDIO_LANGUAGES=eng + +# === Path Mapping === +USE_PATH_MAPPING=False +PATH_MAPPING_FROM=/tv +PATH_MAPPING_TO=/Volumes/TV + +# === Legacy SubGen Compatibility === +SHOW_IN_SUBNAME_SUBGEN=True +SHOW_IN_SUBNAME_MODEL=True APPEND=False +LRC_FOR_AUDIO_FILES=True \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e6baab6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,747 @@ +# CLAUDE.md - TranscriptorIO + +## ¿Qué es TranscriptorIO? 
+ +TranscriptorIO es un sistema completo de generación automática de subtítulos para contenido multimedia usando IA (Whisper + modelos de traducción). Es un **hard fork** de [SubGen](https://github.com/McCloudS/subgen) con una arquitectura completamente rediseñada inspirada en Tdarr. + +## Motivación + +SubGen es funcional pero tiene limitaciones fundamentales de diseño: + +### Problemas de SubGen +- **Procesamiento síncrono**: Bloquea threads mientras transcribe +- **Sin cola persistente**: Los trabajos se pierden al reiniciar +- **Sin WebUI**: Removida en marzo 2024, solo tiene Swagger docs +- **Sin visibilidad**: No sabes progreso, ETA, o estado de trabajos +- **Sin priorización**: No puedes reordenar trabajos +- **Timeouts en Bazarr**: Si un episodio tarda >5min, throttle de 24 horas +- **Configuración compleja**: 40+ variables ENV sin validación + +### Visión de TranscriptorIO + +Un sistema tipo **Tdarr pero para subtítulos**, con: +- ✅ Sistema de cola asíncrona persistente (SQLite) +- ✅ Workers configurables (múltiples GPUs/CPUs) +- ✅ WebUI moderna con progreso en tiempo real +- ✅ Múltiples pipelines de calidad (Fast/Balanced/Best) +- ✅ Integración asíncrona con Bazarr +- ✅ Procesamiento batch (temporadas completas) +- ✅ API REST completa +- ✅ WebSocket para updates en vivo + +## Casos de uso + +### Caso principal: Anime japonés → Subtítulos español + +**Problema**: Anime sin fansubs en español, solo tiene audio japonés. 
+ +**Pipeline**: +``` +Audio japonés + ↓ +Whisper (task="translate") → Texto inglés + ↓ +Helsinki-NLP (en→es) → Texto español + ↓ +Generar .srt con timestamps +``` + +**Alternativas configurables**: +- **Fast** (4GB VRAM): ja→en→es con Helsinki-NLP +- **Balanced** (6GB VRAM): ja→ja→es con M2M100 +- **Best** (10GB+ VRAM): ja→es directo con SeamlessM4T + +### Integración con stack existente +``` +Sonarr descarga episodio + ↓ +Bazarr detecta: faltan subtítulos español + ↓ +Bazarr → TranscriptorIO (provider asíncrono) + ↓ +TranscriptorIO encola trabajo + ↓ +Worker procesa cuando está libre + ↓ +Callback a Bazarr con .srt generado + ↓ +Jellyfin detecta nuevo subtítulo +``` + +## Modos de Operación + +TranscriptorIO soporta dos modos de operación (Standalone y Provider), que pueden habilitarse por separado o combinarse en un modo híbrido, y que se configuran mediante la variable de entorno `TRANSCRIPTARR_MODE`: + +### Modo Standalone (Tdarr-like) + +**Descripción**: TranscriptorIO escanea automáticamente tu biblioteca de medios y genera subtítulos según reglas configurables. + +**Casos de uso**: +- Procesamiento batch de biblioteca existente +- Monitoreo automático de nuevos archivos +- Control total sobre qué se transcribe sin depender de Bazarr + +**Funcionamiento**: +``` +1. Escaneo periódico con ffprobe + └─> Detecta archivos que cumplen criterios + (Ej: audio japonés + sin subs español) + +2. Encolado automático + └─> Añade a cola con prioridad configurada + +3. Procesamiento batch + └─> Workers procesan según disponibilidad + +4. 
Escritura directa + └─> Guarda .srt junto al archivo origen +``` + +**Configuración**: +```env +# Habilitar modo standalone +TRANSCRIPTARR_MODE=standalone + +# Carpetas a escanear (separadas por |) +LIBRARY_PATHS=/media/anime|/media/movies + +# Reglas de filtrado +REQUIRED_AUDIO_LANGUAGE=ja +REQUIRED_MISSING_SUBTITLE=spa +SKIP_IF_SUBTITLE_EXISTS=true + +# Escaneo automático +AUTO_SCAN_ENABLED=true +SCAN_INTERVAL_MINUTES=30 +``` + +**Ventajas**: +- ✅ No depende de integraciones externas +- ✅ Procesamiento batch eficiente +- ✅ Monitoreo automático de nueva media +- ✅ Control granular con reglas de filtrado + +### Modo Provider (Bazarr-slave) + +**Descripción**: TranscriptorIO actúa como provider de subtítulos para Bazarr mediante una API asíncrona mejorada. + +**Casos de uso**: +- Integración con stack *arr existente +- Gestión centralizada de subtítulos en Bazarr +- Fallback cuando no hay subtítulos pre-hechos + +**Funcionamiento**: +``` +1. Bazarr solicita subtítulo (API call) + └─> POST /api/provider/request + +2. TranscriptorIO encola trabajo + └─> Retorna job_id inmediatamente + └─> No bloquea thread de Bazarr + +3. Procesamiento asíncrono + └─> Worker transcribe cuando hay capacidad + +4. 
Callback a Bazarr + └─> POST {bazarr_callback_url} con .srt + └─> O polling de Bazarr cada 30s +``` + +**Configuración**: +```env +# Habilitar modo provider +TRANSCRIPTARR_MODE=provider + +# API de Bazarr para callbacks +BAZARR_URL=http://bazarr:6767 +BAZARR_API_KEY=your_api_key_here + +# Configuración del provider +PROVIDER_TIMEOUT_SECONDS=600 +PROVIDER_CALLBACK_ENABLED=true +PROVIDER_POLLING_INTERVAL=30 +``` + +**Ventajas vs SubGen original**: +- ✅ **No bloquea**: Retorna inmediatamente con job_id +- ✅ **Sin timeouts**: Bazarr no throttle por trabajos lentos +- ✅ **Visibilidad**: Bazarr puede consultar progreso +- ✅ **Reintentos**: Manejo automático de errores +- ✅ **Priorización**: Trabajos manuales tienen mayor prioridad + +### Modo Híbrido (Recomendado) + +Puedes habilitar ambos modos simultáneamente: + +```env +TRANSCRIPTARR_MODE=standalone,provider +``` + +**Beneficios**: +- Bazarr maneja media nueva automáticamente +- Standalone procesa biblioteca existente +- Cola unificada con priorización inteligente +- Mejor aprovechamiento de recursos + +## Arquitectura técnica + +### Stack tecnológico + +**Backend**: +- FastAPI (API REST + WebSocket) +- SQLAlchemy (ORM multi-backend) +- SQLite / PostgreSQL / MariaDB (queue persistente) +- faster-whisper (transcripción optimizada) +- Helsinki-NLP/opus-mt-en-es (traducción ligera) +- stable-ts (mejora de timestamps) + +**Frontend**: +- Vue 3 + Vite +- Tailwind CSS +- Chart.js (estadísticas) +- Socket.io-client (updates en tiempo real) + +**Infraestructura**: +- Docker + Docker Compose +- NVIDIA GPU support (opcional, también CPU) +- Multi-container: backend + workers + frontend + +### Componentes principales +``` +transcriptorio/ +├── backend/ +│ ├── core/ +│ │ ├── pipelines/ +│ │ │ ├── whisper_fast.py # ja→en→es (Helsinki) +│ │ │ ├── whisper_balanced.py # ja→ja→es (M2M100) +│ │ │ └── seamless.py # ja→es directo +│ │ ├── queue_manager.py # Cola SQLite +│ │ ├── worker_pool.py # Gestión de workers +│ │ └── transcriber.py 
# Core Whisper +│ ├── api/ +│ │ ├── legacy.py # /asr (compat SubGen/Bazarr) +│ │ ├── queue.py # /api/queue/* +│ │ ├── jobs.py # /api/jobs/* +│ │ └── websocket.py # /ws (real-time) +│ └── main.py +├── frontend/ +│ ├── src/ +│ │ ├── components/ +│ │ │ ├── Dashboard.vue # Stats + current job +│ │ │ ├── QueueManager.vue # Lista de trabajos +│ │ │ ├── JobDetails.vue # Detalles + logs +│ │ │ └── Settings.vue # Configuración +│ │ ├── App.vue +│ │ └── main.js +│ └── package.json +├── bazarr-integration/ +│ └── transcriptorio_provider.py # Custom provider asíncrono +└── docker-compose.yml +``` + +### Base de datos (SQLite) +```sql +CREATE TABLE jobs ( + id TEXT PRIMARY KEY, + file_path TEXT NOT NULL, + file_name TEXT NOT NULL, + status TEXT DEFAULT 'queued', -- queued, processing, completed, failed + priority INTEGER DEFAULT 0, + + -- Config + source_lang TEXT, + target_lang TEXT, + quality_preset TEXT DEFAULT 'fast', + + -- Progress + progress REAL DEFAULT 0, + current_stage TEXT, -- transcribing, translating, generating + eta_seconds INTEGER, + + -- Timestamps + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + started_at TIMESTAMP, + completed_at TIMESTAMP, + + -- Results + output_path TEXT, + srt_content TEXT, + segments_count INTEGER, + + -- Error handling + error TEXT, + retry_count INTEGER DEFAULT 0, + + -- Metadata + worker_id TEXT, + vram_used_mb INTEGER, + processing_time_seconds REAL +); + +CREATE INDEX idx_status ON jobs(status); +CREATE INDEX idx_priority ON jobs(priority DESC, created_at ASC); +CREATE INDEX idx_created ON jobs(created_at DESC); +``` + +### API Endpoints + +#### Legacy (compatibilidad SubGen/Bazarr) +```http +POST /asr?task=translate&language=ja&output=srt +Content-Type: multipart/form-data + +→ Respuesta síncrona con .srt +``` + +#### Modernos (TranscriptorIO) +```http +# Añadir trabajo a cola +POST /api/queue/add +{ + "files": ["/media/anime/episode.mkv"], + "source_lang": "ja", + "target_lang": "es", + "quality_preset": "fast", + "priority": 
0 +} +→ { "job_ids": ["uuid-1234"], "queued": 1 } + +# Estado de la cola +GET /api/queue/status +→ { + "pending": 3, + "processing": 1, + "completed_today": 12, + "failed_today": 0, + "vram_available": "1.5GB/4GB" +} + +# Detalles de trabajo +GET /api/jobs/{job_id} +→ { + "id": "uuid-1234", + "status": "processing", + "progress": 45.2, + "current_stage": "translating", + "eta_seconds": 120, + "file_name": "anime_ep01.mkv" +} + +# Historial +GET /api/jobs/history?limit=50 +→ [ { job }, { job }, ... ] + +# WebSocket updates +WS /ws +→ Stream continuo de updates +``` + +### WebUI + +#### Dashboard +``` +┌─────────────────────────────────────────────────────────┐ +│ TranscriptorIO 🟢 │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ 📊 Stats │ +│ ┌─────────┬──────────┬──────────┬─────────────────┐ │ +│ │ Queue: 3│Processing│Completed │ VRAM: 2.8/4.0GB │ │ +│ │ │ 1 │ Today │ │ │ +│ │ │ │ 12 │ │ │ +│ └─────────┴──────────┴──────────┴─────────────────┘ │ +│ │ +│ 🎬 Current Job │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ File: Anime_S01E05.mkv │ │ +│ │ Stage: Translating segments │ │ +│ │ Progress: ████████████░░░░░░ 65% │ │ +│ │ ETA: 2m 15s │ │ +│ │ Model: whisper-medium + helsinki-nlp │ │ +│ └─────────────────────────────────────────────────┘ │ +│ │ +│ 📋 Queue (3 pending) │ +│ ┌──┬─────────────────────┬────────┬──────────────┐ │ +│ │#1│Anime_S01E06.mkv │ Fast │ Priority: 0 │ │ +│ │#2│Movie_2024.mkv │ Best │ Priority: 0 │ │ +│ │#3│Show_S02E01.mkv │ Fast │ Priority: -1 │ │ +│ └──┴─────────────────────┴────────┴──────────────┘ │ +│ │ +│ [+ Add Files] [⚙️ Settings] [📊 Stats] [📖 Logs] │ +└─────────────────────────────────────────────────────────┘ +``` + +#### Settings +``` +┌─────────────────────────────────────────────────────────┐ +│ Settings │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ 🎯 Default Quality Preset │ +│ ○ Fast (4GB VRAM, ~3min/episode) │ +│ Whisper medium + Helsinki-NLP │ +│ Best for: GTX 1650, RTX 
3050 │ +│ │ +│ ● Balanced (6GB VRAM, ~5min/episode) │ +│ Whisper medium + M2M100 │ +│ Best for: RTX 3060, RTX 4060 │ +│ │ +│ ○ Best (10GB+ VRAM, ~10min/episode) │ +│ SeamlessM4T direct translation │ +│ Best for: RTX 4070+, professional GPUs │ +│ │ +│ ⚡ Workers Configuration │ +│ GPU Workers: [2] ▾ │ +│ CPU Workers: [1] ▾ │ +│ Concurrent jobs per worker: [1] ▾ │ +│ │ +│ 🌐 Default Languages │ +│ Source: [Japanese ▾] Target: [Spanish ▾] │ +│ │ +│ 📁 Paths │ +│ Watch folders: /media/anime │ +│ /media/movies │ +│ Output format: {filename}.{lang}.srt │ +│ │ +│ 🔔 Notifications │ +│ ☑ Discord webhook on completion │ +│ ☑ Email on failure │ +│ │ +│ [Save Changes] [Reset Defaults] │ +└─────────────────────────────────────────────────────────┘ +``` + +## Pipeline de transcripción detallado + +### Flujo Fast Preset (ja→en→es) +```python +# 1. Extracción de audio (si es video) +ffprobe detecta pistas de audio +→ Selecciona pista japonesa +→ Extrae con ffmpeg (opcional, Whisper acepta video directo) + +# 2. Whisper transcripción +WhisperModel("medium", compute_type="int8") +→ transcribe(audio, language="ja", task="translate") +→ Output: Segmentos con timestamps en INGLÉS + +Ejemplo: +[0.00s -> 3.50s] "Hello, welcome to today's episode" +[3.50s -> 7.80s] "We're going to see something interesting" + +# 3. Traducción en→es (batch) +Helsinki-NLP/opus-mt-en-es +→ Batch de 32 segmentos a la vez +→ Mantiene timestamps originales + +Ejemplo: +[0.00s -> 3.50s] "Hola, bienvenido al episodio de hoy" +[3.50s -> 7.80s] "Vamos a ver algo interesante" + +# 4. Generación SRT +Formato timestamps + texto +→ Guarda archivo.es.srt + +# 5. 
Post-processing (opcional) +- Aeneas re-sync (ajuste fino de timestamps) +- Subtitle styling (ASS format) +- Quality check (detección de errores) +``` + +### Uso de VRAM esperado + +**GTX 1650 (4GB VRAM)**: +``` +Fast preset: +- Whisper medium INT8: ~2.5GB +- Helsinki-NLP: ~1GB +- Overhead sistema: ~0.5GB +Total: ~4GB ✅ Cabe perfecto +Tiempo: ~3-5 min por episodio 24min +``` + +**RTX 3060 (12GB VRAM)**: +``` +Balanced preset: +- Whisper large-v3 INT8: ~5GB +- M2M100: ~2GB +- Overhead: ~1GB +Total: ~8GB ✅ Sobra espacio +Tiempo: ~4-7 min por episodio 24min +``` + +## Integración con Bazarr + +### Custom Provider (asíncrono) +```python +# bazarr/libs/subliminal_patch/providers/transcriptorio.py + +class TranscriptorIOProvider(Provider): + """ + Provider asíncrono para TranscriptorIO + A diferencia del provider Whisper original, NO bloquea + """ + + provider_name = 'transcriptorio' + + def download_subtitle(self, subtitle): + # Si es búsqueda automática → async (no bloquea) + if not subtitle.manual_search: + job_id = self._queue_job(subtitle) + raise SubtitlePending( + job_id=job_id, + eta=self._estimate_time(subtitle) + ) + + # Si es búsqueda manual → sync con long polling + return self._process_sync(subtitle, timeout=600) + + def _queue_job(self, subtitle): + """Encola trabajo sin esperar""" + response = requests.post( + f"{self.endpoint}/api/queue/add", + json={ + "file": subtitle.video.name, + "source_lang": "ja", + "target_lang": "es", + "quality_preset": self.quality_preset, + "callback_url": self._get_callback_url(subtitle.id) + }, + headers={"X-API-Key": self.api_key} + ) + return response.json()["job_ids"][0] + +# Background task en Bazarr (cada 30s) +@scheduler.scheduled_job('interval', seconds=30) +def poll_transcriptorio_jobs(): + """Revisar trabajos completados""" + pending = db.get_pending_transcriptorio_jobs() + + for job in pending: + status = get_job_status(job.provider_job_id) + + if status['status'] == 'completed': + save_subtitle(job.subtitle_id, 
status['srt_content']) + db.mark_completed(job.id) +``` + +### Ventajas vs provider Whisper original + +| Feature | Whisper (original) | TranscriptorIO | +|---------|-------------------|----------------| +| Bloquea thread Bazarr | ✅ Sí (3-10min) | ❌ No (async) | +| Timeout 24h si tarda | ✅ Sí | ❌ No | +| Cola visible | ❌ No | ✅ Sí (WebUI) | +| Retry automático | ❌ No | ✅ Sí | +| Priorización | ❌ No | ✅ Sí | +| Múltiples GPUs | ❌ No | ✅ Sí | +| WebUI | ❌ No | ✅ Sí | + +## Roadmap de desarrollo + +### Fase 1: MVP Backend (2-3 semanas) + +**Objetivos**: +- [ ] Queue manager con SQLite +- [ ] Worker pool básico +- [ ] Pipeline Fast (Whisper + Helsinki-NLP) +- [ ] API REST completa +- [ ] Endpoint legacy `/asr` compatible + +**Entregables**: +- Backend funcional headless +- Docker Compose para testing +- Documentación API + +### Fase 2: WebUI (2-3 semanas) + +**Objetivos**: +- [ ] Dashboard con stats +- [ ] Queue viewer con drag&drop +- [ ] Job details con logs +- [ ] Settings page +- [ ] WebSocket integration + +**Entregables**: +- WebUI completa y funcional +- Mobile responsive +- Tema dark/light + +### Fase 3: Bazarr Integration (1-2 semanas) + +**Objetivos**: +- [ ] Custom provider asíncrono +- [ ] Background polling task +- [ ] Callback webhook support +- [ ] Testing con Bazarr real + +**Entregables**: +- Provider plugin para Bazarr +- Documentación integración +- PR al repo de Bazarr (si aceptan) + +### Fase 4: Features Avanzados (3-4 semanas) + +**Objetivos**: +- [ ] Pipeline Balanced (M2M100) +- [ ] Pipeline Best (SeamlessM4T) +- [ ] Batch operations (temporadas) +- [ ] Scanner automático (inotify) +- [ ] Post-processing (Aeneas sync) +- [ ] Notificaciones (Discord, email) + +**Entregables**: +- Sistema completo production-ready +- Docs completas +- Tests automatizados + +### Fase 5: Release & Community (ongoing) + +**Objetivos**: +- [ ] Docker Hub releases +- [ ] GitHub Actions CI/CD +- [ ] Documentación completa +- [ ] Video tutoriales +- [ ] Anuncio en 
comunidades + +**Canales**: +- /r/selfhosted +- /r/homelab +- Discord de Bazarr +- LinuxServer.io + +## Métricas de éxito + +**Técnicas**: +- ✅ Procesa episodio 24min en <5min (GTX 1650) +- ✅ Uso VRAM <4GB total +- ✅ Queue persiste entre reinicios +- ✅ API response time <100ms +- ✅ WebUI load time <2s + +**UX**: +- ✅ Setup en <15min para usuario promedio +- ✅ Zero-config con defaults razonables +- ✅ WebUI intuitiva (no necesita docs) + +**Comunidad**: +- 🎯 100 stars en primer mes +- 🎯 500 stars en 6 meses +- 🎯 10+ contributors +- 🎯 Featured en LinuxServer.io + +## Diferenciadores clave + +### vs SubGen +- ✅ WebUI moderna vs ❌ Sin UI +- ✅ Cola asíncrona vs ❌ Queue simple +- ✅ Múltiples presets vs ❌ Config manual +- ✅ Worker pool vs ❌ Single process + +### vs Tdarr +- ✅ Específico para subtítulos vs 🔧 General transcoding +- ✅ Integración Bazarr nativa vs ⚠️ Solo webhooks +- ✅ Traducción multilingüe vs ❌ No traduce + +### vs Whisper-ASR-Webservice +- ✅ Cola persistente vs ❌ Stateless +- ✅ WebUI vs ❌ Solo API +- ✅ Múltiples pipelines vs ⚠️ Solo Whisper + +## Consideraciones técnicas + +### Limitaciones conocidas + +**Whisper**: +- Solo traduce a inglés (limitación del modelo) +- Necesita audio limpio (música de fondo degrada calidad) +- Nombres propios se traducen mal +- Honoríficos japoneses se pierden + +**Traducción**: +- Helsinki-NLP a veces muy literal +- Expresiones idiomáticas se pierden +- Sin contexto entre segmentos + +**Hardware**: +- GPU mínima: GTX 1050 Ti (4GB VRAM) +- Recomendada: RTX 3060 (12GB VRAM) +- CPU funciona pero 10x más lento + +### Mitigaciones + +**Mejorar calidad**: +- Usar Balanced/Best presets si hay VRAM +- Post-processing con Aeneas para mejor sync +- Manual review de nombres propios +- Context prompting en Whisper + +**Optimizar velocidad**: +- Batch translation (32 segments) +- Cache de modelos en VRAM +- Pipeline paralelo (transcribe + traduce simultáneo) + +## Stack de desarrollo + +### Backend +``` +Python 3.11+ +FastAPI 0.100+ 
+SQLite 3.40+ +faster-whisper 1.0+ +transformers 4.35+ +torch 2.1+ (CUDA 12.x) +``` + +### Frontend +``` +Node 20+ +Vue 3.4+ +Vite 5+ +Tailwind CSS 3.4+ +Socket.io-client 4.7+ +Chart.js 4.4+ +``` + +### DevOps +``` +Docker 24+ +Docker Compose 2.20+ +GitHub Actions +Docker Hub +``` + +## Licencia + +**Apache 2.0** (misma que SubGen) + +Permite: +- ✅ Uso comercial +- ✅ Modificación +- ✅ Distribución +- ✅ Uso privado + +Requiere: +- ⚠️ Incluir licencia y copyright +- ⚠️ Documentar cambios + +## Contacto + +- **GitHub**: `github.com/[tu-usuario]/transcriptorio` +- **Discord**: [crear servidor] +- **Email**: [configurar] + +## Referencias + +- SubGen original: https://github.com/McCloudS/subgen +- Bazarr: https://github.com/morpheus65535/bazarr +- Whisper: https://github.com/openai/whisper +- faster-whisper: https://github.com/guillaumekln/faster-whisper +- stable-ts: https://github.com/jianfch/stable-ts +- Tdarr: https://github.com/HaveAGitGat/Tdarr + +--- + +**Última actualización**: 2026-01-11 +**Versión**: 0.1.0-planning +**Estado**: En diseño \ No newline at end of file diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000..459b7fa --- /dev/null +++ b/backend/README.md @@ -0,0 +1,185 @@ +# TranscriptorIO Backend + +This is the redesigned backend for TranscriptorIO, a complete fork of SubGen with modern asynchronous architecture. + +## 🎯 Goal + +Replace SubGen's synchronous non-persistent system with a modern Tdarr-inspired architecture: +- ✅ Persistent queue (SQLite/PostgreSQL/MariaDB) +- ✅ Asynchronous processing +- ✅ Job prioritization +- ✅ Complete state visibility +- ✅ No Bazarr timeouts + +## 📁 Structure + +``` +backend/ +├── core/ +│ ├── database.py # Multi-backend database management +│ ├── models.py # SQLAlchemy models (Job, etc.) 
+│ ├── queue_manager.py # Asynchronous persistent queue +│ └── __init__.py +├── api/ # (coming soon) FastAPI endpoints +├── config.py # Centralized configuration with Pydantic +└── README.md # This file +``` + +## 🚀 Setup + +### 1. Install dependencies + +```bash +pip install -r requirements.txt +``` + +### 2. Configure .env + +Copy `.env.example` to `.env` and adjust as needed: + +```bash +cp .env.example .env +``` + +#### Database Options + +**SQLite (default)**: +```env +DATABASE_URL=sqlite:///./transcriptarr.db +``` + +**PostgreSQL**: +```bash +pip install psycopg2-binary +``` +```env +DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr +``` + +**MariaDB/MySQL**: +```bash +pip install pymysql +``` +```env +DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr +``` + +### 3. Choose operation mode + +**Standalone Mode** (automatically scans your library): +```env +TRANSCRIPTARR_MODE=standalone +LIBRARY_PATHS=/media/anime|/media/movies +AUTO_SCAN_ENABLED=True +SCAN_INTERVAL_MINUTES=30 +``` + +**Provider Mode** (receives jobs from Bazarr): +```env +TRANSCRIPTARR_MODE=provider +BAZARR_URL=http://bazarr:6767 +BAZARR_API_KEY=your_api_key +``` + +**Hybrid Mode** (both simultaneously): +```env +TRANSCRIPTARR_MODE=standalone,provider +``` + +## 🧪 Testing + +Run the test script to verify everything works: + +```bash +python test_backend.py +``` + +This will verify: +- ✓ Configuration loading +- ✓ Database connection +- ✓ Table creation +- ✓ Queue operations (add, get, deduplicate) + +## 📊 Implemented Components + +### config.py +- Centralized configuration with Pydantic +- Automatic environment variable validation +- Multi-backend database support +- Operation mode configuration + +### database.py +- Connection management with SQLAlchemy +- Support for SQLite, PostgreSQL, MariaDB +- Backend-specific optimizations + - SQLite: WAL mode, optimized cache + - PostgreSQL: connection pooling, pre-ping + - MariaDB: utf8mb4 charset, pooling +- 
Health checks and statistics + +### models.py +- Complete `Job` model with: + - States: queued, processing, completed, failed, cancelled + - Stages: pending, detecting_language, transcribing, translating, etc. + - Quality presets: fast, balanced, best + - Progress tracking (0-100%) + - Complete timestamps + - Retry logic + - Worker assignment +- Optimized indexes for common queries + +### queue_manager.py +- Thread-safe persistent queue +- Job prioritization +- Duplicate detection +- Automatic retry for failed jobs +- Real-time statistics +- Automatic cleanup of old jobs + +## 🔄 Comparison with SubGen + +| Feature | SubGen | TranscriptorIO | +|---------|--------|----------------| +| Queue | In-memory (lost on restart) | **Persistent in DB** | +| Processing | Synchronous (blocks threads) | **Asynchronous** | +| Prioritization | No | **Yes (configurable)** | +| Visibility | No progress/ETA | **Progress + real-time ETA** | +| Deduplication | Basic (memory only) | **Persistent + intelligent** | +| Retries | No | **Automatic with limit** | +| Database | No | **SQLite/PostgreSQL/MariaDB** | +| Bazarr Timeouts | Yes (>5min = 24h throttle) | **No (async)** | + +## 📝 Next Steps + +1. **Worker Pool** - Asynchronous worker system +2. **REST API** - FastAPI endpoints for management +3. **WebSocket** - Real-time updates +4. **Transcriber** - Whisper wrapper with progress callbacks +5. **Bazarr Provider** - Improved async provider +6. **Standalone Scanner** - Automatic library scanning + +## 🐛 Troubleshooting + +### Error: "No module named 'backend'" + +Make sure to run scripts from the project root (the directory that contains `backend/` and `test_backend.py`): +```bash +cd /path/to/Transcriptarr +python test_backend.py +``` + +### Error: Database locked (SQLite) + +SQLite is configured with WAL mode for better concurrency. If you still have issues, consider using PostgreSQL for production. 
+ +### Error: pydantic ValidationError (missing settings) + +Verify that all required variables are in your `.env`: +```bash +cp .env.example .env +# Edit .env with your values +``` + +## 📚 Documentation + +See `CLAUDE.md` for complete architecture and project roadmap. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2f53476..2ef443c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,33 @@ -numpy -stable-ts +# Core dependencies fastapi -requests -faster-whisper -uvicorn +uvicorn[standard] python-multipart +requests +python-dotenv>=1.0.0 + +# Database & ORM (SQLite is built-in) +sqlalchemy>=2.0.0 +pydantic>=2.0.0 +pydantic-settings>=2.0.0 + +# Media processing (CPU-only by default) +numpy ffmpeg-python -whisper watchdog + +# Optional dependencies (install based on configuration): +# +# For PostgreSQL database: +# pip install psycopg2-binary +# +# For MariaDB/MySQL database: +# pip install pymysql +# +# For Whisper transcription: +# pip install openai-whisper faster-whisper stable-ts +# +# For GPU support (NVIDIA): +# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +# +# For media file handling: +# pip install "av>=10.0.0" \ No newline at end of file diff --git a/test_backend.py b/test_backend.py new file mode 100755 index 0000000..2e87914 --- /dev/null +++ b/test_backend.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +"""Test script for TranscriptorIO backend components.""" +import sys +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def test_config(): + """Test configuration loading.""" + logger.info("Testing configuration...") + try: + from backend.config import settings + logger.info(f"✓ Config loaded successfully") + logger.info(f" - Mode: {settings.transcriptarr_mode}") + logger.info(f" - Database: {settings.database_type.value}") + logger.info(f" - Whisper Model: {settings.whisper_model}") + logger.info(f" - Device: 
{settings.transcribe_device}") + return True + except Exception as e: + logger.error(f"✗ Config test failed: {e}") + return False + + +def test_database(): + """Test database connection and table creation.""" + logger.info("\nTesting database...") + try: + from backend.core.database import database + from backend.core.models import Base + + # Clean database for fresh test + try: + database.drop_tables() + logger.info(f" - Dropped existing tables for clean test") + except: + pass + + database.create_tables() + logger.info(f"✓ Database initialized with fresh tables") + + # Test connection with health check + if database.health_check(): + logger.info(f"✓ Database connection OK") + else: + logger.error("✗ Database health check failed (but tables were created)") + # Don't fail the test if health check fails but tables exist + return True + + # Get stats + stats = database.get_stats() + logger.info(f" - Type: {stats['type']}") + logger.info(f" - URL: {stats['url']}") + + return True + except Exception as e: + logger.error(f"✗ Database test failed: {e}") + import traceback + traceback.print_exc() + return False + + +def test_queue_manager(): + """Test queue manager operations.""" + logger.info("\nTesting queue manager...") + try: + from backend.core.queue_manager import queue_manager + from backend.core.models import QualityPreset + + # Add a test job + job = queue_manager.add_job( + file_path="/test/anime.mkv", + file_name="anime.mkv", + source_lang="ja", + target_lang="es", + quality_preset=QualityPreset.FAST, + priority=5 + ) + + if job: + logger.info(f"✓ Job created: {job.id}") + logger.info(f" - File: {job.file_name}") + logger.info(f" - Status: {job.status.value}") + logger.info(f" - Priority: {job.priority}") + else: + logger.error("✗ Failed to create job") + return False + + # Get queue stats + stats = queue_manager.get_queue_stats() + logger.info(f"✓ Queue stats:") + logger.info(f" - Total: {stats['total']}") + logger.info(f" - Queued: {stats['queued']}") + 
logger.info(f" - Processing: {stats['processing']}") + logger.info(f" - Completed: {stats['completed']}") + + # Try to add duplicate + duplicate = queue_manager.add_job( + file_path="/test/anime.mkv", + file_name="anime.mkv", + source_lang="ja", + target_lang="es", + quality_preset=QualityPreset.FAST + ) + + if duplicate is None: + logger.info(f"✓ Duplicate detection working") + else: + logger.warning(f"⚠ Duplicate job was created (should have been rejected)") + + # Get next job + next_job = queue_manager.get_next_job("test-worker-1") + if next_job: + logger.info(f"✓ Got next job: {next_job.id} (assigned to test-worker-1)") + logger.info(f" - Status: {next_job.status.value}") + else: + logger.error("✗ Failed to get next job") + return False + + return True + except Exception as e: + logger.error(f"✗ Queue manager test failed: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + """Run all tests.""" + logger.info("=" * 60) + logger.info("TranscriptorIO Backend Test Suite") + logger.info("=" * 60) + + results = { + "Config": test_config(), + "Database": test_database(), + "Queue Manager": test_queue_manager(), + } + + logger.info("\n" + "=" * 60) + logger.info("Test Results:") + logger.info("=" * 60) + + all_passed = True + for test_name, passed in results.items(): + status = "✓ PASSED" if passed else "✗ FAILED" + logger.info(f"{test_name}: {status}") + if not passed: + all_passed = False + + logger.info("=" * 60) + + if all_passed: + logger.info("🎉 All tests passed!") + return 0 + else: + logger.error("❌ Some tests failed") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file