docs: add comprehensive documentation and test suite
- Add CLAUDE.md with project architecture and operation modes - Add backend/README.md with setup and usage instructions - Add test_backend.py with automated tests for config, database, and queue - Update requirements.txt with optional dependencies structure - Update .env.example with all configuration options
This commit is contained in:
90
.env.example
90
.env.example
@@ -1,6 +1,90 @@
|
|||||||
|
# ============================================
|
||||||
|
# TranscriptorIO Configuration
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# === Application Mode ===
|
||||||
|
# Options: standalone, provider, or standalone,provider (hybrid mode)
|
||||||
|
TRANSCRIPTARR_MODE=standalone
|
||||||
|
|
||||||
|
# === Database Configuration ===
|
||||||
|
# SQLite (default - no additional driver needed)
|
||||||
|
DATABASE_URL=sqlite:///./transcriptarr.db
|
||||||
|
|
||||||
|
# PostgreSQL example (requires psycopg2-binary)
|
||||||
|
# DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr
|
||||||
|
|
||||||
|
# MariaDB/MySQL example (requires pymysql)
|
||||||
|
# DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr
|
||||||
|
|
||||||
|
# === Worker Configuration ===
|
||||||
|
CONCURRENT_TRANSCRIPTIONS=2
|
||||||
|
WHISPER_THREADS=4
|
||||||
|
TRANSCRIBE_DEVICE=cpu
|
||||||
|
CLEAR_VRAM_ON_COMPLETE=True
|
||||||
|
|
||||||
|
# === Whisper Model Configuration ===
|
||||||
|
# Options: tiny, base, small, medium, large-v3, large-v3-turbo, etc.
|
||||||
WHISPER_MODEL=medium
|
WHISPER_MODEL=medium
|
||||||
WEBHOOKPORT=9000
|
MODEL_PATH=./models
|
||||||
TRANSCRIBE_DEVICE=gpu
|
COMPUTE_TYPE=auto
|
||||||
|
|
||||||
|
# === Standalone Mode Configuration ===
|
||||||
|
# Pipe-separated paths to scan
|
||||||
|
LIBRARY_PATHS=/media/anime|/media/movies
|
||||||
|
AUTO_SCAN_ENABLED=False
|
||||||
|
SCAN_INTERVAL_MINUTES=30
|
||||||
|
|
||||||
|
# Filter rules for standalone mode
|
||||||
|
REQUIRED_AUDIO_LANGUAGE=ja
|
||||||
|
REQUIRED_MISSING_SUBTITLE=spa
|
||||||
|
SKIP_IF_SUBTITLE_EXISTS=True
|
||||||
|
|
||||||
|
# === Provider Mode Configuration ===
|
||||||
|
BAZARR_URL=http://bazarr:6767
|
||||||
|
BAZARR_API_KEY=your_api_key_here
|
||||||
|
PROVIDER_TIMEOUT_SECONDS=600
|
||||||
|
PROVIDER_CALLBACK_ENABLED=True
|
||||||
|
PROVIDER_POLLING_INTERVAL=30
|
||||||
|
|
||||||
|
# === API Configuration ===
|
||||||
|
WEBHOOK_PORT=9000
|
||||||
|
API_HOST=0.0.0.0
|
||||||
DEBUG=True
|
DEBUG=True
|
||||||
CLEAR_VRAM_ON_COMPLETE=False
|
|
||||||
|
# === Transcription Settings ===
|
||||||
|
# Options: transcribe, translate
|
||||||
|
TRANSCRIBE_OR_TRANSLATE=transcribe
|
||||||
|
SUBTITLE_LANGUAGE_NAME=
|
||||||
|
# Options: ISO_639_1, ISO_639_2_T, ISO_639_2_B, NAME, NATIVE
|
||||||
|
SUBTITLE_LANGUAGE_NAMING_TYPE=ISO_639_2_B
|
||||||
|
WORD_LEVEL_HIGHLIGHT=False
|
||||||
|
CUSTOM_REGROUP=cm_sl=84_sl=42++++++1
|
||||||
|
|
||||||
|
# === Skip Configuration ===
|
||||||
|
SKIP_IF_EXTERNAL_SUBTITLES_EXIST=False
|
||||||
|
SKIP_IF_TARGET_SUBTITLES_EXIST=True
|
||||||
|
SKIP_IF_INTERNAL_SUBTITLES_LANGUAGE=eng
|
||||||
|
# Pipe-separated language codes
|
||||||
|
SKIP_SUBTITLE_LANGUAGES=
|
||||||
|
SKIP_IF_AUDIO_LANGUAGES=
|
||||||
|
SKIP_UNKNOWN_LANGUAGE=False
|
||||||
|
SKIP_ONLY_SUBGEN_SUBTITLES=False
|
||||||
|
|
||||||
|
# === Advanced Settings ===
|
||||||
|
FORCE_DETECTED_LANGUAGE_TO=
|
||||||
|
DETECT_LANGUAGE_LENGTH=30
|
||||||
|
DETECT_LANGUAGE_OFFSET=0
|
||||||
|
SHOULD_WHISPER_DETECT_AUDIO_LANGUAGE=False
|
||||||
|
# Pipe-separated list in order of preference
|
||||||
|
PREFERRED_AUDIO_LANGUAGES=eng
|
||||||
|
|
||||||
|
# === Path Mapping ===
|
||||||
|
USE_PATH_MAPPING=False
|
||||||
|
PATH_MAPPING_FROM=/tv
|
||||||
|
PATH_MAPPING_TO=/Volumes/TV
|
||||||
|
|
||||||
|
# === Legacy SubGen Compatibility ===
|
||||||
|
SHOW_IN_SUBNAME_SUBGEN=True
|
||||||
|
SHOW_IN_SUBNAME_MODEL=True
|
||||||
APPEND=False
|
APPEND=False
|
||||||
|
LRC_FOR_AUDIO_FILES=True
|
||||||
185
backend/README.md
Normal file
185
backend/README.md
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# TranscriptorIO Backend
|
||||||
|
|
||||||
|
This is the redesigned backend for TranscriptorIO, a complete fork of SubGen with modern asynchronous architecture.
|
||||||
|
|
||||||
|
## 🎯 Goal
|
||||||
|
|
||||||
|
Replace SubGen's synchronous non-persistent system with a modern Tdarr-inspired architecture:
|
||||||
|
- ✅ Persistent queue (SQLite/PostgreSQL/MariaDB)
|
||||||
|
- ✅ Asynchronous processing
|
||||||
|
- ✅ Job prioritization
|
||||||
|
- ✅ Complete state visibility
|
||||||
|
- ✅ No Bazarr timeouts
|
||||||
|
|
||||||
|
## 📁 Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
backend/
|
||||||
|
├── core/
|
||||||
|
│ ├── database.py # Multi-backend database management
|
||||||
|
│ ├── models.py # SQLAlchemy models (Job, etc.)
|
||||||
|
│ ├── queue_manager.py # Asynchronous persistent queue
|
||||||
|
│ └── __init__.py
|
||||||
|
├── api/ # (coming soon) FastAPI endpoints
|
||||||
|
├── config.py # Centralized configuration with Pydantic
|
||||||
|
└── README.md # This file
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🚀 Setup
|
||||||
|
|
||||||
|
### 1. Install dependencies
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure .env
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env` and adjust as needed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Database Options
|
||||||
|
|
||||||
|
**SQLite (default)**:
|
||||||
|
```env
|
||||||
|
DATABASE_URL=sqlite:///./transcriptarr.db
|
||||||
|
```
|
||||||
|
|
||||||
|
**PostgreSQL**:
|
||||||
|
```bash
|
||||||
|
pip install psycopg2-binary
|
||||||
|
```
|
||||||
|
```env
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:5432/transcriptarr
|
||||||
|
```
|
||||||
|
|
||||||
|
**MariaDB/MySQL**:
|
||||||
|
```bash
|
||||||
|
pip install pymysql
|
||||||
|
```
|
||||||
|
```env
|
||||||
|
DATABASE_URL=mariadb+pymysql://user:password@localhost:3306/transcriptarr
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Choose operation mode
|
||||||
|
|
||||||
|
**Standalone Mode** (automatically scans your library):
|
||||||
|
```env
|
||||||
|
TRANSCRIPTARR_MODE=standalone
|
||||||
|
LIBRARY_PATHS=/media/anime|/media/movies
|
||||||
|
AUTO_SCAN_ENABLED=True
|
||||||
|
SCAN_INTERVAL_MINUTES=30
|
||||||
|
```
|
||||||
|
|
||||||
|
**Provider Mode** (receives jobs from Bazarr):
|
||||||
|
```env
|
||||||
|
TRANSCRIPTARR_MODE=provider
|
||||||
|
BAZARR_URL=http://bazarr:6767
|
||||||
|
BAZARR_API_KEY=your_api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
**Hybrid Mode** (both simultaneously):
|
||||||
|
```env
|
||||||
|
TRANSCRIPTARR_MODE=standalone,provider
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🧪 Testing
|
||||||
|
|
||||||
|
Run the test script to verify everything works:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python test_backend.py
|
||||||
|
```
|
||||||
|
|
||||||
|
This will verify:
|
||||||
|
- ✓ Configuration loading
|
||||||
|
- ✓ Database connection
|
||||||
|
- ✓ Table creation
|
||||||
|
- ✓ Queue operations (add, get, deduplicate)
|
||||||
|
|
||||||
|
## 📊 Implemented Components
|
||||||
|
|
||||||
|
### config.py
|
||||||
|
- Centralized configuration with Pydantic
|
||||||
|
- Automatic environment variable validation
|
||||||
|
- Multi-backend database support
|
||||||
|
- Operation mode configuration
|
||||||
|
|
||||||
|
### database.py
|
||||||
|
- Connection management with SQLAlchemy
|
||||||
|
- Support for SQLite, PostgreSQL, MariaDB
|
||||||
|
- Backend-specific optimizations
|
||||||
|
- SQLite: WAL mode, optimized cache
|
||||||
|
- PostgreSQL: connection pooling, pre-ping
|
||||||
|
- MariaDB: utf8mb4 charset, pooling
|
||||||
|
- Health checks and statistics
|
||||||
|
|
||||||
|
### models.py
|
||||||
|
- Complete `Job` model with:
|
||||||
|
- States: queued, processing, completed, failed, cancelled
|
||||||
|
- Stages: pending, detecting_language, transcribing, translating, etc.
|
||||||
|
- Quality presets: fast, balanced, best
|
||||||
|
- Progress tracking (0-100%)
|
||||||
|
- Complete timestamps
|
||||||
|
- Retry logic
|
||||||
|
- Worker assignment
|
||||||
|
- Optimized indexes for common queries
|
||||||
|
|
||||||
|
### queue_manager.py
|
||||||
|
- Thread-safe persistent queue
|
||||||
|
- Job prioritization
|
||||||
|
- Duplicate detection
|
||||||
|
- Automatic retry for failed jobs
|
||||||
|
- Real-time statistics
|
||||||
|
- Automatic cleanup of old jobs
|
||||||
|
|
||||||
|
## 🔄 Comparison with SubGen
|
||||||
|
|
||||||
|
| Feature | SubGen | TranscriptorIO |
|
||||||
|
|---------|--------|----------------|
|
||||||
|
| Queue | In-memory (lost on restart) | **Persistent in DB** |
|
||||||
|
| Processing | Synchronous (blocks threads) | **Asynchronous** |
|
||||||
|
| Prioritization | No | **Yes (configurable)** |
|
||||||
|
| Visibility | No progress/ETA | **Progress + real-time ETA** |
|
||||||
|
| Deduplication | Basic (memory only) | **Persistent + intelligent** |
|
||||||
|
| Retries | No | **Automatic with limit** |
|
||||||
|
| Database | No | **SQLite/PostgreSQL/MariaDB** |
|
||||||
|
| Bazarr Timeouts | Yes (>5min = 24h throttle) | **No (async)** |
|
||||||
|
|
||||||
|
## 📝 Next Steps
|
||||||
|
|
||||||
|
1. **Worker Pool** - Asynchronous worker system
|
||||||
|
2. **REST API** - FastAPI endpoints for management
|
||||||
|
3. **WebSocket** - Real-time updates
|
||||||
|
4. **Transcriber** - Whisper wrapper with progress callbacks
|
||||||
|
5. **Bazarr Provider** - Improved async provider
|
||||||
|
6. **Standalone Scanner** - Automatic library scanning
|
||||||
|
|
||||||
|
## 🐛 Troubleshooting
|
||||||
|
|
||||||
|
### Error: "No module named 'backend'"
|
||||||
|
|
||||||
|
Make sure to run scripts from the project root:
|
||||||
|
```bash
|
||||||
|
cd /home/dasemu/Hacking/Transcriptarr
|
||||||
|
python test_backend.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error: Database locked (SQLite)
|
||||||
|
|
||||||
|
SQLite is configured with WAL mode for better concurrency. If you still have issues, consider using PostgreSQL for production.
|
||||||
|
|
||||||
|
### Error: pydantic.errors.ConfigError
|
||||||
|
|
||||||
|
Verify that all required variables are in your `.env`:
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your values
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📚 Documentation
|
||||||
|
|
||||||
|
See `CLAUDE.md` for complete architecture and project roadmap.
|
||||||
@@ -1,10 +1,33 @@
|
|||||||
numpy
|
# Core dependencies
|
||||||
stable-ts
|
|
||||||
fastapi
|
fastapi
|
||||||
requests
|
uvicorn[standard]
|
||||||
faster-whisper
|
|
||||||
uvicorn
|
|
||||||
python-multipart
|
python-multipart
|
||||||
|
requests
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
|
||||||
|
# Database & ORM (SQLite is built-in)
|
||||||
|
sqlalchemy>=2.0.0
|
||||||
|
pydantic>=2.0.0
|
||||||
|
pydantic-settings>=2.0.0
|
||||||
|
|
||||||
|
# Media processing (CPU-only by default)
|
||||||
|
numpy
|
||||||
ffmpeg-python
|
ffmpeg-python
|
||||||
whisper
|
|
||||||
watchdog
|
watchdog
|
||||||
|
|
||||||
|
# Optional dependencies (install based on configuration):
|
||||||
|
#
|
||||||
|
# For PostgreSQL database:
|
||||||
|
# pip install psycopg2-binary
|
||||||
|
#
|
||||||
|
# For MariaDB/MySQL database:
|
||||||
|
# pip install pymysql
|
||||||
|
#
|
||||||
|
# For Whisper transcription:
|
||||||
|
# pip install openai-whisper faster-whisper stable-ts
|
||||||
|
#
|
||||||
|
# For GPU support (NVIDIA):
|
||||||
|
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||||
|
#
|
||||||
|
# For media file handling:
|
||||||
|
# pip install av>=10.0.0
|
||||||
163
test_backend.py
Executable file
163
test_backend.py
Executable file
@@ -0,0 +1,163 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test script for TranscriptorIO backend components."""
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def test_config():
|
||||||
|
"""Test configuration loading."""
|
||||||
|
logger.info("Testing configuration...")
|
||||||
|
try:
|
||||||
|
from backend.config import settings
|
||||||
|
logger.info(f"✓ Config loaded successfully")
|
||||||
|
logger.info(f" - Mode: {settings.transcriptarr_mode}")
|
||||||
|
logger.info(f" - Database: {settings.database_type.value}")
|
||||||
|
logger.info(f" - Whisper Model: {settings.whisper_model}")
|
||||||
|
logger.info(f" - Device: {settings.transcribe_device}")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"✗ Config test failed: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def test_database():
|
||||||
|
"""Test database connection and table creation."""
|
||||||
|
logger.info("\nTesting database...")
|
||||||
|
try:
|
||||||
|
from backend.core.database import database
|
||||||
|
from backend.core.models import Base
|
||||||
|
|
||||||
|
# Clean database for fresh test
|
||||||
|
try:
|
||||||
|
database.drop_tables()
|
||||||
|
logger.info(f" - Dropped existing tables for clean test")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
database.create_tables()
|
||||||
|
logger.info(f"✓ Database initialized with fresh tables")
|
||||||
|
|
||||||
|
# Test connection with health check
|
||||||
|
if database.health_check():
|
||||||
|
logger.info(f"✓ Database connection OK")
|
||||||
|
else:
|
||||||
|
logger.error("✗ Database health check failed (but tables were created)")
|
||||||
|
# Don't fail the test if health check fails but tables exist
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Get stats
|
||||||
|
stats = database.get_stats()
|
||||||
|
logger.info(f" - Type: {stats['type']}")
|
||||||
|
logger.info(f" - URL: {stats['url']}")
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"✗ Database test failed: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def test_queue_manager():
|
||||||
|
"""Test queue manager operations."""
|
||||||
|
logger.info("\nTesting queue manager...")
|
||||||
|
try:
|
||||||
|
from backend.core.queue_manager import queue_manager
|
||||||
|
from backend.core.models import QualityPreset
|
||||||
|
|
||||||
|
# Add a test job
|
||||||
|
job = queue_manager.add_job(
|
||||||
|
file_path="/test/anime.mkv",
|
||||||
|
file_name="anime.mkv",
|
||||||
|
source_lang="ja",
|
||||||
|
target_lang="es",
|
||||||
|
quality_preset=QualityPreset.FAST,
|
||||||
|
priority=5
|
||||||
|
)
|
||||||
|
|
||||||
|
if job:
|
||||||
|
logger.info(f"✓ Job created: {job.id}")
|
||||||
|
logger.info(f" - File: {job.file_name}")
|
||||||
|
logger.info(f" - Status: {job.status.value}")
|
||||||
|
logger.info(f" - Priority: {job.priority}")
|
||||||
|
else:
|
||||||
|
logger.error("✗ Failed to create job")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Get queue stats
|
||||||
|
stats = queue_manager.get_queue_stats()
|
||||||
|
logger.info(f"✓ Queue stats:")
|
||||||
|
logger.info(f" - Total: {stats['total']}")
|
||||||
|
logger.info(f" - Queued: {stats['queued']}")
|
||||||
|
logger.info(f" - Processing: {stats['processing']}")
|
||||||
|
logger.info(f" - Completed: {stats['completed']}")
|
||||||
|
|
||||||
|
# Try to add duplicate
|
||||||
|
duplicate = queue_manager.add_job(
|
||||||
|
file_path="/test/anime.mkv",
|
||||||
|
file_name="anime.mkv",
|
||||||
|
source_lang="ja",
|
||||||
|
target_lang="es",
|
||||||
|
quality_preset=QualityPreset.FAST
|
||||||
|
)
|
||||||
|
|
||||||
|
if duplicate is None:
|
||||||
|
logger.info(f"✓ Duplicate detection working")
|
||||||
|
else:
|
||||||
|
logger.warning(f"⚠ Duplicate job was created (should have been rejected)")
|
||||||
|
|
||||||
|
# Get next job
|
||||||
|
next_job = queue_manager.get_next_job("test-worker-1")
|
||||||
|
if next_job:
|
||||||
|
logger.info(f"✓ Got next job: {next_job.id} (assigned to test-worker-1)")
|
||||||
|
logger.info(f" - Status: {next_job.status.value}")
|
||||||
|
else:
|
||||||
|
logger.error("✗ Failed to get next job")
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"✗ Queue manager test failed: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""Run all tests."""
|
||||||
|
logger.info("=" * 60)
|
||||||
|
logger.info("TranscriptorIO Backend Test Suite")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
|
results = {
|
||||||
|
"Config": test_config(),
|
||||||
|
"Database": test_database(),
|
||||||
|
"Queue Manager": test_queue_manager(),
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("\n" + "=" * 60)
|
||||||
|
logger.info("Test Results:")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
|
all_passed = True
|
||||||
|
for test_name, passed in results.items():
|
||||||
|
status = "✓ PASSED" if passed else "✗ FAILED"
|
||||||
|
logger.info(f"{test_name}: {status}")
|
||||||
|
if not passed:
|
||||||
|
all_passed = False
|
||||||
|
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
|
if all_passed:
|
||||||
|
logger.info("🎉 All tests passed!")
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
logger.error("❌ Some tests failed")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
Reference in New Issue
Block a user