Compare commits
10 Commits: 512a8a473b ... ad0bdba03d
| Author | SHA1 | Date |
|---|---|---|
| | ad0bdba03d | |
| | 11cca018ae | |
| | 948d44ca66 | |
| | 809f1a055d | |
| | 594b78b84f | |
| | 7373f9a87b | |
| | 1f428eabf9 | |
| | a0778e7220 | |
| | 6e99826c9e | |
| | 8bbf9eff6f | |
.github/workflows/build_GPU.yml (41 changes, vendored)
@@ -3,17 +3,17 @@ name: Build_Subgen_Dockerfile_GPU
on:
  push:
    paths:
      - requirements.txt
      - Dockerfile
      - 'requirements.txt'
      - 'Dockerfile'
  workflow_dispatch:

jobs:
  docker:
    runs-on: ubuntu-latest
    runs-on: [self-hosted]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

@@ -21,27 +21,7 @@ jobs:
        id: get_version
        run: |
          version=$(grep -oP "subgen_version\s*=\s*'\K[^']+" subgen.py)
          echo "version=$version" >> "$GITHUB_ENV"

      - name: Pre-job:free disk space
        shell: bash
        run: |
          set -euo pipefail
          echo "Disk before cleanup:"
          df -h /

          # Prune buildx builder data and docker caches
          sudo docker builder prune --all --force || true
          sudo docker system prune -af --volumes || true

          # Remove temp files
          sudo rm -rf /tmp/* || true

          # Remove old buildkit cache if present
          sudo rm -rf /var/lib/docker/buildkit || true

          echo "Disk after cleanup:"
          df -h /
          echo "version=$version" >> $GITHUB_ENV

      - name: Login to Docker Hub
        uses: docker/login-action@v3

@@ -57,14 +37,3 @@ jobs:
          tags: |
            mccloud/subgen:latest
            mccloud/subgen:${{ env.version }}

      - name: Post-job:cleanup local docker artifacts
        if: always()
        shell: bash
        run: |
          set -euo pipefail
          sudo docker builder prune --all --force || true
          sudo docker image prune -af || true
          sudo docker container prune -f || true
          sudo docker system prune -af --volumes || true
          df -h /
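The get_version step above scrapes the release string out of subgen.py with grep -oP and appends it to $GITHUB_ENV so a later step can tag the image as ${{ env.version }}. A rough Python equivalent of that extraction, purely illustrative (the helper below is hypothetical; the workflow itself only runs grep):

```python
# Sketch: what grep -oP "subgen_version\s*=\s*'\K[^']+" pulls out of subgen.py,
# expressed in Python for clarity. Not part of the workflow.
import re

def read_subgen_version(path: str = "subgen.py") -> str:
    with open(path, encoding="utf-8") as f:
        match = re.search(r"subgen_version\s*=\s*'([^']+)'", f.read())
    if match is None:
        raise ValueError("subgen_version assignment not found")
    return match.group(1)

# The workflow then exposes the value to later steps via
#   echo "version=$version" >> $GITHUB_ENV
# so the image build step can tag mccloud/subgen:${{ env.version }}.
```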
Dockerfile

@@ -1,5 +1,5 @@
# Stage 1: Builder
FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS builder
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder

WORKDIR /subgen

@@ -22,7 +22,7 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .

# Stage 2: Runtime
FROM nvidia/cuda:12.6.3-base-ubuntu22.04
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04

WORKDIR /subgen
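Both stages of the Dockerfile swap between nvidia/cuda base images in this comparison. As a hypothetical sanity check (not part of the repository), something like the following could be run inside the resulting container with --gpus all to confirm that the runtime image actually exposes CUDA and cuDNN to PyTorch:

```python
# Illustrative check only: confirm the CUDA runtime image exposes a GPU to torch.
import torch

def report_gpu() -> None:
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(0)}")
        print(f"cuDNN available: {torch.backends.cudnn.is_available()}")
    else:
        print("CUDA not visible inside this image/container.")

if __name__ == "__main__":
    report_gpu()
```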
subgen.py (33 changes)
@@ -1,4 +1,4 @@
subgen_version = '2025.11.1'
subgen_version = '2026.01.5'

"""
ENVIRONMENT VARIABLES DOCUMENTATION

@@ -74,6 +74,7 @@ from io import BytesIO
import io
import asyncio
import torch
import ctypes, ctypes.util
from typing import List
from enum import Enum

@@ -265,24 +266,24 @@ task_queue = DeduplicatedQueue()

def transcription_worker():
    while True:
        try:
            task = None
            try:
                task = task_queue.get(block=True, timeout=1)
                if "type" in task and task["type"] == "detect_language":
                    detect_language_task(task['path'])
                elif 'Bazarr-' in task['path']:
                    logging.info(f"Task {task['path']} is being handled by ASR.")
                else:
                    logging.info(f"Task {task['path']} is being handled by Subgen.")
                    logging.info(f"Task {task['path']} is being handled by Subgen.")
                    gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language'])
                task_queue.task_done()
                # show queue
                logging.debug(f"Queue status: {task_queue.qsize()} tasks remaining")
            except queue.Empty:
                continue # This is ok, as we have a timeout, nothing needs to be printed
                continue
            except Exception as e:
                logging.error(f"Error processing task: {e}", exc_info=True) # Log the error and the traceback
            else:
                delete_model() # Call delete_model() *only* if no exception occurred
                logging.error(f"Error processing task: {e}", exc_info=True)
            finally:
                if task: # Ensure a task was actually retrieved before calling task_done
                    task_queue.task_done()
                delete_model()

for _ in range(concurrent_transcriptions):
    threading.Thread(target=transcription_worker, daemon=True).start()
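For reference, here is a minimal standalone sketch of the queue-worker pattern the reworked transcription_worker follows: block on get() with a timeout, treat queue.Empty as routine, and do per-task cleanup in finally only when a task was actually dequeued. The names below are simplified stand-ins, not subgen's real task structure:

```python
# Simplified worker-loop sketch (illustrative; not subgen's actual code).
import logging
import queue
import threading

task_queue: "queue.Queue[dict]" = queue.Queue()

def worker() -> None:
    while True:
        task = None
        try:
            task = task_queue.get(block=True, timeout=1)
            logging.info("Handling %s", task["path"])
        except queue.Empty:
            continue  # timeout with nothing queued is normal; poll again
        except Exception:
            logging.exception("Error processing task")
        finally:
            if task is not None:  # only mark done for tasks actually pulled
                task_queue.task_done()

threading.Thread(target=worker, daemon=True).start()
```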
@@ -585,7 +586,6 @@ async def asr(

    finally:
        await audio_file.close()
        task_queue.task_done()
        delete_model()

    if result:

@@ -665,7 +665,6 @@ async def detect_language(

    finally:
        #await audio_file.close()
        task_queue.task_done()
        delete_model()

    return {"detected_language": detected_language.to_name(), "language_code": language_code}
@@ -796,14 +795,16 @@ def delete_model():
    global model
    if clear_vram_on_complete and task_queue.is_idle():
        logging.debug("Queue idle; clearing model from memory.")
        model.model.unload_model()
        del model
        model = None
        if model:
            model.model.unload_model()
            del model
            model = None
        if transcribe_device.lower() == 'cuda' and torch.cuda.is_available():
            torch.cuda.empty_cache()
            logging.debug("CUDA cache cleared.")
        if os.name != 'nt': # don't garbage collect on Windows, it will crash the script
            gc.collect()
            ctypes.CDLL(ctypes.util.find_library('c')).malloc_trim(0)

def isAudioFileExtension(file_extension):
    return file_extension.casefold() in \
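The new ctypes import feeds the malloc_trim(0) call above, which asks glibc to return freed heap pages to the operating system once the model object has been dropped. A self-contained sketch of that pattern, assuming a glibc-based Linux (on other libcs the call is simply skipped):

```python
# Sketch of the glibc malloc_trim pattern used above; illustrative helper name.
import ctypes
import ctypes.util
import gc

def trim_heap() -> None:
    gc.collect()  # drop unreachable Python objects first
    libc_name = ctypes.util.find_library("c")
    if libc_name is None:
        return  # no libc found (e.g. statically linked environments)
    libc = ctypes.CDLL(libc_name)
    if hasattr(libc, "malloc_trim"):  # present on glibc, absent on musl
        libc.malloc_trim(0)  # release free heap arenas back to the OS
```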
@@ -1139,6 +1140,7 @@ def find_language_audio_track(audio_tracks, find_languages):
        if track['language'] == language:
            return language
    return None

def find_default_audio_track_language(audio_tracks):
    """
    Finds the language of the default audio track in the given list of audio tracks.

@@ -1255,6 +1257,7 @@ def should_skip_file(file_path: str, target_language: LanguageCode) -> bool:
    logging.debug(f"Processing {base_name}: No skip conditions met.")
    return False


def get_subtitle_languages(video_path):
    """
    Extract language codes from each audio stream in the video file using pyav.
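As a rough illustration of what that docstring describes, reading per-stream language tags with PyAV can look like the sketch below; the helper is invented for illustration and is not subgen's implementation:

```python
# Illustrative PyAV sketch: collect the 'language' tag from each audio stream.
import av

def audio_stream_languages(video_path: str) -> list[str]:
    languages = []
    with av.open(video_path) as container:
        for stream in container.streams.audio:
            # Missing tags fall back to 'und' (undetermined).
            languages.append(stream.metadata.get("language", "und"))
    return languages
```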