Compare commits

..

10 Commits

Author SHA1 Message Date
McCloudS
ad0bdba03d Fix double task_done in ASR/detect-language handlers (#262)
All checks were successful
Update_CalVer_Amend / docker (push) Successful in 24s
2026-01-10 22:59:03 +00:00
McCloudS
11cca018ae Check model existence before unloading
Add a check to ensure the model exists before unloading it.
2026-01-07 22:17:19 +00:00
McCloudS
948d44ca66 Remove unused import statement for cutil
Removed unused import of cutil.
2026-01-07 21:33:27 +00:00
McCloudS
809f1a055d Import cutil and ctypes for memory optimization
Added ctypes and cutil imports for memory management.
2026-01-07 19:13:28 +00:00
McCloudS
594b78b84f Refactor transcription worker error handling
Might fix CPU memory leak.
2026-01-07 18:39:30 +00:00
McCloudS
7373f9a87b Change runner to self-hosted for GPU build 2025-12-13 14:15:57 -05:00
McCloudS
1f428eabf9 Update GitHub Actions workflow for GPU build 2025-12-13 12:58:21 -05:00
McCloudS
a0778e7220 Remove local Docker artifacts cleanup step
Removed cleanup step for local Docker artifacts after job completion.
2025-12-13 12:46:14 -05:00
McCloudS
6e99826c9e Downgrade CUDA base image version in Dockerfile
Saving ~1 GB of image space is not worth the other breakage it causes, for now.
2025-12-13 12:34:08 -05:00
McCloudS
8bbf9eff6f Remove disk cleanup step from build_GPU workflow
Removed pre-job step for cleaning up disk space.
2025-12-13 12:12:20 -05:00
3 changed files with 25 additions and 53 deletions

View File

@@ -3,17 +3,17 @@ name: Build_Subgen_Dockerfile_GPU
on:
push:
paths:
- requirements.txt
- Dockerfile
- 'requirements.txt'
- 'Dockerfile'
workflow_dispatch:
jobs:
docker:
runs-on: ubuntu-latest
runs-on: [self-hosted]
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v2
with:
fetch-depth: 0
@@ -21,27 +21,7 @@ jobs:
id: get_version
run: |
version=$(grep -oP "subgen_version\s*=\s*'\K[^']+" subgen.py)
echo "version=$version" >> "$GITHUB_ENV"
- name: Pre-job:free disk space
shell: bash
run: |
set -euo pipefail
echo "Disk before cleanup:"
df -h /
# Prune buildx builder data and docker caches
sudo docker builder prune --all --force || true
sudo docker system prune -af --volumes || true
# Remove temp files
sudo rm -rf /tmp/* || true
# Remove old buildkit cache if present
sudo rm -rf /var/lib/docker/buildkit || true
echo "Disk after cleanup:"
df -h /
echo "version=$version" >> $GITHUB_ENV
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -57,14 +37,3 @@ jobs:
tags: |
mccloud/subgen:latest
mccloud/subgen:${{ env.version }}
- name: Post-job:cleanup local docker artifacts
if: always()
shell: bash
run: |
set -euo pipefail
sudo docker builder prune --all --force || true
sudo docker image prune -af || true
sudo docker container prune -f || true
sudo docker system prune -af --volumes || true
df -h /

View File

@@ -1,5 +1,5 @@
# Stage 1: Builder
FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS builder
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
WORKDIR /subgen
@@ -22,7 +22,7 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Stage 2: Runtime
FROM nvidia/cuda:12.6.3-base-ubuntu22.04
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
WORKDIR /subgen

View File

@@ -1,4 +1,4 @@
subgen_version = '2025.11.1'
subgen_version = '2026.01.5'
"""
ENVIRONMENT VARIABLES DOCUMENTATION
@@ -74,6 +74,7 @@ from io import BytesIO
import io
import asyncio
import torch
import ctypes, ctypes.util
from typing import List
from enum import Enum
@@ -265,24 +266,24 @@ task_queue = DeduplicatedQueue()
def transcription_worker():
while True:
try:
task = None
try:
task = task_queue.get(block=True, timeout=1)
if "type" in task and task["type"] == "detect_language":
detect_language_task(task['path'])
elif 'Bazarr-' in task['path']:
logging.info(f"Task {task['path']} is being handled by ASR.")
else:
logging.info(f"Task {task['path']} is being handled by Subgen.")
logging.info(f"Task {task['path']} is being handled by Subgen.")
gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language'])
task_queue.task_done()
# show queue
logging.debug(f"Queue status: {task_queue.qsize()} tasks remaining")
except queue.Empty:
continue # This is ok, as we have a timeout, nothing needs to be printed
continue
except Exception as e:
logging.error(f"Error processing task: {e}", exc_info=True) # Log the error and the traceback
else:
delete_model() # Call delete_model() *only* if no exception occurred
logging.error(f"Error processing task: {e}", exc_info=True)
finally:
if task: # Ensure a task was actually retrieved before calling task_done
task_queue.task_done()
delete_model()
for _ in range(concurrent_transcriptions):
threading.Thread(target=transcription_worker, daemon=True).start()
@@ -585,7 +586,6 @@ async def asr(
finally:
await audio_file.close()
task_queue.task_done()
delete_model()
if result:
@@ -665,7 +665,6 @@ async def detect_language(
finally:
#await audio_file.close()
task_queue.task_done()
delete_model()
return {"detected_language": detected_language.to_name(), "language_code": language_code}
@@ -796,14 +795,16 @@ def delete_model():
global model
if clear_vram_on_complete and task_queue.is_idle():
logging.debug("Queue idle; clearing model from memory.")
model.model.unload_model()
del model
model = None
if model:
model.model.unload_model()
del model
model = None
if transcribe_device.lower() == 'cuda' and torch.cuda.is_available():
torch.cuda.empty_cache()
logging.debug("CUDA cache cleared.")
if os.name != 'nt': # don't garbage collect on Windows, it will crash the script
gc.collect()
ctypes.CDLL(ctypes.util.find_library('c')).malloc_trim(0)
def isAudioFileExtension(file_extension):
return file_extension.casefold() in \
@@ -1139,6 +1140,7 @@ def find_language_audio_track(audio_tracks, find_languages):
if track['language'] == language:
return language
return None
def find_default_audio_track_language(audio_tracks):
"""
Finds the language of the default audio track in the given list of audio tracks.
@@ -1255,6 +1257,7 @@ def should_skip_file(file_path: str, target_language: LanguageCode) -> bool:
logging.debug(f"Processing {base_name}: No skip conditions met.")
return False
def get_subtitle_languages(video_path):
"""
Extract language codes from each audio stream in the video file using pyav.