Compare commits
10 Commits
512a8a473b
...
ad0bdba03d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ad0bdba03d | ||
|
|
11cca018ae | ||
|
|
948d44ca66 | ||
|
|
809f1a055d | ||
|
|
594b78b84f | ||
|
|
7373f9a87b | ||
|
|
1f428eabf9 | ||
|
|
a0778e7220 | ||
|
|
6e99826c9e | ||
|
|
8bbf9eff6f |
41
.github/workflows/build_GPU.yml
vendored
41
.github/workflows/build_GPU.yml
vendored
@@ -3,17 +3,17 @@ name: Build_Subgen_Dockerfile_GPU
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
paths:
|
paths:
|
||||||
- requirements.txt
|
- 'requirements.txt'
|
||||||
- Dockerfile
|
- 'Dockerfile'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
docker:
|
docker:
|
||||||
runs-on: ubuntu-latest
|
runs-on: [self-hosted]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
@@ -21,27 +21,7 @@ jobs:
|
|||||||
id: get_version
|
id: get_version
|
||||||
run: |
|
run: |
|
||||||
version=$(grep -oP "subgen_version\s*=\s*'\K[^']+" subgen.py)
|
version=$(grep -oP "subgen_version\s*=\s*'\K[^']+" subgen.py)
|
||||||
echo "version=$version" >> "$GITHUB_ENV"
|
echo "version=$version" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Pre-job:free disk space
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
echo "Disk before cleanup:"
|
|
||||||
df -h /
|
|
||||||
|
|
||||||
# Prune buildx builder data and docker caches
|
|
||||||
sudo docker builder prune --all --force || true
|
|
||||||
sudo docker system prune -af --volumes || true
|
|
||||||
|
|
||||||
# Remove temp files
|
|
||||||
sudo rm -rf /tmp/* || true
|
|
||||||
|
|
||||||
# Remove old buildkit cache if present
|
|
||||||
sudo rm -rf /var/lib/docker/buildkit || true
|
|
||||||
|
|
||||||
echo "Disk after cleanup:"
|
|
||||||
df -h /
|
|
||||||
|
|
||||||
- name: Login to Docker Hub
|
- name: Login to Docker Hub
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
@@ -57,14 +37,3 @@ jobs:
|
|||||||
tags: |
|
tags: |
|
||||||
mccloud/subgen:latest
|
mccloud/subgen:latest
|
||||||
mccloud/subgen:${{ env.version }}
|
mccloud/subgen:${{ env.version }}
|
||||||
|
|
||||||
- name: Post-job:cleanup local docker artifacts
|
|
||||||
if: always()
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
sudo docker builder prune --all --force || true
|
|
||||||
sudo docker image prune -af || true
|
|
||||||
sudo docker container prune -f || true
|
|
||||||
sudo docker system prune -af --volumes || true
|
|
||||||
df -h /
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# Stage 1: Builder
|
# Stage 1: Builder
|
||||||
FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS builder
|
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS builder
|
||||||
|
|
||||||
WORKDIR /subgen
|
WORKDIR /subgen
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Stage 2: Runtime
|
# Stage 2: Runtime
|
||||||
FROM nvidia/cuda:12.6.3-base-ubuntu22.04
|
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
|
||||||
|
|
||||||
WORKDIR /subgen
|
WORKDIR /subgen
|
||||||
|
|
||||||
|
|||||||
29
subgen.py
29
subgen.py
@@ -1,4 +1,4 @@
|
|||||||
subgen_version = '2025.11.1'
|
subgen_version = '2026.01.5'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
ENVIRONMENT VARIABLES DOCUMENTATION
|
ENVIRONMENT VARIABLES DOCUMENTATION
|
||||||
@@ -74,6 +74,7 @@ from io import BytesIO
|
|||||||
import io
|
import io
|
||||||
import asyncio
|
import asyncio
|
||||||
import torch
|
import torch
|
||||||
|
import ctypes, ctypes.util
|
||||||
from typing import List
|
from typing import List
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
@@ -265,6 +266,7 @@ task_queue = DeduplicatedQueue()
|
|||||||
|
|
||||||
def transcription_worker():
|
def transcription_worker():
|
||||||
while True:
|
while True:
|
||||||
|
task = None
|
||||||
try:
|
try:
|
||||||
task = task_queue.get(block=True, timeout=1)
|
task = task_queue.get(block=True, timeout=1)
|
||||||
if "type" in task and task["type"] == "detect_language":
|
if "type" in task and task["type"] == "detect_language":
|
||||||
@@ -274,15 +276,14 @@ def transcription_worker():
|
|||||||
else:
|
else:
|
||||||
logging.info(f"Task {task['path']} is being handled by Subgen.")
|
logging.info(f"Task {task['path']} is being handled by Subgen.")
|
||||||
gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language'])
|
gen_subtitles(task['path'], task['transcribe_or_translate'], task['force_language'])
|
||||||
task_queue.task_done()
|
|
||||||
# show queue
|
|
||||||
logging.debug(f"Queue status: {task_queue.qsize()} tasks remaining")
|
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue # This is ok, as we have a timeout, nothing needs to be printed
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error processing task: {e}", exc_info=True) # Log the error and the traceback
|
logging.error(f"Error processing task: {e}", exc_info=True)
|
||||||
else:
|
finally:
|
||||||
delete_model() # Call delete_model() *only* if no exception occurred
|
if task: # Ensure a task was actually retrieved before calling task_done
|
||||||
|
task_queue.task_done()
|
||||||
|
delete_model()
|
||||||
|
|
||||||
for _ in range(concurrent_transcriptions):
|
for _ in range(concurrent_transcriptions):
|
||||||
threading.Thread(target=transcription_worker, daemon=True).start()
|
threading.Thread(target=transcription_worker, daemon=True).start()
|
||||||
@@ -585,7 +586,6 @@ async def asr(
|
|||||||
|
|
||||||
finally:
|
finally:
|
||||||
await audio_file.close()
|
await audio_file.close()
|
||||||
task_queue.task_done()
|
|
||||||
delete_model()
|
delete_model()
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
@@ -665,7 +665,6 @@ async def detect_language(
|
|||||||
|
|
||||||
finally:
|
finally:
|
||||||
#await audio_file.close()
|
#await audio_file.close()
|
||||||
task_queue.task_done()
|
|
||||||
delete_model()
|
delete_model()
|
||||||
|
|
||||||
return {"detected_language": detected_language.to_name(), "language_code": language_code}
|
return {"detected_language": detected_language.to_name(), "language_code": language_code}
|
||||||
@@ -796,14 +795,16 @@ def delete_model():
|
|||||||
global model
|
global model
|
||||||
if clear_vram_on_complete and task_queue.is_idle():
|
if clear_vram_on_complete and task_queue.is_idle():
|
||||||
logging.debug("Queue idle; clearing model from memory.")
|
logging.debug("Queue idle; clearing model from memory.")
|
||||||
model.model.unload_model()
|
if model:
|
||||||
del model
|
model.model.unload_model()
|
||||||
model = None
|
del model
|
||||||
|
model = None
|
||||||
if transcribe_device.lower() == 'cuda' and torch.cuda.is_available():
|
if transcribe_device.lower() == 'cuda' and torch.cuda.is_available():
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
logging.debug("CUDA cache cleared.")
|
logging.debug("CUDA cache cleared.")
|
||||||
if os.name != 'nt': # don't garbage collect on Windows, it will crash the script
|
if os.name != 'nt': # don't garbage collect on Windows, it will crash the script
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
ctypes.CDLL(ctypes.util.find_library('c')).malloc_trim(0)
|
||||||
|
|
||||||
def isAudioFileExtension(file_extension):
|
def isAudioFileExtension(file_extension):
|
||||||
return file_extension.casefold() in \
|
return file_extension.casefold() in \
|
||||||
@@ -1139,6 +1140,7 @@ def find_language_audio_track(audio_tracks, find_languages):
|
|||||||
if track['language'] == language:
|
if track['language'] == language:
|
||||||
return language
|
return language
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def find_default_audio_track_language(audio_tracks):
|
def find_default_audio_track_language(audio_tracks):
|
||||||
"""
|
"""
|
||||||
Finds the language of the default audio track in the given list of audio tracks.
|
Finds the language of the default audio track in the given list of audio tracks.
|
||||||
@@ -1255,6 +1257,7 @@ def should_skip_file(file_path: str, target_language: LanguageCode) -> bool:
|
|||||||
logging.debug(f"Processing {base_name}: No skip conditions met.")
|
logging.debug(f"Processing {base_name}: No skip conditions met.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_subtitle_languages(video_path):
|
def get_subtitle_languages(video_path):
|
||||||
"""
|
"""
|
||||||
Extract language codes from each audio stream in the video file using pyav.
|
Extract language codes from each audio stream in the video file using pyav.
|
||||||
|
|||||||
Reference in New Issue
Block a user