Unload the model after 60 seconds of inactivity. Attempt to clear more CUDA VRAM.
subgen.py
@@ -1,4 +1,4 @@
-subgen_version = '2025.02.54'
+subgen_version = '2025.02.55'
 
 from language_code import LanguageCode
 from datetime import datetime
@@ -682,18 +682,96 @@ def extract_audio_segment_to_memory(input_file, start_time, duration):
         logging.error(f"Error: {str(e)}")
         return None
 
-def start_model():
-    global model
-    if model is None:
-        logging.debug("Model was purged, need to re-create")
-        model = stable_whisper.load_faster_whisper(whisper_model, download_root=model_location, device=transcribe_device, cpu_threads=whisper_threads, num_workers=concurrent_transcriptions, compute_type=compute_type)
+# --- Global Model Variables ---
+_model_loading = False
+_model_lock = threading.Lock()  # Protects access to `model` and `_model_loading`
+_unload_timer = None
+_unload_timer_lock = threading.Lock()
+_unload_delay_seconds = 60  # Adjust as needed
+
+# --- Model Loading/Unloading Functions ---
+
+async def _load_model():
+    """Asynchronously load the Whisper model."""
+    global model, _model_loading
+    logging.debug("Starting asynchronous model load...")
+    try:
+        with _model_lock:
+            _model_loading = True
+
+        model = load_faster_whisper(
+            whisper_model,
+            download_root=model_location,
+            device=transcribe_device,
+            cpu_threads=whisper_threads,
+            num_workers=concurrent_transcriptions,
+            compute_type=compute_type
+        )
+        logging.debug("Model loaded asynchronously.")
+
+    except Exception as e:
+        logging.error(f"Error loading model asynchronously: {e}")
+        # Handle the error (e.g., set model to None, log it, etc.)
+        with _model_lock:
+            model = None  # Ensure model is None on load failure
+    finally:
+        with _model_lock:
+            _model_loading = False
+
+def _unload_model_callback():
+    """Callback function to unload the model after a delay."""
+    with _unload_timer_lock:
+        delete_model()  # Call delete_model inside the lock
 
 def delete_model():
-    global model
-    if clear_vram_on_complete and task_queue.is_idle():
-        logging.debug("Queue idle; clearing model from memory.")
-        model = None
-        gc.collect()
+    """Unload the Whisper model, clear memory, and cancel any pending timers."""
+    global model, _unload_timer
+
+    with _model_lock:
+        if model is not None:
+            logging.debug("Clearing model from memory (delayed).")
+            model = None
+            # Explicitly release CUDA memory if using CUDA *and* the device is CUDA
+            try:
+                if transcribe_device.lower() == 'cuda' and torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                    logging.debug("CUDA cache cleared.")
+                gc.collect()
+            except Exception as e:
+                logging.error(f"Error clearing CUDA cache: {e}")
+
+    # Cancel the unload timer
+    with _unload_timer_lock:
+        if _unload_timer:
+            try:  # Add a try-except block in case the timer has already been cancelled
+                _unload_timer.cancel()
+            except ValueError:
+                pass  # Timer was already cancelled
+            _unload_timer = None
+
+def start_model():
+    """Start the model loading process (asynchronously) or reset the unload timer."""
+    global model, _unload_timer
+
+    with _model_lock:
+        if model is None and not _model_loading:
+            logging.debug("Starting model loading")
+            asyncio.create_task(_load_model())  # Start loading in background
+        elif _model_loading:
+            logging.debug("Model is currently loading...")
+        else:
+            logging.debug("Model is already loaded.")
+
+    # Reset the timer if the model is loaded or loading
+    with _unload_timer_lock:
+        if _unload_timer:
+            try:
+                _unload_timer.cancel()
+            except ValueError:
+                pass
+        _unload_timer = threading.Timer(_unload_delay_seconds, _unload_model_callback)
+        _unload_timer.daemon = True
+        _unload_timer.start()
 
 def isAudioFileExtension(file_extension):
     return file_extension.casefold() in \
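
For reference, the core mechanism this commit introduces is a debounced idle unload: every call to start_model() re-arms a threading.Timer, and the model is only dropped once no work has arrived for _unload_delay_seconds. Below is a minimal standalone sketch of that pattern, not subgen's actual code; touch_model, _unload, and the placeholder load call are illustrative names.

import gc
import threading

model = None
_lock = threading.RLock()              # re-entrant, so nested use in one thread is safe
_unload_timer = None
_unload_delay_seconds = 60

def _unload():
    """Drop the model reference and reclaim memory after the idle delay."""
    global model
    with _lock:
        model = None
    gc.collect()
    try:
        import torch
        if torch.cuda.is_available():
            torch.cuda.empty_cache()   # hand cached CUDA blocks back to the driver
    except ImportError:
        pass                           # CPU-only environment: nothing to release

def touch_model():
    """Load the model on first use and (re)arm the idle-unload timer."""
    global model, _unload_timer
    with _lock:
        if model is None:
            model = object()           # placeholder for the real load call
        if _unload_timer is not None:
            _unload_timer.cancel()     # no-op if the timer has already fired
        _unload_timer = threading.Timer(_unload_delay_seconds, _unload)
        _unload_timer.daemon = True    # don't keep the process alive on exit
        _unload_timer.start()

One caveat when reusing the committed version of this pattern: asyncio.create_task() only works from inside a running event loop, so start_model() has to be called from async code, while the Timer callback itself runs on a plain background thread.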