Transcriptarr/backend/scanning/library_scanner.py
Dasemu d28c4caa6a feat(scanning): add library scanner with rules engine
- Add ScanRule model with configurable conditions
- Add FileAnalyzer for ffprobe-based media analysis
- Add LibraryScanner with manual, scheduled and watcher modes
- Add LanguageDetector for audio language detection
- Support rule-based filtering with priority evaluation
2026-01-16 16:54:41 +01:00

"""Library scanner with rule-based filtering and scheduling."""
import logging
import os
import time
from typing import List, Optional, Dict
from datetime import datetime, timezone
from pathlib import Path
from apscheduler.schedulers.background import BackgroundScheduler
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from backend.core.database import database
from backend.core.queue_manager import queue_manager
from backend.core.models import QualityPreset
from backend.scanning.models import ScanRule
from backend.scanning.file_analyzer import FileAnalyzer, FileAnalysis
from backend.scanning.language_detector import language_detector
from backend.core.language_code import LanguageCode
logger = logging.getLogger(__name__)
class LibraryFileHandler(FileSystemEventHandler):
    """Watchdog handler for real-time file detection."""

    def __init__(self, scanner: "LibraryScanner"):
        """
        Initialize file handler.

        Args:
            scanner: Parent LibraryScanner instance
        """
        super().__init__()
        self.scanner = scanner

    def on_created(self, event: FileCreatedEvent):
        """
        Handle new file creation.

        Args:
            event: File creation event
        """
        if event.is_directory:
            return
        file_path = event.src_path
        # Check if it's a video file
        if not FileAnalyzer.is_video_file(file_path):
            return
        # Wait a bit for the file to be fully written. Note that this blocks
        # the observer's dispatch thread, so it also acts as a crude debounce.
        time.sleep(5)
        logger.info(f"New file detected: {file_path}")
        self.scanner.process_file(file_path)


class LibraryScanner:
    """
    Library scanner with rule-based filtering.

    Scans media libraries, analyzes files with ffprobe, and applies
    configurable rules to determine which files need transcription.

    Supports:
    - One-time manual scans
    - Scheduled periodic scans (cron-like)
    - Real-time file watching (Tdarr-style)
    """

    def __init__(self):
        """Initialize library scanner."""
        self.scheduler: Optional[BackgroundScheduler] = None
        self.file_observer: Optional[Observer] = None
        self.is_scanning = False
        self.last_scan_time: Optional[datetime] = None
        self.files_scanned = 0
        self.files_queued = 0
        logger.info("LibraryScanner initialized")

    def scan_libraries(self, paths: Optional[List[str]] = None) -> Dict:
        """
        Perform a one-time scan of library directories.

        Args:
            paths: List of directories to scan (uses config if None)

        Returns:
            Dictionary with scan statistics
        """
        if self.is_scanning:
            logger.warning("Scan already in progress")
            return {"error": "Scan already in progress"}

        self.is_scanning = True
        self.files_scanned = 0
        self.files_queued = 0
        scan_start = time.time()
        try:
            # Get paths from settings_service if not provided
            if paths is None:
                from backend.core.settings_service import settings_service
                library_paths = settings_service.get('library_paths', '')
                if not library_paths:
                    logger.error("No library paths configured")
                    return {"error": "No library paths configured"}
                # Handle both comma and pipe separators
                if '|' in library_paths:
                    paths = [p.strip() for p in library_paths.split("|") if p.strip()]
                else:
                    paths = [p.strip() for p in library_paths.split(",") if p.strip()]

            logger.info(f"Starting library scan: {len(paths)} paths")

            # Load all enabled rules
            rules = self._load_scan_rules()
            logger.info(f"Loaded {len(rules)} enabled scan rules")

            # Scan each path
            for path in paths:
                if not os.path.isdir(path):
                    logger.warning(f"Path not found or not a directory: {path}")
                    continue
                logger.info(f"Scanning: {path}")
                self._scan_directory(path, rules)

            scan_duration = time.time() - scan_start
            self.last_scan_time = datetime.now(timezone.utc)
            self._persist_scan_stats(files_in_this_scan=self.files_scanned)

            results = {
                "status": "completed",
                "files_scanned": self.files_scanned,
                "files_queued": self.files_queued,
                "duration_seconds": round(scan_duration, 2),
                "timestamp": self.last_scan_time.isoformat(),
            }
            logger.info(
                f"Scan completed: {self.files_scanned} files scanned, "
                f"{self.files_queued} jobs queued in {scan_duration:.1f}s"
            )
            return results
        except Exception as e:
            logger.error(f"Scan failed: {e}", exc_info=True)
            return {"error": str(e)}
        finally:
            self.is_scanning = False

    def _scan_directory(self, directory: str, rules: List[ScanRule]):
        """
        Recursively scan a directory.

        Args:
            directory: Directory path
            rules: List of scan rules to apply
        """
        try:
            for root, dirs, files in os.walk(directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    self.files_scanned += 1
                    # Process file
                    self.process_file(file_path, rules)
        except Exception as e:
            logger.error(f"Error scanning directory {directory}: {e}")

    def process_file(
        self, file_path: str, rules: Optional[List[ScanRule]] = None
    ) -> bool:
        """
        Process a single file against scan rules.

        Args:
            file_path: Path to media file
            rules: Optional list of rules (will load if None)

        Returns:
            True if job was queued, False otherwise
        """
        try:
            # Analyze file
            analysis = FileAnalyzer.analyze_file(file_path)
            if not analysis:
                return False

            # Check if we need language detection
            if not analysis.default_audio_language or len(analysis.audio_languages) == 0:
                logger.info(
                    f"Audio language unknown for {analysis.file_name}, "
                    f"queuing language detection job"
                )
                return self._queue_language_detection_job(analysis)

            # Load rules if not provided
            if rules is None:
                rules = self._load_scan_rules()

            # Evaluate against rules
            matching_rule = self._evaluate_rules(analysis, rules)
            if matching_rule:
                # Queue job based on rule
                return self._queue_job_from_rule(analysis, matching_rule)
            return False
        except Exception as e:
            logger.error(f"Error processing file {file_path}: {e}")
            return False

    def _evaluate_rules(
        self, file_analysis: FileAnalysis, rules: List[ScanRule]
    ) -> Optional[ScanRule]:
        """
        Evaluate file against rules (in priority order).

        Args:
            file_analysis: File analysis result
            rules: List of scan rules

        Returns:
            First matching rule or None
        """
        for rule in rules:
            if self._rule_matches(file_analysis, rule):
                logger.debug(f"File {file_analysis.file_name} matches rule: {rule.name}")
                return rule
        return None

    def _rule_matches(self, file_analysis: FileAnalysis, rule: ScanRule) -> bool:
        """
        Check if a file matches a scan rule.

        Args:
            file_analysis: File analysis
            rule: Scan rule

        Returns:
            True if all conditions match
        """
        # Check if rule has any conditions defined
        has_conditions = any([
            rule.file_extension,
            rule.audio_language_is,
            rule.audio_language_not,
            rule.audio_track_count_min,
            rule.has_embedded_subtitle_lang,
            rule.missing_embedded_subtitle_lang,
            rule.missing_external_subtitle_lang,
        ])
        if not has_conditions:
            logger.warning(
                f"Rule '{rule.name}' has no conditions - will match ALL files. "
                f"This is probably not what you want!"
            )

        # Check file extension filter
        if rule.file_extension:
            if file_analysis.file_extension not in rule.file_extension_list:
                return False

        # Check audio language IS
        if rule.audio_language_is:
            target_lang = LanguageCode.from_string(rule.audio_language_is)
            # Check if file has the target language
            has_target_lang = target_lang in file_analysis.audio_languages
            # Check if any audio track has an undefined language (None) -
            # such files need Whisper-based detection
            has_undefined_lang = any(
                lang is None for lang in file_analysis.audio_languages
            )
            if not has_target_lang:
                # If language is undefined, try to detect it with Whisper
                if has_undefined_lang:
                    logger.info(
                        f"File {file_analysis.file_name} has undefined audio language - "
                        f"attempting detection with Whisper..."
                    )
                    detected_lang = language_detector.detect_language(file_analysis.file_path)
                    if detected_lang:
                        logger.info(
                            f"Detected language for {file_analysis.file_name}: {detected_lang}"
                        )
                        # Check if detected language matches rule
                        if detected_lang == target_lang:
                            logger.info(
                                f"✓ Detected language '{detected_lang}' matches rule '{rule.name}'"
                            )
                            # Update file_analysis with detected language for later use,
                            # then fall through to the remaining rule conditions
                            if file_analysis.audio_tracks:
                                file_analysis.audio_tracks[0].language = detected_lang
                        else:
                            logger.debug(
                                f"Rule '{rule.name}' failed: detected '{detected_lang}' "
                                f"but expected '{rule.audio_language_is}'"
                            )
                            return False
                    else:
                        logger.warning(
                            f"Failed to detect language for {file_analysis.file_name} - skipping"
                        )
                        return False
                else:
                    # Language is defined but doesn't match
                    logger.debug(
                        f"Rule '{rule.name}' audio check failed for {file_analysis.file_name}: "
                        f"Expected '{rule.audio_language_is}' but found "
                        f"{[str(lang) if lang else 'und' for lang in file_analysis.audio_languages]}"
                    )
                    return False

        # Check audio language NOT
        if rule.audio_language_not:
            excluded_langs = [
                LanguageCode.from_string(lang) for lang in rule.audio_language_not_list
            ]
            if any(lang in file_analysis.audio_languages for lang in excluded_langs):
                return False

        # Check minimum audio tracks
        if rule.audio_track_count_min:
            if len(file_analysis.audio_tracks) < rule.audio_track_count_min:
                return False

        # Check HAS embedded subtitle
        if rule.has_embedded_subtitle_lang:
            required_lang = LanguageCode.from_string(rule.has_embedded_subtitle_lang)
            if not file_analysis.has_embedded_subtitle_language(required_lang):
                return False

        # Check MISSING embedded subtitle
        if rule.missing_embedded_subtitle_lang:
            excluded_lang = LanguageCode.from_string(rule.missing_embedded_subtitle_lang)
            if file_analysis.has_embedded_subtitle_language(excluded_lang):
                return False

        # Check MISSING external subtitle
        if rule.missing_external_subtitle_lang:
            excluded_lang = LanguageCode.from_string(rule.missing_external_subtitle_lang)
            if file_analysis.has_external_subtitle_language(excluded_lang):
                return False

        # All conditions matched
        logger.debug(
            f"File '{file_analysis.file_name}' matched rule '{rule.name}' "
            f"(priority: {rule.priority})"
        )
        return True

    def _queue_language_detection_job(self, file_analysis: FileAnalysis) -> bool:
        """
        Create and queue a language detection job for a file with unknown audio language.

        Args:
            file_analysis: File analysis

        Returns:
            True if job was queued successfully
        """
        try:
            from backend.core.models import JobType, JobStatus

            # Check if there's already a completed detection job for this file
            with database.get_session() as session:
                from backend.core.models import Job
                existing_detection = session.query(Job).filter(
                    Job.file_path == file_analysis.file_path,
                    Job.job_type == JobType.LANGUAGE_DETECTION,
                    Job.status == JobStatus.COMPLETED,
                ).first()
                if existing_detection:
                    logger.info(
                        f"✓ Language already detected for {file_analysis.file_name}, "
                        f"checking for transcription rules..."
                    )
                    # Extract detected language from SRT content
                    if existing_detection.srt_content:
                        # Format: "Language detected: ja (Japanese)\nConfidence: 99%"
                        lines = existing_detection.srt_content.split('\n')
                        if lines:
                            lang_line = lines[0]
                            if 'Language detected:' in lang_line:
                                lang_code = lang_line.split(':')[1].strip().split(' ')[0]
                                # Trigger rule checking with detected language
                                self._check_and_queue_transcription_for_file(
                                    file_analysis.file_path, lang_code
                                )
                    return False

            # Add language detection job with high priority
            job = queue_manager.add_job(
                file_path=file_analysis.file_path,
                file_name=file_analysis.file_name,
                source_lang=None,  # To be detected
                target_lang=None,
                quality_preset=QualityPreset.FAST,
                priority=15,  # Higher than normal transcription (0-10) but lower than manual (20+)
                transcribe_or_translate="transcribe",
                job_type=JobType.LANGUAGE_DETECTION,
            )
            if job:
                logger.info(
                    f"✓ Queued LANGUAGE DETECTION job {job.id} for {file_analysis.file_name}"
                )
                self.files_queued += 1
                return True
            else:
                logger.warning(
                    f"✗ Skipped detection for {file_analysis.file_name}: Job already exists"
                )
                return False
        except Exception as e:
            logger.error(f"Error queuing language detection job: {e}")
            return False

    def _check_and_queue_transcription_for_file(self, file_path: str, detected_lang_code: str):
        """
        Check if a file with detected language matches any scan rules and queue transcription.

        Args:
            file_path: Path to the file
            detected_lang_code: Detected language code (ISO 639-1, e.g., 'ja', 'en')
        """
        try:
            logger.info(
                f"Checking if {file_path} with language '{detected_lang_code}' "
                f"matches any scan rules..."
            )
            # Load scan rules
            rules = self._load_scan_rules()
            if not rules:
                logger.debug("No active scan rules found")
                return

            # Check each rule
            for rule in rules:
                # Check if language matches
                if rule.audio_language_is:
                    try:
                        rule_lang = LanguageCode.from_string(rule.audio_language_is)
                        # Convert detected language (ISO 639-1) to LanguageCode for comparison
                        detected_lang = LanguageCode.from_iso_639_1(detected_lang_code)
                        if detected_lang != rule_lang:
                            logger.debug(
                                f"Rule '{rule.name}' requires language {rule_lang}, "
                                f"but detected {detected_lang}"
                            )
                            continue
                    except Exception as e:
                        logger.warning(f"Could not parse rule language code: {e}")
                        continue

                # Check if language should be excluded
                if rule.audio_language_not:
                    excluded_langs = [
                        LanguageCode.from_string(lang.strip())
                        for lang in rule.audio_language_not.split(',')
                    ]
                    detected_lang_obj = LanguageCode.from_iso_639_1(detected_lang_code)
                    if detected_lang_obj in excluded_langs:
                        logger.debug(
                            f"Rule '{rule.name}' excludes language {detected_lang_code}"
                        )
                        continue

                # File matches this rule - queue transcription job
                logger.info(
                    f"File {file_path} matches rule '{rule.name}' - queueing transcription job"
                )

                # Get target language (ISO 639-1, consistent with the rest of this method)
                target_lang_code = rule.target_language or "en"

                # Map quality preset
                quality_map = {
                    "fast": QualityPreset.FAST,
                    "balanced": QualityPreset.BALANCED,
                    "best": QualityPreset.BEST,
                }
                quality = quality_map.get(rule.quality_preset, QualityPreset.FAST)

                # Create transcription job
                # All language codes in ISO 639-1 format (ja, en, es)
                job = queue_manager.add_job(
                    file_path=file_path,
                    file_name=os.path.basename(file_path),
                    source_lang=detected_lang_code,  # ISO 639-1 (ja, en, es)
                    target_lang=target_lang_code,  # ISO 639-1 (es, en, fr, etc.)
                    quality_preset=quality,
                    transcribe_or_translate=rule.action_type or "translate",
                    priority=rule.job_priority or 5,
                    is_manual_request=False,
                )
                if job:
                    logger.info(
                        f"✓ Queued transcription job {job.id} for {os.path.basename(file_path)}: "
                        f"{rule.action_type} {detected_lang_code} → {target_lang_code}"
                    )
                    self.files_queued += 1

                # Only queue once (first matching rule)
                return

            logger.debug(f"File {file_path} does not match any scan rules")
        except Exception as e:
            logger.error(
                f"Error checking scan rules for {file_path}: {e}",
                exc_info=True
            )

    def _queue_job_from_rule(
        self, file_analysis: FileAnalysis, rule: ScanRule
    ) -> bool:
        """
        Create and queue a job based on matched rule.

        Args:
            file_analysis: File analysis
            rule: Matched scan rule

        Returns:
            True if job was queued successfully
        """
        try:
            # Map quality preset
            quality_map = {
                "fast": QualityPreset.FAST,
                "balanced": QualityPreset.BALANCED,
                "best": QualityPreset.BEST,
            }
            quality_preset = quality_map.get(rule.quality_preset, QualityPreset.FAST)

            # Determine source language (default audio track)
            source_lang = file_analysis.default_audio_language
            source_lang_code = source_lang.to_iso_639_1() if source_lang else None

            # Add job to queue
            job = queue_manager.add_job(
                file_path=file_analysis.file_path,
                file_name=file_analysis.file_name,
                source_lang=source_lang_code,
                target_lang=rule.target_language,
                quality_preset=quality_preset,
                priority=rule.job_priority,
                transcribe_or_translate=rule.action_type,
            )
            if job:
                logger.info(
                    f"✓ Queued job {job.id} for {file_analysis.file_name}: "
                    f"{rule.action_type} {source_lang_code} → {rule.target_language}"
                )
                self.files_queued += 1
                return True
            else:
                logger.warning(
                    f"✗ Skipped {file_analysis.file_name}: Job already exists or in queue "
                    f"(path: {file_analysis.file_path}, target: {rule.target_language})"
                )
                return False
        except Exception as e:
            logger.error(f"Error queuing job: {e}")
            return False

    def _load_scan_rules(self) -> List[ScanRule]:
        """
        Load enabled scan rules from database.

        Returns:
            List of enabled rules (sorted by priority)
        """
        with database.get_session() as session:
            rules = (
                session.query(ScanRule)
                .filter(ScanRule.enabled == True)
                .order_by(ScanRule.priority.desc(), ScanRule.id)
                .all()
            )
            # Expunge rules from the session so they can be used outside the context
            for rule in rules:
                session.expunge(rule)
            return rules

    def _persist_scan_stats(self, files_in_this_scan: int = 0):
        """
        Persist scan statistics to the database so they survive restarts.

        Args:
            files_in_this_scan: Number of files scanned in the current scan operation
        """
        from backend.core.settings_service import settings_service
        try:
            # Save last scan time
            if self.last_scan_time:
                settings_service.set(
                    'scanner_last_scan_time',
                    self.last_scan_time.isoformat(),
                    category='scanner'
                )

            # Increment scan count
            scan_count = settings_service.get('scanner_scan_count', 0)
            try:
                scan_count = int(scan_count)
            except (ValueError, TypeError):
                scan_count = 0
            scan_count += 1
            settings_service.set(
                'scanner_scan_count',
                str(scan_count),
                category='scanner'
            )

            # Save total files scanned (cumulative)
            if files_in_this_scan > 0:
                current_total = settings_service.get('scanner_total_files_scanned', 0)
                try:
                    current_total = int(current_total)
                except (ValueError, TypeError):
                    current_total = 0
                new_total = current_total + files_in_this_scan
                settings_service.set(
                    'scanner_total_files_scanned',
                    str(new_total),
                    category='scanner'
                )
                logger.debug(
                    f"Persisted scan stats: scan_count={scan_count}, "
                    f"last_scan={self.last_scan_time}, total_files={new_total}"
                )
            else:
                logger.debug(
                    f"Persisted scan stats: scan_count={scan_count}, "
                    f"last_scan={self.last_scan_time}"
                )
        except Exception as e:
            logger.error(f"Failed to persist scan stats: {e}")

    # === Scheduler Methods ===

    def start_scheduler(self, interval_minutes: Optional[int] = None):
        """
        Start scheduled periodic scanning.

        Args:
            interval_minutes: Scan interval (uses config if None)
        """
        if self.scheduler and self.scheduler.running:
            logger.warning("Scheduler already running")
            return

        from backend.core.settings_service import settings_service
        interval = interval_minutes or int(
            settings_service.get('scanner_schedule_interval_minutes', 360)
        )

        self.scheduler = BackgroundScheduler()
        self.scheduler.add_job(
            func=self.scan_libraries,
            trigger="interval",
            minutes=interval,
            id="library_scan",
            name=f"Library scan (every {interval}m)",
        )
        self.scheduler.start()
        logger.info(f"Scheduler started: scanning every {interval} minutes")

    def stop_scheduler(self):
        """Stop scheduled scanning."""
        if self.scheduler and self.scheduler.running:
            try:
                # wait=False to avoid blocking on running jobs
                self.scheduler.shutdown(wait=False)
            except Exception as e:
                logger.warning(f"Error shutting down scheduler: {e}")
            self.scheduler = None
            logger.info("Scheduler stopped")

    # === File Watcher Methods ===

    def start_file_watcher(self, paths: Optional[List[str]] = None, recursive: bool = True):
        """
        Start real-time file watching.

        Args:
            paths: Paths to watch (uses config if None)
            recursive: Whether to watch subdirectories
        """
        if self.file_observer:
            logger.warning("File watcher already running")
            return

        # Get paths from settings_service if not provided
        if paths is None:
            from backend.core.settings_service import settings_service
            library_paths = settings_service.get('library_paths', '')
            if not library_paths:
                logger.error("No library paths configured")
                return
            # Handle both comma and pipe separators
            if '|' in library_paths:
                paths = [p.strip() for p in library_paths.split("|") if p.strip()]
            else:
                paths = [p.strip() for p in library_paths.split(",") if p.strip()]

        self.file_observer = Observer()
        handler = LibraryFileHandler(self)
        for path in paths:
            if os.path.isdir(path):
                self.file_observer.schedule(handler, path, recursive=recursive)
                logger.info(f"Watching: {path} (recursive={recursive})")
        self.file_observer.start()
        logger.info("File watcher started")

    def stop_file_watcher(self):
        """Stop real-time file watching."""
        if self.file_observer:
            try:
                self.file_observer.stop()
                # Use a timeout to avoid blocking indefinitely
                self.file_observer.join(timeout=5.0)
            except Exception as e:
                logger.warning(f"Error stopping file watcher: {e}")
            self.file_observer = None
            logger.info("File watcher stopped")

    def get_status(self) -> Dict:
        """
        Get scanner status.

        Returns:
            Dictionary with scanner status
        """
        from backend.core.settings_service import settings_service

        watched_paths = []
        if self.file_observer:
            # Get watched paths from the observer's emitters; each emitter
            # exposes its ObservedWatch via .watch
            watched_paths = [str(emitter.watch.path) for emitter in self.file_observer.emitters]

        next_scan_time = None
        if self.scheduler and self.scheduler.running:
            # Get next scheduled job time (next_run_time can be None for paused jobs)
            jobs = self.scheduler.get_jobs()
            if jobs and jobs[0].next_run_time:
                next_scan_time = jobs[0].next_run_time.isoformat()

        # Get last_scan_time from database (persisted) or memory (current session)
        last_scan_time = self.last_scan_time
        if last_scan_time is None:
            # Try to load from database
            db_last_scan = settings_service.get('scanner_last_scan_time')
            if db_last_scan:
                try:
                    last_scan_time = datetime.fromisoformat(db_last_scan)
                except ValueError:
                    last_scan_time = None

        # Get scan count from database
        scan_count = settings_service.get('scanner_scan_count', 0)
        try:
            scan_count = int(scan_count)
        except (ValueError, TypeError):
            scan_count = 0

        # Get total_files_scanned from database
        total_files_scanned = settings_service.get('scanner_total_files_scanned', 0)
        try:
            total_files_scanned = int(total_files_scanned)
        except (ValueError, TypeError):
            total_files_scanned = 0

        return {
            "scheduler_enabled": self.scheduler is not None,
            "scheduler_running": self.scheduler is not None and self.scheduler.running,
            "next_scan_time": next_scan_time,
            "watcher_enabled": self.file_observer is not None,
            "watcher_running": self.file_observer is not None,
            "watched_paths": watched_paths,
            "last_scan_time": last_scan_time.isoformat() if last_scan_time else None,
            "total_scans": scan_count,
            "total_files_scanned": total_files_scanned,
        }

    def scan_paths(self, paths: List[str], recursive: bool = True) -> Dict:
        """
        Scan specific paths.

        Args:
            paths: List of paths to scan
            recursive: Whether to scan subdirectories

        Returns:
            Scan result dictionary
        """
        if self.is_scanning:
            logger.warning("Scan already in progress")
            return {
                "scanned_files": 0,
                "matched_files": 0,
                "jobs_created": 0,
                "skipped_files": 0,
                "paths_scanned": [],
                "error": "Scan already in progress",
            }

        self.is_scanning = True
        scanned = 0
        matched = 0
        jobs_created = 0
        skipped = 0
        try:
            for path in paths:
                if not os.path.exists(path):
                    logger.warning(f"Path does not exist: {path}")
                    continue

                # Scan directory
                if os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for file in files:
                            file_path = os.path.join(root, file)
                            if not FileAnalyzer.is_video_file(file_path):
                                continue
                            scanned += 1
                            # Process file
                            if self.process_file(file_path):
                                matched += 1
                                jobs_created += 1
                            else:
                                skipped += 1
                        if not recursive:
                            # Stop after the top-level directory
                            break
                # Single file
                elif os.path.isfile(path):
                    if FileAnalyzer.is_video_file(path):
                        scanned += 1
                        if self.process_file(path):
                            matched += 1
                            jobs_created += 1
                        else:
                            skipped += 1

            self.last_scan_time = datetime.now(timezone.utc)
            self.files_scanned += scanned
            self._persist_scan_stats(files_in_this_scan=scanned)
            return {
                "scanned_files": scanned,
                "matched_files": matched,
                "jobs_created": jobs_created,
                "skipped_files": skipped,
                "paths_scanned": paths,
            }
        finally:
            self.is_scanning = False


# Global scanner instance
library_scanner = LibraryScanner()
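
# Minimal manual smoke test - a sketch, not part of the service. It assumes
# the backend packages are importable and that SOME_LIBRARY_DIR points at a
# real directory (the path below is hypothetical).
if __name__ == "__main__":
    import json

    logging.basicConfig(level=logging.INFO)
    SOME_LIBRARY_DIR = "/media/library"  # hypothetical path; replace with your own
    result = library_scanner.scan_paths([SOME_LIBRARY_DIR], recursive=True)
    print(json.dumps(result, indent=2))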