Update subgen.py
This commit is contained in:
50
subgen.py
50
subgen.py
@@ -65,7 +65,7 @@ reload_script_on_change = convert_to_bool(os.getenv('RELOAD_SCRIPT_ON_CHANGE', F
|
||||
lrc_for_audio_files = convert_to_bool(os.getenv('LRC_FOR_AUDIO_FILES', True))
|
||||
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
|
||||
detect_language_length = int(os.getenv('DETECT_LANGUAGE_LENGTH', 30))
|
||||
detect_language_offset = int(os.getenv('DETECT_LANGUAGE_START_OFFSET', 90))
|
||||
detect_language_offset = int(os.getenv('DETECT_LANGUAGE_START_OFFSET', 0))
|
||||
skipifexternalsub = convert_to_bool(os.getenv('SKIPIFEXTERNALSUB', False))
|
||||
skip_if_to_transcribe_sub_already_exist = convert_to_bool(os.getenv('SKIP_IF_TO_TRANSCRIBE_SUB_ALREADY_EXIST', True))
|
||||
skipifinternalsublang = LanguageCode.from_string(os.getenv('SKIPIFINTERNALSUBLANG', ''))
|
||||
@@ -464,8 +464,8 @@ async def detect_language(
|
||||
task_id = { 'path': f"Bazarr-detect-language-{random_name}" }
|
||||
task_queue.put(task_id)
|
||||
args = {}
|
||||
sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
|
||||
logging.info(f"Sample rate is: {sample_rate}")
|
||||
#sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
|
||||
#logging.info(f"Sample rate is: {sample_rate}")
|
||||
audio_file.file.seek(0)
|
||||
args['progress_callback'] = progress
|
||||
|
||||
@@ -474,7 +474,7 @@ async def detect_language(
|
||||
args['input_sr'] = 16000
|
||||
else:
|
||||
#args['audio'] = whisper.pad_or_trim(np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0, args['input_sr'] * int(detect_language_length))
|
||||
args['audio'] = extract_audio_segment_to_memory(audio_file, detect_language_offset, detect_language_length).read()
|
||||
args['audio'] = await get_audio_chunk(audio_file, detect_lang_offset, detect_lang_length)
|
||||
args['input_sr'] = 16000
|
||||
|
||||
args.update(kwargs)
|
||||
@@ -488,12 +488,45 @@ async def detect_language(
|
||||
logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}")
|
||||
|
||||
finally:
|
||||
await audio_file.close()
|
||||
#await audio_file.close()
|
||||
task_queue.task_done()
|
||||
delete_model()
|
||||
|
||||
return {"detected_language": detected_language.to_name(), "language_code": language_code}
|
||||
|
||||
async def get_audio_chunk(audio_file, offset=detect_language_offset, length=detect_language_length, sample_rate=16000, audio_format=np.int16):
|
||||
"""
|
||||
Extract a chunk of audio from a file, starting at the given offset and of the given length.
|
||||
|
||||
:param audio_file: The audio file (UploadFile or file-like object).
|
||||
:param offset: The offset in seconds to start the extraction.
|
||||
:param length: The length in seconds for the chunk to be extracted.
|
||||
:param sample_rate: The sample rate of the audio (default 16000).
|
||||
:param audio_format: The audio format to interpret (default int16, 2 bytes per sample).
|
||||
|
||||
:return: A numpy array containing the extracted audio chunk.
|
||||
"""
|
||||
|
||||
# Number of bytes per sample (for int16, 2 bytes per sample)
|
||||
bytes_per_sample = np.dtype(audio_format).itemsize
|
||||
|
||||
# Calculate the start byte based on offset and sample rate
|
||||
start_byte = offset * sample_rate * bytes_per_sample
|
||||
|
||||
# Calculate the length in bytes based on the length in seconds
|
||||
length_in_bytes = length * sample_rate * bytes_per_sample
|
||||
|
||||
# Seek to the start position (this assumes the audio_file is a file-like object)
|
||||
await audio_file.seek(start_byte)
|
||||
|
||||
# Read the required chunk of audio (length_in_bytes)
|
||||
chunk = await audio_file.read(length_in_bytes)
|
||||
|
||||
# Convert the chunk into a numpy array (normalized to float32)
|
||||
audio_data = np.frombuffer(chunk, dtype=audio_format).flatten().astype(np.float32) / 32768.0
|
||||
|
||||
return audio_data
|
||||
|
||||
def detect_language_task(path):
|
||||
detected_language = LanguageCode.NONE
|
||||
language_code = 'und'
|
||||
@@ -570,6 +603,13 @@ def extract_audio_segment_to_memory(input_file, start_time, duration):
|
||||
logging.error(f"Error: {str(e)}")
|
||||
return None
|
||||
|
||||
except ffmpeg.Error as e:
|
||||
logging.error(f"FFmpeg error: {e.stderr.decode()}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error: {str(e)}")
|
||||
return None
|
||||
|
||||
def start_model():
|
||||
global model
|
||||
if model is None:
|
||||
|
||||
Reference in New Issue
Block a user