Update subgen.py
This commit is contained in:
35
subgen.py
35
subgen.py
@@ -384,11 +384,10 @@ async def asr(
|
|||||||
file_content = audio_file.file.read()
|
file_content = audio_file.file.read()
|
||||||
|
|
||||||
if encode:
|
if encode:
|
||||||
args['audio'] = file_content()
|
args['audio'] = file_content
|
||||||
else:
|
else:
|
||||||
args['audio'] = np.frombuffer(file_content, np.int16).flatten().astype(np.float32) / 32768.0
|
args['audio'] = np.frombuffer(file_content, np.int16).flatten().astype(np.float32) / 32768.0
|
||||||
|
args['input_sr'] = 16000
|
||||||
args['input_sr'] = 16000
|
|
||||||
|
|
||||||
if custom_regroup:
|
if custom_regroup:
|
||||||
args['regroup'] = custom_regroup
|
args['regroup'] = custom_regroup
|
||||||
@@ -429,8 +428,9 @@ async def asr(
|
|||||||
@app.post("/detect-language")
|
@app.post("/detect-language")
|
||||||
async def detect_language(
|
async def detect_language(
|
||||||
audio_file: UploadFile = File(...),
|
audio_file: UploadFile = File(...),
|
||||||
#encode: bool = Query(default=True, description="Encode audio first through ffmpeg") # This is always false from Bazarr
|
encode: bool = Query(default=True, description="Encode audio first through ffmpeg"), # This is always false from Bazarr
|
||||||
detect_lang_length: int = Query(default=30, description="Detect language on the first X seconds of the file")
|
detect_lang_length: int = Query(default=detect_language_length, description="Detect language on X seconds of the file"),
|
||||||
|
detect_lang_offset: int = Query(default=detect_language_start_offset, description="Start Detect language X seconds into the file")
|
||||||
):
|
):
|
||||||
|
|
||||||
if force_detected_language_to:
|
if force_detected_language_to:
|
||||||
@@ -441,16 +441,22 @@ async def detect_language(
|
|||||||
"language_code": force_detected_language_to.to_iso_639_1()
|
"language_code": force_detected_language_to.to_iso_639_1()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
global detect_language_length, detect_language_offset
|
||||||
detected_language = LanguageCode.NONE
|
detected_language = LanguageCode.NONE
|
||||||
language_code = 'und'
|
language_code = 'und'
|
||||||
if force_detected_language_to:
|
if force_detected_language_to:
|
||||||
logging.info(f"ENV FORCE_DETECTED_LANGUAGE_TO is set: Forcing detected language to {force_detected_language_to}\n Returning without detection")
|
logging.info(f"ENV FORCE_DETECTED_LANGUAGE_TO is set: Forcing detected language to {force_detected_language_to}\n Returning without detection")
|
||||||
return {"detected_language": force_detected_language_to.to_name(), "language_code": force_detected_language_to.to_iso_639_1()}
|
return {"detected_language": force_detected_language_to.to_name(), "language_code": force_detected_language_to.to_iso_639_1()}
|
||||||
if int(detect_lang_length) != 30:
|
|
||||||
global detect_language_length
|
# Log custom detection time settings if modified
|
||||||
|
if detect_lang_length != detect_language_length:
|
||||||
|
logging.info(f"Detecting language on the first {detect_lang_length} seconds of the audio.")
|
||||||
detect_language_length = detect_lang_length
|
detect_language_length = detect_lang_length
|
||||||
if int(detect_language_length) != 30:
|
|
||||||
logging.info(f"Detect language is set to detect on the first {detect_language_length} seconds of the audio.")
|
if detect_lang_offset != detect_language_start_offset:
|
||||||
|
logging.info(f"Offsetting language detection by {detect_language_start_offset} seconds.")
|
||||||
|
detect_language_offset_length = detect_lang_offset
|
||||||
|
audio_file = extract_audio_segment_to_memory(audio_file, detect_language_start_offset, detect_language_length)
|
||||||
try:
|
try:
|
||||||
start_model()
|
start_model()
|
||||||
random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6))
|
random_name = ''.join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=6))
|
||||||
@@ -458,11 +464,16 @@ async def detect_language(
|
|||||||
task_id = { 'path': f"Bazarr-detect-language-{random_name}" }
|
task_id = { 'path': f"Bazarr-detect-language-{random_name}" }
|
||||||
task_queue.put(task_id)
|
task_queue.put(task_id)
|
||||||
args = {}
|
args = {}
|
||||||
#sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
|
sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
|
||||||
|
logging.info(f"Sample rate is: {sample_rate}")
|
||||||
audio_file.file.seek(0)
|
audio_file.file.seek(0)
|
||||||
args['progress_callback'] = progress
|
args['progress_callback'] = progress
|
||||||
args['input_sr'] = 16000
|
|
||||||
args['audio'] = whisper.pad_or_trim(np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0, args['input_sr'] * int(detect_language_length))
|
if encode:
|
||||||
|
args['audio'] = whisper.pad_or_trim(audio_file.file.read() , sample_rate * int(detect_language_length))
|
||||||
|
else:
|
||||||
|
args['audio'] = whisper.pad_or_trim(np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0, args['input_sr'] * int(detect_language_length))
|
||||||
|
args['input_sr'] = 16000
|
||||||
|
|
||||||
args.update(kwargs)
|
args.update(kwargs)
|
||||||
detected_language = LanguageCode.from_name(model.transcribe_stable(**args).language)
|
detected_language = LanguageCode.from_name(model.transcribe_stable(**args).language)
|
||||||
|
|||||||
Reference in New Issue
Block a user