Files
Transcriptarr/subgen/subgen.py
McCloudS ab918bc8ba Update subgen.py
Cleaned up some of the environment variables and added the ability to skip subtitle generation if the file already has an internal subtitle in the language you want. This is configured via SKIPIFINTERNALSUBLANG with a 3-letter language code, e.g. "SKIPIFINTERNALSUBLANG=eng" (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).

Script will now default to values listed in the readme (frontpage)
2023-01-30 14:44:42 -07:00

110 lines
4.3 KiB
Python

import glob
import json
import os
import pathlib
import shlex
import subprocess
import sys
import time

import webhook_listener
# parse our arguments from the Dockerfile
def _env_flag(name, default):
    """Read a boolean setting from the environment variable *name*.

    Returns *default* when the variable is unset or blank. Otherwise the
    value is False for "0", "false", "no" and "off" (case-insensitive) and
    True for anything else, so any value that previously enabled a flag
    still enables it.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    return raw.strip().lower() not in ("0", "false", "no", "off")


# String settings fall back to their default when unset or empty.
whisper_model = os.getenv('WHISPER_MODEL') or "medium"
whisper_speedup = _env_flag('WHISPER_SPEEDUP', False)
whisper_threads = os.getenv('WHISPER_THREADS') or "4"
whisper_processors = os.getenv('WHISPER_PROCESSORS') or "1"
# NOTE: the previous `bool(os.getenv(...)) or True` form was always True,
# which made it impossible to turn these two options off.
procaddedmedia = _env_flag('PROCADDEDMEDIA', True)
procmediaonplay = _env_flag('PROCMEDIAONPLAY', False)
namesublang = os.getenv('NAMESUBLANG') or "aa"
updaterepo = _env_flag('UPDATEREPO', True)
skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG') or "eng"
def _has_internal_sub(fullpath, language):
    """Report whether ffprobe lists a subtitle stream mentioning *language*.

    Returns True/False based on a substring match against ffprobe's output,
    or None when ffprobe could not be started or exited with an error.
    """
    probe = [
        "ffprobe", "-loglevel", "error", "-select_streams", "s",
        "-show_entries", "stream=index:stream_tags=language",
        "-of", "csv=p=0", fullpath,
    ]
    try:
        # An argument list (no shell) keeps quotes, `$` or backticks in the
        # media path from being interpreted by a shell.
        output = subprocess.check_output(probe)
    except (OSError, subprocess.CalledProcessError) as err:
        print("ffprobe failed for {}: {}".format(fullpath, err))
        return None
    return language in output.decode("utf-8", errors="replace")


def process_post_request(request, *args, **kwargs):
    """Handle a webhook POST and generate subtitles when appropriate.

    The request body is expected to be a JSON object with the string keys
    'file' (full path of the media file), 'filename' and 'event'. Requests
    with an empty, malformed or incomplete payload are logged and ignored.

    Subtitles are generated only when the event is enabled by configuration
    ("added" / "played"), no "*subgen*" file exists next to the media file,
    no "<file>.output.wav" workfile exists, and the media file has no
    internal subtitle stream matching ``skipifinternalsublang``.

    Always returns None.
    """
    print("Received a webhook!")
    content_length = int(request.headers.get('Content-Length', 0))
    if content_length > 0:
        body = request.body.read(content_length).decode()
    else:
        body = '{}'
    print(body)

    # Parse once and validate, instead of crashing on a missing key.
    try:
        payload = json.loads(body)
        fullpath = payload['file']
        filename = payload['filename']
        event = payload['event']
    except (ValueError, KeyError, TypeError) as err:
        print("Ignoring webhook with missing or malformed payload: {!r}".format(err))
        return
    if not all(isinstance(value, str) for value in (fullpath, filename, event)):
        print("Ignoring webhook: 'file', 'filename' and 'event' must be strings")
        return

    filepath = os.path.dirname(fullpath)
    extension = pathlib.Path(filename).suffix
    # Strip only the trailing suffix; str.replace would also remove earlier
    # occurrences of the same text inside the name.
    filenamenoextension = filename[:-len(extension)] if extension else filename
    print("fullpath: " + fullpath)
    print("filename: " + filename)
    print("filepath: " + filepath)
    print("extension: " + extension)
    print("file name with no extension: " + filenamenoextension)
    print("event: " + event)

    file_has_internal_sub = _has_internal_sub(fullpath, skipifinternalsublang)
    if file_has_internal_sub is None:
        print("Could not inspect subtitle streams, skipping generation")
        return
    if file_has_internal_sub:
        print("File already has an internal sub we want, skipping generation")
        return

    event_enabled = (procaddedmedia and event == "added") or (
        procmediaonplay and event == "played")
    if not event_enabled:
        return

    # glob.escape keeps characters such as "[" and "]" in media names from
    # being treated as wildcards, which would hide an existing subgen file.
    existing_subs = glob.glob("{}/{}*subgen*".format(
        glob.escape(filepath), glob.escape(filenamenoextension)))
    wavfile = "{}.output.wav".format(fullpath)
    if existing_subs or os.path.isfile(wavfile):
        return

    if whisper_speedup:
        print("This is a speedup run!")
        print(os.getenv('WHISPER_SPEEDUP'))
        finalsubname = "{0}/{1}.subgen.{2}.speedup.{3}".format(
            filepath, filenamenoextension, whisper_model, namesublang)
    else:
        print("No speedup")
        finalsubname = "{0}/{1}.subgen.{2}.{3}".format(
            filepath, filenamenoextension, whisper_model, namesublang)

    try:
        gen_subtitles(fullpath, wavfile, finalsubname)
    finally:
        # Remove the workfile even if generation raised, so a failed run
        # does not block every later attempt for this file.
        if os.path.isfile(wavfile):
            print("Deleting WAV workfile")
            os.remove(wavfile)
    return
def gen_subtitles(filename, inputwav, finalsubname):
    """Run the two-step subtitle pipeline for one media file.

    Step one hands *filename* to strip_audio; step two hands *inputwav* and
    *finalsubname* to run_whisper. *inputwav* is not forwarded to
    strip_audio, so the caller is responsible for passing the path that
    strip_audio actually writes.
    """
    # 1) extract the audio track
    strip_audio(filename=filename)
    # 2) transcribe the extracted audio
    run_whisper(inputwav=inputwav, finalsubname=finalsubname)
def strip_audio(filename):
    """Extract the audio of *filename* into "<filename>.output.wav".

    Invokes ffmpeg with ``-ar 16000 -ac 1 -c:a pcm_s16le`` and ``-y`` so an
    existing output file is overwritten. ffmpeg's exit status is not
    checked; a failure to start ffmpeg is logged rather than raised.
    """
    print("Starting strip audio")
    # Pass an argument list instead of a shell string so that quotes, `$`,
    # backticks or `;` in the media path are never interpreted by a shell.
    command = [
        "ffmpeg", "-y", "-i", filename,
        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
        "{}.output.wav".format(filename),
    ]
    print("Command: " + " ".join(shlex.quote(part) for part in command))
    try:
        subprocess.call(command)
    except OSError as err:
        # Without a shell, a missing ffmpeg binary raises instead of
        # returning exit code 127; keep the function non-raising.
        print("Could not run ffmpeg: {}".format(err))
    print("Done stripping audio")
def run_whisper(inputwav, finalsubname):
    """Transcribe *inputwav* with whisper.cpp, writing SRT output.

    Runs ``./main`` from /whisper.cpp with the configured model, thread and
    processor counts; *finalsubname* is passed as the ``-of`` output base
    name and ``-su`` is added when ``whisper_speedup`` is set. The exit
    status is not checked; a failure to start the binary is logged rather
    than raised.
    """
    print("Starting whisper")
    # Build an argument list rather than a shell string so that special
    # characters in the paths are passed through verbatim.
    command = [
        "./main",
        "-m", "models/ggml-{}.bin".format(whisper_model),
        "-of", finalsubname,
        "-t", str(whisper_threads),
        "-p", str(whisper_processors),
        "-osrt",
    ]
    if whisper_speedup:
        # Appending the flag avoids the old str.replace, which would also
        # have rewritten "-osrt" if it appeared inside a file path.
        command.append("-su")
    command += ["-f", inputwav]
    print("Command: " + " ".join(shlex.quote(part) for part in command))
    try:
        # cwd= scopes the working directory to the child process instead of
        # changing it for the whole server from inside a request handler.
        subprocess.call(command, cwd="/whisper.cpp")
    except OSError as err:
        print("Could not run whisper: {}".format(err))
    print("Done with whisper")
# --- Startup: make sure whisper.cpp is checked out and built, then serve ---

# Clone whisper.cpp on first run.
if not os.path.isdir("/whisper.cpp"):
    os.mkdir("/whisper.cpp")
    subprocess.call("git clone https://github.com/ggerganov/whisper.cpp .",
                    shell=True, cwd="/whisper.cpp")

# Always work from the checkout: "git pull" and "make" below must run inside
# /whisper.cpp even when the directory already existed before this start.
os.chdir("/whisper.cpp")

if updaterepo:
    print("Updating repo!")
    subprocess.call("git pull", shell=True)

if os.path.isfile("/whisper.cpp/samples/jfk.wav"):
    print("Deleting sample file")
    os.remove("/whisper.cpp/samples/jfk.wav")

# Builds via the make target named after the configured model.
subprocess.call("make " + whisper_model, shell=True)

print("Starting webhook!")
webhooks = webhook_listener.Listener(handlers={"POST": process_post_request})
webhooks.start()
print("Webhook started")

# Keep the main thread alive and emit a heartbeat every five minutes.
while True:
    print("Still alive...")
    time.sleep(300)