Update subgen.py

Giant fail, reverting back
This commit is contained in:
McCloudS
2023-10-19 00:52:16 -06:00
committed by GitHub
parent d16121070c
commit 49dca592be

View File

@@ -1,55 +1,51 @@
import sys
import os import os
import time
import json import json
import glob
import pathlib
import requests import requests
import xml.etree.ElementTree as ET import subprocess
import threading
import stable_whisper
import av
from flask import Flask, request from flask import Flask, request
import xml.etree.ElementTree as ET
def convert_to_bool(in_bool): def converttobool(in_bool):
value = str(in_bool).lower() value = str(in_bool).lower()
return value not in ('false', 'off', '0') if value in ('false', 'off', '0'):
return False
else:
return True
# Replace your getenv calls with appropriate default values here # parse our arguments from environment variables
plextoken = os.getenv('PLEXTOKEN', "token here") plextoken = os.getenv('PLEXTOKEN', "tokenhere")
plexserver = os.getenv('PLEXSERVER', "http://192.168.1.111:32400") plexserver = os.getenv('PLEXSERVER', "http://plex:32400")
whisper_model = os.getenv('WHISPER_MODEL', "medium") whisper_model = os.getenv('WHISPER_MODEL', "medium")
whisper_threads = int(os.getenv('WHISPER_THREADS', 4)) whisper_speedup = converttobool(os.getenv('WHISPER_SPEEDUP', "False"))
concurrent_transcriptions = int(os.getenv('CONCURRENT_TRANSCRIPTIONS', '1')) whisper_threads = os.getenv('WHISPER_THREADS', "4")
procaddedmedia = convert_to_bool(os.getenv('PROCADDEDMEDIA', "True")) whisper_processors = os.getenv('WHISPER_PROCESSORS', "1")
procmediaonplay = convert_to_bool(os.getenv('PROCMEDIAONPLAY', "True")) procaddedmedia = converttobool(os.getenv('PROCADDEDMEDIA', "True"))
procmediaonplay = converttobool(os.getenv('PROCMEDIAONPLAY', "False"))
namesublang = os.getenv('NAMESUBLANG', "aa") namesublang = os.getenv('NAMESUBLANG', "aa")
updaterepo = converttobool(os.getenv('UPDATEREPO', "True"))
skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', "eng") skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG', "eng")
webhookport = int(os.getenv('WEBHOOKPORT', 8090)) webhookport = os.getenv('WEBHOOKPORT', 8090)
word_level_highlight = convert_to_bool(os.getenv('WORD_LEVEL_HIGHLIGHT', "False"))
debug = convert_to_bool(os.getenv('DEBUG', False))
use_path_mapping = convert_to_bool(os.getenv('USE_PATH_MAPPING', False))
path_mapping_from = os.getenv('PATH_MAPPING_FROM', '/tv')
path_mapping_to = os.getenv('PATH_MAPPING_TO', '/Volumes/TV')
app = Flask(__name__) app = Flask(__name__)
model = stable_whisper.load_faster_whisper(whisper_model, cpu_threads=whisper_threads)
files_to_transcribe = set()
subextension = '.subgen.' + whisper_model + '.' + namesublang + '.srt'
@app.route("/webhook", methods=["POST"]) @app.route("/webhook", methods=["POST"])
def receive_webhook(): def receive_webhook():
if debug:
print("We got a hook, let's figure out where it came from!")
if request.headers.get("source") == "Tautulli": if request.headers.get("source") == "Tautulli":
payload = request.json payload = request.json
if debug:
print("This hook is from Tautulli!")
else: else:
payload = json.loads(request.form['payload']) payload = json.loads(request.form['payload'])
event = payload.get("event") event = payload.get("event")
if debug:
print("event hook: " + str(payload))
if ((event == "library.new" or event == "added") and procaddedmedia) or ((event == "media.play" or event == "played") and procmediaonplay): if ((event == "library.new" or event == "added") and procaddedmedia) or ((event == "media.play" or event == "played") and procmediaonplay):
if event == "library.new" or event == "media.play": # these are the plex webhooks! if event == "library.new" or event == "media.play": # these are the plex webhooks!
print("This hook is from Plex!") print("Plex webhook received!")
fullpath = get_file_name(payload.get("Metadata").get("ratingKey"), plexserver, plextoken) metadata = payload.get("Metadata")
ratingkey = metadata.get("ratingKey")
fullpath = get_file_name(ratingkey, plexserver, plextoken)
elif event == "added" or event == "played": elif event == "added" or event == "played":
print("Tautulli webhook received!") print("Tautulli webhook received!")
fullpath = payload.get("file") fullpath = payload.get("file")
@@ -57,71 +53,64 @@ def receive_webhook():
print("Didn't get a webhook we expected, discarding") print("Didn't get a webhook we expected, discarding")
return "" return ""
print("Path of file: " + fullpath) filename = pathlib.Path(fullpath).name
if use_path_mapping: filepath = os.path.dirname(fullpath)
fullpath = fullpath.replace(path_mapping_from, path_mapping_to) filenamenoextension = filename.replace(pathlib.Path(fullpath).suffix, "")
print("Updated path: " + fullpath.replace(path_mapping_from, path_mapping_to))
print("fullpath: " + fullpath)
print("filepath: " + filepath)
print("file name with no extension: " + filenamenoextension)
print("event: " + event) print("event: " + event)
print("Transcriptions are limited to running " + str(concurrent_transcriptions) + " at a time")
print("Running " + str(whisper_threads) + " threads per transcription")
add_file_for_transcription(fullpath) if skipifinternalsublang in str(subprocess.check_output("ffprobe -loglevel error -select_streams s -show_entries stream=index:stream_tags=language -of csv=p=0 \"{}\"".format(fullpath), shell=True)):
return ""
def gen_subtitles(inputvideo):
    """Transcribe one video file and write the subtitles beside it.

    Used as the target of the per-file transcription thread. Any error
    raised while transcribing or writing is reported and swallowed so the
    thread ends quietly; in every case the file is taken out of
    ``files_to_transcribe`` so a later webhook can queue it again.

    Args:
        inputvideo: Path of the video file to transcribe.
    """
    try:
        print(f"Transcribing file: {inputvideo}")
        result = model.transcribe_stable(inputvideo)
        # Strip the video extension so the output name is exactly what the
        # "already generated" check looks for
        # (file_path.rsplit('.', 1)[0] + subextension).
        subtitle_path = inputvideo.rsplit('.', 1)[0] + subextension
        result.to_srt_vtt(subtitle_path, word_level=word_level_highlight)
        print(f"Transcription of {inputvideo} is completed.")
    except Exception as e:
        # Best-effort worker: report and carry on rather than kill the thread
        # with a traceback.
        print(f"Error processing or transcribing {inputvideo}: {e}")
    finally:
        # discard() rather than remove(): never raise from cleanup, and always
        # release the entry, even when transcription failed.
        files_to_transcribe.discard(inputvideo)
# Function to add a file for transcription
def add_file_for_transcription(file_path):
if file_path not in files_to_transcribe:
if has_subtitle_language(file_path, skipifinternalsublang):
print("File already has an internal sub we want, skipping generation") print("File already has an internal sub we want, skipping generation")
return "File already has an internal sub we want, skipping generation" return "File already has an internal sub we want, skipping generation"
elif os.path.exists(file_path.rsplit('.', 1)[0] + subextension): elif os.path.isfile("{}.output.wav".format(fullpath)):
print("WAV file already exists, we're assuming it's processing and skipping it")
return "WAV file already exists, we're assuming it's processing and skipping it"
elif len(glob.glob("{}/{}*subgen*".format(filepath, filenamenoextension))) > 0:
print("We already have a subgen created for this file, skipping it") print("We already have a subgen created for this file, skipping it")
return "We already have a subgen created for this file, skipping it" return "We already have a subgen created for this file, skipping it"
files_to_transcribe.add(file_path) if whisper_speedup:
print(f"Added {file_path} for transcription.") print("This is a speedup run!")
# Start transcription for the file in a separate thread print(whisper_speedup)
finalsubname = "{0}/{1}.subgen.{2}.speedup.{3}".format(filepath, filenamenoextension, whisper_model, namesublang)
transcription_thread = threading.Thread(target=gen_subtitles, args=(file_path,))
transcription_thread.start()
else: else:
print(f"File {file_path} is already in the transcription list. Skipping.") print("No speedup")
finalsubname = "{0}/{1}.subgen.{2}.{3}".format(filepath, filenamenoextension, whisper_model, namesublang)
def has_subtitle_language(video_file, target_language): gen_subtitles(fullpath, "{}.output.wav".format(fullpath), finalsubname)
try:
container = av.open(video_file)
subtitle_stream = None
# Iterate through the streams in the video file if os.path.isfile("{}.output.wav".format(fullpath)):
for stream in container.streams: print("Deleting WAV workfile")
if stream.type == 'subtitle': os.remove("{}.output.wav".format(fullpath))
# Check if the subtitle stream has the target language
if 'language' in stream.metadata and stream.metadata['language'] == target_language:
subtitle_stream = stream
break
if subtitle_stream: return ""
print(f"Subtitles in '{target_language}' language found in the video.")
return True
else:
print(f"No subtitles in '{target_language}' language found in the video.")
container.close() def gen_subtitles(filename, inputwav, finalsubname):
except Exception as e: strip_audio(filename)
print(f"An error occurred: {e}") run_whisper(inputwav, finalsubname)
return False
def strip_audio(filename):
    """Extract the audio track of *filename* into a WAV work file.

    Runs ffmpeg with ``-ar 16000 -ac 1 -c:a pcm_s16le`` and writes the result
    to ``<filename>.output.wav``, overwriting any existing file (``-y``).

    Args:
        filename: Path of the media file to read.
    """
    print("Starting strip audio")
    # Pass an argument list (no shell): the path originates from a webhook
    # payload, so quotes, `$` or backticks in it must never be interpreted
    # by a shell.
    command = [
        "ffmpeg", "-y",
        "-i", filename,
        "-ar", "16000",
        "-ac", "1",
        "-c:a", "pcm_s16le",
        "{}.output.wav".format(filename),
    ]
    print("Command: " + " ".join(command))
    returncode = subprocess.call(command)
    if returncode != 0:
        # Report the failure instead of ignoring it; stay best-effort.
        print("ffmpeg exited with status {}".format(returncode))
    print("Done stripping audio")
def run_whisper(inputwav, finalsubname):
    """Run the whisper.cpp ``main`` binary on a WAV file with SRT output.

    Changes the process working directory to ``/whisper.cpp`` and invokes
    ``./main`` with the configured model, thread count and processor count.
    ``-su`` is added when ``whisper_speedup`` is enabled.

    Args:
        inputwav: Path of the WAV file to transcribe (passed as ``-f``).
        finalsubname: Output file name passed as ``-of``.
    """
    print("Starting whisper")
    os.chdir("/whisper.cpp")
    # Build an argument list (no shell): both paths derive from a webhook
    # payload, and a list also avoids rewriting a file name that happens to
    # contain "-osrt" when the speedup flag is inserted.
    command = [
        "./main",
        "-m", "models/ggml-{}.bin".format(whisper_model),
        "-of", finalsubname,
        "-t", str(whisper_threads),
        "-p", str(whisper_processors),
        "-osrt",
    ]
    if whisper_speedup:
        command.append("-su")
    command.extend(["-f", inputwav])
    print("Command: " + " ".join(command))
    returncode = subprocess.call(command)
    if returncode != 0:
        # Report the failure instead of ignoring it; stay best-effort.
        print("whisper exited with status {}".format(returncode))
    print("Done with whisper")
def get_file_name(item_id, plexserver, plextoken): def get_file_name(item_id, plexserver, plextoken):
url = f"{plexserver}/library/metadata/{item_id}" url = f"{plexserver}/library/metadata/{item_id}"
@@ -137,6 +126,18 @@ def get_file_name(item_id, plexserver, plextoken):
print(f"Error: {response.text}") print(f"Error: {response.text}")
return return
# Module-level whisper.cpp setup; these statements precede the
# "Starting webhook!" message.
# NOTE(review): indentation was lost in this view, so which of the lines below
# belong to each `if` body cannot be confirmed here -- verify against the
# original file before changing anything.
if not os.path.isdir("/whisper.cpp"):
os.mkdir("/whisper.cpp")
os.chdir("/whisper.cpp")
subprocess.call("git clone https://github.com/ggerganov/whisper.cpp .", shell=True)
if updaterepo:
print("Updating repo!")
# The pull itself is commented out, so only the message above is emitted.
#subprocess.call("git pull", shell=True)
if os.path.isfile("/whisper.cpp/samples/jfk.wav"): # delete the sample file, so it doesn't try transcribing it. Saves us a couple seconds.
print("Deleting sample file")
# The removal is commented out as well, so the sample file is left in place.
#os.remove("/whisper.cpp/samples/jfk.wav")
# Invokes `make <whisper_model>` through the shell in the current working
# directory.
subprocess.call("make " + whisper_model, shell=True)
print("Starting webhook!") print("Starting webhook!")
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=debug, host='0.0.0.0', port=int(webhookport)) app.run(debug=False, host='0.0.0.0', port=int(webhookport))