Update subgen.py

Cleaned up some of the environment variables and added the ability to skip subtitle generation when the file already contains an internal subtitle track in the language you want. Set this with SKIPIFINTERNALSUBLANG using a 3-letter language code, e.g. "SKIPIFINTERNALSUBLANG=eng" (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).

The script now defaults to the values listed in the README (front page).
This commit is contained in:
McCloudS
2023-01-30 14:44:42 -07:00
committed by GitHub
parent dd84b30bd0
commit ab918bc8ba

View File

@@ -8,14 +8,15 @@ import webhook_listener
import subprocess
# parse our arguments from the Dockerfile
whisper_model = sys.argv[1]
whisper_speedup = sys.argv[2]
whisper_threads = sys.argv[3]
whisper_processors = sys.argv[4]
procaddedmedia = sys.argv[5]
procmediaonplay = sys.argv[6]
namesublang = sys.argv[7]
updaterepo = sys.argv[8]
whisper_model = os.getenv('WHISPER_MODEL') or "medium"
whisper_speedup = bool(os.getenv('WHISPER_SPEEDUP'))
whisper_threads = os.getenv('WHISPER_THREADS') or "4"
whisper_processors = os.getenv('WHISPER_PROCESSORS') or "1"
procaddedmedia = bool(os.getenv('PROCADDEDMEDIA')) or True
procmediaonplay = bool(os.getenv('PROCMEDIAONPLAY'))
namesublang = os.getenv('NAMESUBLANG') or "aa"
updaterepo = bool(os.getenv('UPDATEREPO')) or True
skipifinternalsublang = os.getenv('SKIPIFINTERNALSUBLANG') or "eng"
def process_post_request(request, *args, **kwargs):
print("Received a webhook!")
@@ -40,9 +41,14 @@ def process_post_request(request, *args, **kwargs):
print("file name with no extension: " + filenamenoextension)
print("event: " + event)
if ((procaddedmedia and event == "added") or (procmediaonplay and event == "played")) and (len(glob.glob("{}/{}*subgen*".format(filepath, filenamenoextension))) == 0) and not os.path.isfile("{}.output.wav".format(fullpath)): #glob nonsense checks if there exists a subgen file already and won't make a new one
if os.getenv('WHISPER_SPEEDUP') == "True" :
file_has_internal_sub = skipifinternalsublang in str(subprocess.check_output("ffprobe -loglevel error -select_streams s -show_entries stream=index:stream_tags=language -of csv=p=0 \"{}\"".format(fullpath), shell=True)) # skips generation if an internal sub exists
if file_has_internal_sub:
print("File already has an internal sub we want, skipping generation")
if ((procaddedmedia and event == "added") or (procmediaonplay and event == "played")) and (len(glob.glob("{}/{}*subgen*".format(filepath, filenamenoextension))) == 0) and not os.path.isfile("{}.output.wav".format(fullpath)) and not file_has_internal_sub: #glob nonsense checks if there exists a subgen file already and won't make a new one
if whisper_speedup:
print("This is a speedup run!")
print(os.getenv('WHISPER_SPEEDUP'))
finalsubname = "{0}/{1}.subgen.{2}.speedup.{3}".format(
filepath, filenamenoextension, whisper_model, namesublang)
else:
@@ -86,7 +92,7 @@ if not os.path.isdir("/whisper.cpp"):
os.mkdir("/whisper.cpp")
os.chdir("/whisper.cpp")
subprocess.call("git clone https://github.com/ggerganov/whisper.cpp .", shell=True)
if os.getenv('UPDATEREPO') == "True":
if updaterepo:
print("Updating repo!")
subprocess.call("git pull", shell=True)
if os.path.isfile("/whisper.cpp/samples/jfk.wav"):