Add files via upload

2023-01-28 10:37:06 -07:00
parent 0cec89ce37
commit 700432a4f8
3 changed files with 155 additions and 0 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,26 @@
+# docker-compose.yml
+# Builds the subgen image from ./subgen/Dockerfile (build context: this
+# directory) and runs it as a single container named "subgen".
+version: "2"
+
+services:
+  subgen:
+    container_name: subgen
+    build:
+      context: .
+      dockerfile: ./subgen/Dockerfile
+    ports:
+      # host:container
+      - "8090:8090"
+    environment:
+      - WHISPER_MODEL=medium
+      - WHISPER_SPEEDUP=False
+      - WHISPER_THREADS=4
+      - WHISPER_PROCESSORS=1
+      - PROCADDEDMEDIA=True
+      - PROCMEDIAONPLAY=True
+      - NAMESUBLANG=aa
+      - UPDATEREPO=True
+    volumes:
+      # TV, MOVIES and APPDATA are substituted by Compose before start-up.
+      - "${TV}:/tv"
+      - "${MOVIES}:/movies"
+      - "${APPDATA}/subgen:/whisper.cpp"
--- a/subgen/Dockerfile
+++ b/subgen/Dockerfile
@@ -0,0 +1,28 @@
+FROM python
+
+# Every setting is declared as a build ARG (so `--build-arg NAME=value` can
+# change the default baked into the image) and re-exported as ENV, because ARG
+# values do not exist once the container is running. The ENV value can then be
+# overridden at run time with `docker run -e` or a compose `environment:` entry.
+
+# this can be tiny, base, small, medium, large
+ARG WHISPER_MODEL=medium
+ENV WHISPER_MODEL=${WHISPER_MODEL}
+# this adds the option -su "speed up audio by x2 (reduced accuracy)"
+ARG WHISPER_SPEEDUP=False
+ENV WHISPER_SPEEDUP=${WHISPER_SPEEDUP}
+# number of threads to use during computation
+ARG WHISPER_THREADS=4
+ENV WHISPER_THREADS=${WHISPER_THREADS}
+# number of processors to use during computation
+ARG WHISPER_PROCESSORS=1
+ENV WHISPER_PROCESSORS=${WHISPER_PROCESSORS}
+
+# will gen subtitles for all media added regardless of existing external/embedded subtitles
+ARG PROCADDEDMEDIA=True
+ENV PROCADDEDMEDIA=${PROCADDEDMEDIA}
+# will gen subtitles for media that is missing a 'subgen' file when played (you have to wait for it, then enable it manually)
+ARG PROCMEDIAONPLAY=True
+ENV PROCMEDIAONPLAY=${PROCMEDIAONPLAY}
+# use 2 letter codes @ https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes // makes it easier to see which subtitles are generated by subgen -- names file like: "Community - S03E18 - Course Listing Unavailable.aa.subgen.srt"
+ARG NAMESUBLANG=aa
+ENV NAMESUBLANG=${NAMESUBLANG}
+# pulls and merges whisper.cpp on every start
+ARG UPDATEREPO=True
+ENV UPDATEREPO=${UPDATEREPO}
+
+RUN apt-get update && apt-get install -y ffmpeg git gcc
+
+RUN pip3 install webhook_listener
+EXPOSE 8090
+
+COPY ./subgen/subgen.py /
+
+# Shell form on purpose: the exec (JSON array) form performs no variable
+# expansion, so the script would receive the literal text "$WHISPER_MODEL".
+# `exec` replaces the shell so python receives container signals directly.
+CMD exec python3 -u /subgen.py "$WHISPER_MODEL" "$WHISPER_SPEEDUP" "$WHISPER_THREADS" "$WHISPER_PROCESSORS" "$PROCADDEDMEDIA" "$PROCMEDIAONPLAY" "$NAMESUBLANG" "$UPDATEREPO"
--- a/subgen/subgen.py
+++ b/subgen/subgen.py
@@ -0,0 +1,101 @@
+import sys
+import os
+import time
+import json
+import glob
+import pathlib
+import webhook_listener
+import subprocess
+
+# parse our arguments from the Dockerfile
+whisper_model = sys.argv[1]
+whisper_speedup = sys.argv[2]
+whisper_threads = sys.argv[3]
+whisper_processors = sys.argv[4]
+procaddedmedia = sys.argv[5]
+procmediaonplay = sys.argv[6]
+namesublang = sys.argv[7]
+updaterepo = sys.argv[8]
+
+def process_post_request(request, *args, **kwargs):
+    print("Received a webhook!")
+    if int(request.headers.get('Content-Length', 0)) > 0:
+        body = request.body.read(
+            int(request.headers['Content-Length'])).decode()
+    else:
+        body = '{}'
+
+    print(body)
+    fullpath = json.loads(body)['file']
+    filename = json.loads(body)['filename']
+    event = json.loads(body)['event']
+    filepath = os.path.dirname(fullpath)
+    extension = pathlib.Path(filename).suffix
+    filenamenoextension = filename.replace(extension, "")
+
+    print("fullpath: " + fullpath)
+    print("filename: " + filename)
+    print("filepath: " + filepath)
+    print("extension: " + extension)
+    print("file name with no extension: " + filenamenoextension)
+    print("event: " + event)
+
+    if ((procaddedmedia and event == "added") or (procmediaonplay and event == "played")) and (len(glob.glob("{}/{}*subgen*".format(filepath, filenamenoextension))) == 0): #glob nonsense checks if there exists a subgen file already and won't make a new one
+        if whisper_speedup:
+            finalsubname = "{0}/{1}.subgen.{2}.speedup.{3}".format(
+                filepath, filenamenoextension, whisper_model, namesublang)
+        else:
+            finalsubname = "{0}/{1}.subgen.{2}.{3}".format(
+                filepath, filenamenoextension, whisper_model, namesublang)
+                
+        gen_subtitles(fullpath, "{}.output.wav".format(fullpath), finalsubname)
+
+        if os.path.isfile("{}.output.wav".format(fullpath)):
+            print("Deleting WAV workfile")
+            os.remove("{}.output.wav".format(fullpath))
+
+    return
+
+def gen_subtitles(filename, inputwav, finalsubname):
+    strip_audio(filename)
+    run_whisper(inputwav, finalsubname)
+
+def strip_audio(filename):
+    print("Starting strip audio")
+    command = "ffmpeg -y -i \"{}\" -ar 16000 -ac 1 -c:a pcm_s16le \"{}.output.wav\"".format(
+        filename, filename)
+    print("Command: " + command)
+    subprocess.call(command, shell=True)
+    print("Done stripping audio")
+
+def run_whisper(inputwav, finalsubname):
+    print("Starting whisper")
+    os.chdir("/whisper.cpp")
+    command = "./main -m models/ggml-{}.bin -of \"{}\" -t {} -p {} -osrt -f \"{}\"" .format(
+        whisper_model, finalsubname, whisper_threads, whisper_processors, inputwav)
+    if (whisper_speedup):
+        command = command.replace("-osrt", "-osrt -su")
+    print("Command: " + command)
+    subprocess.call(command, shell=True)
+
+    print("Done with whisper")
+
+if not os.path.isdir("/whisper.cpp"):
+    os.mkdir("/whisper.cpp")
+os.chdir("/whisper.cpp")
+subprocess.call("git clone https://github.com/ggerganov/whisper.cpp .", shell=True)
+if updaterepo:
+    print("Updating repo!")
+    subprocess.call("git pull", shell=True)
+if os.path.isfile("/whisper.cpp/samples/jfk.wav"):
+    print("Deleting sample file")
+    os.remove("/whisper.cpp/samples/jfk.wav")
+subprocess.call("make " + whisper_model, shell=True)
+print("Starting webhook!")
+webhooks = webhook_listener.Listener(handlers={"POST": process_post_request})
+webhooks.start()
+print("Webhook started")
+
+while True:
+    print("Still alive...")
+    time.sleep(300)