diff --git a/app.py b/app.py index 3c21bd3..51ca1fd 100644 --- a/app.py +++ b/app.py @@ -1,10 +1,10 @@ from flask import Flask, request from flask_cors import CORS +import uuid import subprocess import os - app = Flask(__name__) CORS(app) @@ -22,9 +22,10 @@ def index(): def get_mapping(): try: video_id = request.args.get('videoid') + transcript_id = str(uuid.uuid1()) mapping = {} - rc = subprocess.call(["./get_transcript.sh", video_id]) - transcript_file = open('transcript.txt','r') + rc = subprocess.call(["./get_transcript.sh", video_id, transcript_id]) + transcript_file = open('transcript_{}.txt'.format(transcript_id),'r') for line in transcript_file.readlines(): line_split = line.strip().split(' ') timestamp = line_split[0] @@ -35,7 +36,7 @@ def get_mapping(): mapping[word] = sorted(list(set(mapping[word]))) else: mapping[word] = [timeStampSeconds] - os.remove('transcript.txt') + os.remove('transcript_{}.txt'.format(transcript_id)) return {'mapping':mapping} except: return {'mapping':{}} diff --git a/get_transcript.sh b/get_transcript.sh index 0b831aa..a9290ec 100755 --- a/get_transcript.sh +++ b/get_transcript.sh @@ -8,7 +8,7 @@ youtube-dl --skip-download --write-auto-sub "https://www.youtube.com/watch?v=$1" vtt=$(ls *.vtt) # Extract the subs and save as plaintext, removing time, new lines and other markup -sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript.txt" +sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt" # Remove the original .vtt subs file rm -f "$vtt"