Use random ids for per-request transcript filenames

2021-07-26 20:45:06 +03:00 · 2021-03-19 17:52:35 -04:00
parent 012e00e380
commit b828099d4d
2 changed files with 6 additions and 5 deletions
--- a/app.py
+++ b/app.py
@@ -1,10 +1,10 @@
 from flask import Flask, request
 from flask_cors import CORS
+import uuid 
 import subprocess
 import os


-
 app = Flask(__name__)
 CORS(app)

@@ -22,9 +22,10 @@ def index():
 def get_mapping():
 	try:
 		video_id = request.args.get('videoid')
+		transcript_id = str(uuid.uuid1())
 		mapping = {}
-		rc = subprocess.call(["./get_transcript.sh", video_id])
-		transcript_file = open('transcript.txt','r')
+		rc = subprocess.call(["./get_transcript.sh", video_id, transcript_id])
+		transcript_file = open('transcript_{}.txt'.format(transcript_id),'r')
 		for line in transcript_file.readlines():
 			line_split = line.strip().split(' ')
 			timestamp = line_split[0]
@@ -35,7 +36,7 @@ def get_mapping():
 					mapping[word] = sorted(list(set(mapping[word])))
 				else:
 					mapping[word] = [timeStampSeconds]
-		os.remove('transcript.txt')
+		os.remove('transcript_{}.txt'.format(transcript_id))
 		return {'mapping':mapping}
 	except:
 		return {'mapping':{}}
--- a/get_transcript.sh
+++ b/get_transcript.sh
@@ -8,7 +8,7 @@ youtube-dl --skip-download --write-auto-sub "https://www.youtube.com/watch?v=$1"
 vtt=$(ls *.vtt)

 # Extract the subs and save as plaintext, removing time, new lines and other markup
-sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript.txt"
+sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt"

 # Remove the original .vtt subs file
 rm -f "$vtt"