mirror of
https://github.com/MoizAhmedd/youtube-video-search.git
synced 2021-07-26 20:45:06 +03:00
random ids
This commit is contained in:
9
app.py
9
app.py
@@ -1,10 +1,10 @@
|
|||||||
from flask import Flask, request
|
from flask import Flask, request
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
import uuid
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
CORS(app)
|
CORS(app)
|
||||||
|
|
||||||
@@ -22,9 +22,10 @@ def index():
|
|||||||
def get_mapping():
|
def get_mapping():
|
||||||
try:
|
try:
|
||||||
video_id = request.args.get('videoid')
|
video_id = request.args.get('videoid')
|
||||||
|
transcript_id = str(uuid.uuid1())
|
||||||
mapping = {}
|
mapping = {}
|
||||||
rc = subprocess.call(["./get_transcript.sh", video_id])
|
rc = subprocess.call(["./get_transcript.sh", video_id, transcript_id])
|
||||||
transcript_file = open('transcript.txt','r')
|
transcript_file = open('transcript_{}.txt'.format(transcript_id),'r')
|
||||||
for line in transcript_file.readlines():
|
for line in transcript_file.readlines():
|
||||||
line_split = line.strip().split(' ')
|
line_split = line.strip().split(' ')
|
||||||
timestamp = line_split[0]
|
timestamp = line_split[0]
|
||||||
@@ -35,7 +36,7 @@ def get_mapping():
|
|||||||
mapping[word] = sorted(list(set(mapping[word])))
|
mapping[word] = sorted(list(set(mapping[word])))
|
||||||
else:
|
else:
|
||||||
mapping[word] = [timeStampSeconds]
|
mapping[word] = [timeStampSeconds]
|
||||||
os.remove('transcript.txt')
|
os.remove('transcript_{}.txt'.format(transcript_id))
|
||||||
return {'mapping':mapping}
|
return {'mapping':mapping}
|
||||||
except:
|
except:
|
||||||
return {'mapping':{}}
|
return {'mapping':{}}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ youtube-dl --skip-download --write-auto-sub "https://www.youtube.com/watch?v=$1"
|
|||||||
vtt=$(ls *.vtt)
|
vtt=$(ls *.vtt)
|
||||||
|
|
||||||
# Extract the subs and save as plaintext, removing time, new lines and other markup
|
# Extract the subs and save as plaintext, removing time, new lines and other markup
|
||||||
sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript.txt"
|
sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt"
|
||||||
|
|
||||||
# Remove the original .vtt subs file
|
# Remove the original .vtt subs file
|
||||||
rm -f "$vtt"
|
rm -f "$vtt"
|
||||||
|
|||||||
Reference in New Issue
Block a user