mirror of
https://github.com/MoizAhmedd/youtube-video-search.git
synced 2021-07-26 20:45:06 +03:00
remove vtt files properly
This commit is contained in:
@@ -2,15 +2,11 @@
|
||||
# video-cap.sh videoUrl sub.txt
|
||||
|
||||
# Download captions only and save in a .vtt file
|
||||
youtube-dl --skip-download --write-auto-sub "https://www.youtube.com/watch?v=$1";
|
||||
youtube-dl --skip-download --write-auto-sub "https://www.youtube.com/watch?v=$1" -o "$2";
|
||||
|
||||
# Find the .vtt files in the current directory (no time filter or count limit is applied)
|
||||
vtt=$(ls *.vtt)
|
||||
|
||||
# Extract the subs and save as plaintext, removing time, new lines and other markup
|
||||
sed '1,/^$/d' "$vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt"
|
||||
|
||||
# Remove the original .vtt subs file
|
||||
rm -f "$vtt"
|
||||
# Extract the subs and save as plaintext, removing time, new lines and other markup
|
||||
sed '1,/^$/d' "$2.en.vtt" | sed 's/<[^>]*>//g' | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt"
|
||||
|
||||
# Remove the original .vtt subs file
|
||||
rm -f "$2.en.vtt"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user