remove vtt files properly

This commit is contained in:
MoizAhmedd
2021-03-19 18:17:03 -04:00
parent b828099d4d
commit 901e2491ff

View File

@@ -2,15 +2,11 @@
# video-cap.sh videoUrl sub.txt
#
# Download the auto-generated captions of a YouTube video and flatten them
# into a plain-text transcript.
#
# Arguments:
#   $1 - video identifier; it is appended to
#        https://www.youtube.com/watch?v= (so pass the ID, not a full URL)
#   $2 - output base name; captions are expected at "$2.en.vtt" and the
#        transcript is written to "transcript_$2.txt"
# Exit status:
#   2 on bad usage, 1 if the captions could not be downloaded, otherwise
#   the status of the extraction pipeline.

if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  printf 'Usage: %s videoId outputName\n' "${0##*/}" >&2
  exit 2
fi

# Derive the caption file name from $2 instead of globbing for *.vtt, so
# unrelated .vtt files in the current directory are never read or removed.
vtt_file="$2.en.vtt"

# Download captions only (no video) and save them in a .vtt file.
if ! youtube-dl --skip-download --write-auto-sub \
  -o "$2" "https://www.youtube.com/watch?v=$1"; then
  printf 'error: youtube-dl failed for video %s\n' "$1" >&2
  exit 1
fi

# The download can finish without writing captions (e.g. the video has
# none), so make sure the file exists before trying to read it.
if [ ! -f "$vtt_file" ]; then
  printf 'error: caption file %s was not created\n' "$vtt_file" >&2
  exit 1
fi

# Extract the subs and save as plaintext, removing time, new lines and other
# markup: drop everything up to the first blank line, strip <...> tags, then
# keep the 1st (text before the first '.') and 3rd line of every 8-line group.
sed -e '1,/^$/d' -e 's/<[^>]*>//g' -- "$vtt_file" \
  | awk -F. 'NR%8==1{printf"%s ",$1}NR%8==3' > "transcript_$2.txt"
extract_status=$?

# Remove the original .vtt subs file, even when the extraction failed.
rm -f -- "$vtt_file"

if [ "$extract_status" -ne 0 ]; then
  printf 'error: could not extract transcript from %s\n' "$vtt_file" >&2
  exit "$extract_status"
fi