#!/usr/bin/env bash
# This script takes an SRT subtitle file as input and automatically transforms
# it into files that allow for the best possible translation. It does not work
# with other subtitle formats such as ASS or VTT; those should first be
# converted to SRT.
#
# Usage:  <this script> subtitle.srt
# Env:    LIBRETRANSLATE_URL  file-translation endpoint; defaults to the
#         LibreTranslate instance the script has always used.
# Needs:  bash, awk, curl and GNU sed (\L, \U, \n, \r and the I flag in s///).
# Files:  writes cues, text, text.txt, fullsrt.srt and finalsubtitle.srt into
#         the current directory.
#
# Deliberately no "set -e": on a usage error main() returns non-zero and, being
# the last command, that becomes the exit status of the script.

set -u -o pipefail

: "${LIBRETRANSLATE_URL:=https://libretranslate.pussthecat.org/translate_file}"

# awk helpers shared by both re-timing passes. They communicate through the
# awk globals "time" (current position in seconds) and "nb" (cue counter).
readonly AWK_TIME_LIB='
# Convert an SRT timestamp ("HH:MM:SS,mmm", blanks around it tolerated) to seconds.
function to_sec(t,    hms, n) {
  n = split(t, hms, /:/)
  sub(/,/, ".", hms[n])
  return hms[n] + 60 * hms[n - 1] + 3600 * hms[n - 2]
}

# Print the global "time" as HH:MM:SS,mmm. The value is rounded to whole
# milliseconds first so the seconds field is always two digits and never 60.
function print_time(    ms, h, m, s) {
  ms = int(time * 1000 + 0.5)
  h = int(ms / 3600000); ms -= h * 3600000
  m = int(ms / 60000);   ms -= m * 60000
  s = int(ms / 1000);    ms -= s * 1000
  printf "%02d:%02d:%02d,%03d", h, m, s, ms
}

# Print the cue number and the "start --> end" timing (without a trailing
# newline), then advance the global "time" by duration seconds.
function print_cue(duration) {
  print ++nb
  print_time()
  printf " --> "
  time += duration
  print_time()
}
'

#######################################
# Print an error message on stderr.
# Arguments: the message words
#######################################
err() {
  printf 'error: %s\n' "$*" >&2
}

#######################################
# Split every cue of an SRT stream into one cue per sentence fragment.
# A fragment ends at a line that finishes with ".", "?" or "!" (or at the
# last line of the cue). The time of the original cue is shared between the
# fragments in proportion to their length in bytes.
# Inputs:  SRT on stdin (records separated by blank lines)
# Outputs: SRT on stdout
#######################################
split_into_sentences() {
  LC_ALL=C awk -F '\n' -v RS='' "${AWK_TIME_LIB}"'
  {
    while (NF > 0 && $NF == "") { --NF }
    # A cue without any text has nothing to emit; skipping it also avoids a
    # division by zero in the per-character duration below.
    if (NF < 3) next
    split($2, interval, /-->/)
    time = to_sec(interval[1])
    # Seconds per character, counting one extra character per text line.
    duration = (to_sec(interval[2]) - time) / (length($0) - length($1) - length($2) - 1)
    for (i = 3; i <= NF; ++i) {
      sentence = sentence "\n" $i
      if (i == NF || $i ~ /[.?!] *$/) {
        print_cue(length(sentence) * duration)
        # The leading newline of "sentence" terminates the timing line.
        print sentence "\n"
        sentence = ""
      }
    }
  }'
}

#######################################
# Merge consecutive cues until a full sentence (ending in ".", "?" or "!")
# is collected, then write its timing and its text to two parallel files.
# Text still pending at end of input is written too instead of being lost.
# Arguments: $1 - file receiving one "start --> end" line per sentence
#            $2 - file receiving one sentence per line, each followed by a
#                 blank line
# Inputs:    SRT on stdin, as produced by split_into_sentences
#######################################
merge_sentences() {
  awk -v RS='' -v cues="$1" -v text="$2" '
  function emit() {
    print begin " --> " end > cues
    print substr(out, 2) "\n" > text
    out = ""
  }
  out == "" { begin = $2 }
  {
    end = $4
    sub(/^[^\n]*\n[^\n]*\n/, "")   # drop the number and timing lines
    gsub(/\n/, " ")
    out = out " " $0
  }
  $NF ~ /[.?!] *$/ { emit() }
  END { if (out != "") emit() }'
}

#######################################
# Lower-case the text, then capitalise the first character of every line,
# the letter following ". ", "! " or "? ", and the pronoun "i".
# Inputs:  text on stdin
# Outputs: corrected text on stdout
#######################################
fix_capitals() {
  sed -E \
    -e 's/[A-Z]/\L&/g' \
    -e 's/(^[a]|\. [a-z]|\! [a-z]|\? [a-z])/\U&\E/g' \
    -e 's/^(.)/\U\1/' \
    -e "s/ i / I /g;s/i'/I'/g"
}

#######################################
# Extract the value of "translatedFileUrl" from a JSON response.
# Inputs:  JSON on stdin
# Outputs: the URL on stdout; nothing when the key is absent
#######################################
extract_translated_url() {
  sed -n 's/.*"translatedFileUrl" *: *"\([^"]*\)".*/\1/p'
}

#######################################
# Translate a text file through LibreTranslate: upload it, then download the
# translated file the server points to.
# Globals:   LIBRETRANSLATE_URL (read)
# Arguments: $1 - file to upload
#            $2 - source language code
#            $3 - target language code
#            $4 - destination file for the translation
# Returns:   0 on success, 1 when the upload, the response or the download
#            fails
#######################################
translate_file() {
  local src=$1 from=$2 to=$3 dest=$4
  local response url

  # --form-string keeps a language code starting with "@" or "<" from being
  # read as a file name.
  response=$(curl -X POST "$LIBRETRANSLATE_URL" \
    -H "accept: application/json" \
    -H "Content-Type: multipart/form-data" \
    -F "file=@${src};type=text/plain" \
    --form-string "source=${from}" \
    --form-string "target=${to}") || {
    err "upload to ${LIBRETRANSLATE_URL} failed"
    return 1
  }

  url=$(printf '%s\n' "$response" | extract_translated_url)
  if [[ -z "$url" ]]; then
    err "no translatedFileUrl in the server response: ${response}"
    return 1
  fi

  # --fail keeps an HTTP error page from being saved as the translation.
  curl --fail -o "$dest" --globoff -C - "$url" || {
    err "download of ${url} failed"
    return 1
  }
}

#######################################
# Put the cue timings and the translated sentences back together as SRT.
# Arguments: $1 - file with one timing line per sentence
#            $2 - file with one sentence per non-blank line
# Outputs:   SRT on stdout
#######################################
build_srt() {
  awk -v cues="$1" '
  NF {
    if ((getline cue < cues) <= 0) {
      # More text lines than timings: stop rather than emit broken cues.
      print "warning: ran out of cues, remaining text was dropped" > "/dev/stderr"
      exit
    }
    print ++nb "\n" cue "\n" $0 "\n"
  }' "$2"
}

#######################################
# Break long text lines so a better reading experience is obtained.
# Arguments: $1 - maximum number of characters per line (positive integer)
# Inputs:    SRT on stdin, one text line per cue
# Outputs:   SRT on stdout with the text spread over several lines
#######################################
wrap_lines() {
  awk -v max="$1" '
  # Timing lines are passed through untouched.
  /^ *[0-9:,.]* *-->/ {
    print
    next
  }
  # Every other line is rebuilt from its last word backwards. Consuming a
  # word decrements NF, which also shortens $0 for the next length() call.
  {
    for (out = $NF; NF; ) {
      # Rounded-up quotient first ...
      soft_min = (length($0) * (1 - 1 / max) + 1) / max
      if (soft_min != int(soft_min))
        soft_min = int(soft_min) + 1
      # ... then the target length used for the line being built.
      soft_min = (length($0) + 1) / soft_min - 1

      # l = length of the current output line including the next candidate
      # word; ++l in the loop test accounts for the separating blank.
      l = length($NF)
      --NF
      l += length($NF)
      while (NF && ++l <= soft_min) {
        out = $NF " " out
        --NF
        l += length($NF)
      }
      if (NF) {
        # Over the target but still within the hard limit: keep the word.
        if (l <= max) {
          out = $NF " " out
          --NF
        }
        # Start a new output line with the next word.
        out = $NF "\n" out
      }
    }
    print out
  }'
}

#######################################
# Turn cues holding many wrapped lines into cues of at most two lines,
# sharing the time of the original cue in proportion to the line lengths.
# Inputs:  SRT on stdin, as produced by wrap_lines
# Outputs: SRT on stdout
#######################################
retime_wrapped() {
  LC_ALL=C awk -F '\n' -v RS='' "${AWK_TIME_LIB}"'
  {
    while (NF > 0 && $NF == "") { --NF }
    if (NF < 3) next   # no text: nothing to emit, and avoids dividing by zero
    split($2, interval, /-->/)
    time = to_sec(interval[1])
    duration = (to_sec(interval[2]) - time) / (length($0) - length($1) - length($2) - 1)
    i = 3
    while (i < NF) {
      print_cue((length($i) + length($(i + 1)) + 2) * duration)
      print "\n" $i "\n" $(i + 1) "\n"
      i += 2
    }
    if (i == NF) {   # odd number of lines: the last one gets its own cue
      print_cue((length($i) + 1) * duration)
      print "\n" $i "\n"
    }
  }'
}

#######################################
# Ask the questions, then run the whole conversion.
# Arguments: $1 - SRT subtitle file
# Returns:   0 on success, 2 on a usage error, 1 on any other failure
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s subtitle.srt\n' "${0##*/}" >&2
    return 2
  fi
  local srt=$1
  if [[ ! -r "$srt" ]]; then
    err "cannot read subtitle file: ${srt}"
    return 1
  fi

  local reply fromlang tolang answer maxcharacter tmp

  printf '\nYou are about to convert your subtitle file into files that you can more easily process and translate.\nWhen the process is over, do you wish to delete all intermediate files?\nIf so, please type yes.\n'
  read -r reply

  printf '%s\n' \
    'Supported languages with corresponding code:' \
    'en for English' \
    'ar for Arabic' \
    'az for Azerbaijani' \
    'zh for Chinese' \
    'cs for Czech' \
    'nl for Dutch' \
    'eo for Esperanto' \
    'fi for Finnish' \
    'fr for French' \
    'de for German' \
    'el for Greek' \
    'hi for Hindi' \
    'hu for Hungarian' \
    'id for Indonesian' \
    'ga for Irish' \
    'it for Italian' \
    'ja for Japanese' \
    'ko for Korean' \
    'fa for Persian' \
    'pl for Polish' \
    'pt for Portuguese' \
    'ru for Russian' \
    'sk for Slovak' \
    'es for Spanish' \
    'sv for Swedish' \
    'tr for Turkish' \
    'uk for Ukranian' \
    'vi for Vietnamese'
  printf 'Which language is your subtitle file in? Please type in the format, according to the formats listed above\n'
  read -r fromlang
  printf '\nAnd to which language do you wish translate to? Again, type the format as listed before.\n'
  read -r tolang
  printf '\nWe will start by correcting the subtitle file and output CUES and TEXT files.\nDo you wish to correct CAPITAL LETTERS in TEXT file? \nIf so, please type yes.\n'
  read -r answer

  # Start from empty files so results of an earlier run are never reused.
  : > cues
  : > text

  # NOTE(review): the first sed originally read 's/ */\n/g'. That pattern also
  # matches the empty string and so put every single character on its own
  # line. Presumably an HTML line-break tag was lost from it; it now turns
  # <br>, <br/> and <br /> into real line breaks - confirm this was the intent.
  # The second sed strips trailing blanks and carriage returns.
  if ! sed 's/ *<br *\/\{0,1\}> */\n/gI' -- "$srt" \
    | sed 's/ *\r*$//' \
    | split_into_sentences \
    | merge_sentences cues text; then
    err "could not split ${srt} into CUES and TEXT"
    return 1
  fi

  # If requested, correct CAPITAL LETTERS while trying to preserve the best
  # possible visual structure of the text.
  if [[ "$answer" == "yes" ]]; then
    tmp=$(mktemp text.XXXXXX) || return 1
    if fix_capitals < text > "$tmp"; then
      mv -- "$tmp" text || return 1
    else
      rm -f -- "$tmp"
      err "could not correct capital letters"
      return 1
    fi
    printf 'CUES and TEXT files were created. \nCapital letters were corrected.\n'
  else
    printf 'CUES and TEXT files were created.\n'
  fi

  # Translation through LibreTranslate: text.txt is uploaded and the
  # translation is downloaded back as "text".
  mv -- text text.txt || return 1
  translate_file text.txt "$fromlang" "$tolang" text || return 1

  # Put CUES and TEXT together to create a new SRT subtitle file.
  build_srt cues text > fullsrt.srt || return 1

  # Take care of long lines, creating a better reading experience.
  printf '\nHow many characters should be allowed by line in the final subtitle?\n'
  read -r maxcharacter
  # The wrapping step divides by this value, so it has to be a real number > 0.
  if [[ ! "$maxcharacter" =~ ^[1-9][0-9]*$ ]]; then
    err "the line length must be a positive whole number, got: ${maxcharacter}"
    return 1
  fi

  if ! wrap_lines "$maxcharacter" < fullsrt.srt \
    | retime_wrapped > finalsubtitle.srt; then
    err "could not build finalsubtitle.srt"
    return 1
  fi

  if [[ "$reply" == "yes" ]]; then
    rm -- fullsrt.srt cues text text.txt
  fi
}

main "$@"