#!/bin/sh

# Copyright 2022 Loïc Cerf (lcerf@dcc.ufmg.br)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# Overview: the script is a six-stage filter over .srt text.
#   1. glue_short_words      sed: tie some very short words to the next word
#   2. break_at_punctuation  awk: insert line breaks after punctuation
#   3. split_long_cues       awk: at most two lines per cue, retimed
#   4. break_at_spaces       awk: insert line breaks between any two words
#   5. pair_lines_into_cues  awk: at most two lines per cue, retimed
#   6. unglue_short_words    sed: undo stage 1

# Print the help text on standard output.
usage() {
    # The script name is passed as an argument, never as part of the
    # format, so that a "%" or "\" in its path is printed literally.
    printf 'Usage: %s max_char_per_line [file.srt]...
Approximately-evenly break too-long lines in .srt subtitles into lines
of at most max_char_per_line (> 1) characters (except for single words)
and always have at most two lines on the screen.
' "$0"
}

# Awk functions shared by the two retiming stages.  They communicate
# through the awk globals "time" (start of the next cue, in seconds) and
# "nb" (number of cues printed so far).
cue_awk_functions='
# Convert "[[HH:]MM:]SS,mmm" into a number of seconds.  Missing leading
# components count as zero.
function to_sec(t,    n, hms) {
    n = split(t, hms, /:/)
    sub(/,/, ".", hms[n])
    return hms[n] + 60 * hms[n - 1] + 3600 * hms[n - 2]
}

# Print the global "time" as HH:MM:SS,mmm.  The time is rounded to whole
# milliseconds first, so that every component is zero-padded and the
# seconds can never be printed as 60.
function print_time(    ms, h, m, s) {
    ms = int(time * 1000 + 0.5)
    h = int(ms / 3600000)
    ms -= h * 3600000
    m = int(ms / 60000)
    ms -= m * 60000
    s = int(ms / 1000)
    ms -= s * 1000
    printf "%02d:%02d:%02d,%03d", h, m, s, ms
}

# Print the header of the next cue (its number, then "start --> end"
# without a final newline) and advance "time" by "duration" seconds.
function print_cue(duration) {
    print ++nb
    print_time()
    printf " --> "
    time += duration
    print_time()
}
'

# Stage 1.  Arguments: the input files (none: standard input).
# Replace with "~" the space that follows a two-letter word matching
# [nd][eao] or a one-letter word that is a vowel, so that the awk stages
# see that word and the next one as a single word.
# NOTE: "\b" is a GNU sed extension.
glue_short_words() {
    sed -e 's/\b\([nd][eao]\) /\1~/g' \
        -e 's/\b\([aeiouyAEIOUY]\) /\1~/g' "$@"
}

# Stage 6.  Turn the "~" written by glue_short_words back into spaces.
unglue_short_words() {
    sed -e 's/\b\([nd][eao]\)~/\1 /g' \
        -e 's/\b\([aeiouyAEIOUY]\)~/\1 /g'
}

# Stage 2.  Argument: $1 - max_char_per_line.
# Break every text line, but only after words ending with a punctuation
# character.  Lines offering no such break point are printed unchanged.
break_at_punctuation() {
    awk -v max="$1" '
# Return the words following the last word that ends with punctuation,
# the first word and the last two words of the record being ignored in
# that search.  next_NF is set to the index of the punctuated word.  If
# there is none, return the whole record and set next_NF to 0.
function after_last_punct() {
    if (NF > 3) {
        next_NF = NF - 1
        for (after = $next_NF " " $NF; --next_NF != 1 && $next_NF !~ /[[:punct:]]$/; after = $next_NF " " after);
        if (next_NF != 1)
            return after
    }
    next_NF = 0
    return $0
}

# Same as after_last_punct, except that only the first and the last
# words are ignored, i.e., the returned segment may be a single word.
function after_intermediary_punct() {
    if (NF > 2) {
        next_NF = NF
        for (after = $NF; --next_NF != 1 && $next_NF !~ /[[:punct:]]$/; after = $next_NF " " after);
        if (next_NF != 1)
            return after
    }
    next_NF = 0
    return $0
}

# Timing lines are copied verbatim.
/^ *[0-9:,.]* *-->/ {
    print
    next
}

# Any other line is consumed from its end, by lowering NF, while "out"
# grows from its end too.  Each turn of the outer loop builds one output
# line.
{
    for (out = after_last_punct(); NF; ) {
        # Number of lines wanted for what remains, rounded up ...
        soft_min = (length($0) * (1 - 1 / max) + 1) / max
        if (soft_min != int(soft_min))
            soft_min = int(soft_min) + 1
        # ... then the length of each line if the split were even (every
        # break consumes one space).
        soft_min = (length($0) + 1) / soft_min - 1
        # The last segment is already in "out": measure and consume it.
        l = length(after_last_punct())
        NF = next_NF
        # Prepend segments for as long as the line stays within soft_min.
        for (l += length(after_intermediary_punct()); NF && ++l <= soft_min; l += length(after_intermediary_punct())) {
            out = after_intermediary_punct() " " out
            NF = next_NF
        }
        if (NF) {
            # One more segment is accepted if the line stays within max.
            if (l <= max) {
                out = after_intermediary_punct() " " out
                NF = next_NF
            }
            # Start the previous line with the new last segment, which
            # is not consumed here but at the next turn.
            out = after_last_punct() "\n" out
        }
    }
    print out
}'
}

# Stage 3.  Argument: $1 - max_char_per_line.
# Read whole cues (blank-line-separated records whose fields are lines:
# number, timing, text lines) and print cues of at most two text lines.
# A line longer than max always gets a cue of its own.  The duration of
# the input cue is shared proportionally to the number of characters,
# each text line counting one extra character.  Cues are renumbered.
split_long_cues() {
    LC_ALL=C awk -F '\n' -v RS='' -v max="$1" "$cue_awk_functions"'
{
    # Ignore empty trailing lines, if any.
    while (NF > 0 && $NF == "")
        --NF
    # Without any text line, there is nothing to print and no character
    # to divide the duration by.
    if (NF < 3)
        next
    split($2, interval, /-->/)
    time = to_sec(interval[1])
    # Seconds per character of text, line ends included.
    duration = (to_sec(interval[2]) - time) / (length($0) - length($1) - length($2) - 1)
    for (i = 3; i < NF; ++i)
        if (length($i) > max) {
            print_cue((length($i) + 1) * duration)
            print "\n" $i "\n"
        }
        else if (length($(i + 1)) > max) {
            print_cue((length($i) + 1) * duration)
            print "\n" $i "\n"
            ++i
            print_cue((length($i) + 1) * duration)
            print "\n" $i "\n"
        }
        else {
            print_cue((length($i) + length($(i + 1)) + 2) * duration)
            print "\n" $i "\n" $(i + 1) "\n"
            ++i
        }
    # One text line may remain.
    if (i == NF) {
        print_cue((length($i) + 1) * duration)
        print "\n" $i "\n"
    }
}'
}

# Stage 4.  Argument: $1 - max_char_per_line.
# Same principle as break_at_punctuation, but a break may be inserted
# between any two words.
break_at_spaces() {
    awk -v max="$1" '
# Timing lines are copied verbatim.
/^ *[0-9:,.]* *-->/ {
    print
    next
}

# Any other line is consumed from its end, by lowering NF, while "out"
# grows from its end too.  Each turn of the outer loop builds one output
# line.
{
    for (out = $NF; NF; ) {
        # Number of lines wanted for what remains, rounded up ...
        soft_min = (length($0) * (1 - 1 / max) + 1) / max
        if (soft_min != int(soft_min))
            soft_min = int(soft_min) + 1
        # ... then the length of each line if the split were even (every
        # break consumes one space).
        soft_min = (length($0) + 1) / soft_min - 1
        # The last word is already in "out": measure and consume it.
        l = length($NF)
        --NF
        l += length($NF)
        # Prepend words for as long as the line stays within soft_min.
        while (NF && ++l <= soft_min) {
            out = $NF " " out
            --NF
            l += length($NF)
        }
        if (NF) {
            # One more word is accepted if the line stays within max.
            if (l <= max) {
                out = $NF " " out
                --NF
            }
            # Start the previous line with the new last word, which is
            # not consumed here but at the next turn.
            out = $NF "\n" out
        }
    }
    print out
}'
}

# Stage 5.  Same as split_long_cues, except that the text lines are
# always grouped two by two, whatever their lengths.
pair_lines_into_cues() {
    LC_ALL=C awk -F '\n' -v RS='' "$cue_awk_functions"'
{
    # Ignore empty trailing lines, if any.
    while (NF > 0 && $NF == "")
        --NF
    # Without any text line, there is nothing to print and no character
    # to divide the duration by.
    if (NF < 3)
        next
    split($2, interval, /-->/)
    time = to_sec(interval[1])
    # Seconds per character of text, line ends included.
    duration = (to_sec(interval[2]) - time) / (length($0) - length($1) - length($2) - 1)
    for (i = 3; i < NF; i += 2) {
        print_cue((length($i) + length($(i + 1)) + 2) * duration)
        print "\n" $i "\n" $(i + 1) "\n"
    }
    # One text line may remain.
    if (i == NF) {
        print_cue((length($i) + 1) * duration)
        print "\n" $i "\n"
    }
}'
}

# Entry point.  Arguments: max_char_per_line [file.srt]...
# Without argument, or with -h or --help as the first one, print the
# help text.  Otherwise filter the files (or the standard input) to the
# standard output.
main() {
    case "${1-}" in
        '' | -h | --help)
            usage
            return 0
            ;;
    esac
    max=$1
    shift
    glue_short_words "$@" |
        break_at_punctuation "$max" |
        split_long_cues "$max" |
        break_at_spaces "$max" |
        pair_lines_into_cues |
        unglue_short_words
}

main "$@"