#!/usr/bin/env bash
# videocmp_select.sh
#
# Modes:
#   1) Pair mode: compare A and B
#        ./videocmp_select.sh A.mp4 B.mp4 [options]
#
#   2) Directory scan mode: group by prefix before first "WoodmanCastingX" (case-insensitive)
#        ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options]
#        (DIR defaults to "." if omitted)
#
# Pipeline:
#   1) Validate files (ffprobe fields + ffmpeg deep decode)
#   2) Confirm same movie via snapshot SSIM @ --snapshot-time (default 12s)
#   3) Optional: run external compare impl (simple/advanced) for logging
#   4) Pick preferred: prefer --prefer-height (default 720), then longer duration, then larger file
#   5) Act on loser: --action print|move|delete (with --dry-run)
#
# Common options:
#   --snapshot-time SEC         (default: 12)
#   --snapshot-scale WxH        (default: 320:-1)
#   --snapshot-ssim THRESH      (default: 0.97)
#   --impl simple|advanced      (default: simple)  # logs only
#   --impl-simple PATH          (default: ./compare_simple.sh)
#   --impl-advanced PATH        (default: ./compare_advanced.sh)
#   --impl-optional             (default) warn if impl missing
#   --impl-required             error if chosen impl missing
#   --prefer-height N           (default: 720)
#   --duration-eps SEC          (default: 0.0)
#   --action print|move|delete  (default: print)
#   --trash-dir PATH            (default: $HOME/.video_trash)
#   --dry-run
#   --verbose
#
# Directory-scan options:
#   --scan-dir [DIR]            Enable directory mode (DIR optional; default ".")
#   --recursive, -r             Recurse into subfolders
#   --delimiter WORD            (default: WoodmanCastingX) case-insensitive
#
# Exit codes:
#   0 success | 1 differ/broken | 2 usage | 3 missing dependency

# Treat expansion of an unset variable as an error.
set -u

# -------- defaults --------
# Every value below can be overridden by the matching command-line option.

# Snapshot comparison: timestamp in seconds, ffmpeg scale= argument, and the
# minimum SSIM at which two snapshots count as the same movie.
SNAP_T="12"
SNAP_SCALE="320:-1"
SNAP_SSIM="0.97"

# External comparison script (used for logging only).
IMPL="simple"
IMPL_SIMPLE="./compare_simple.sh"
IMPL_ADV="./compare_advanced.sh"
IMPL_REQUIRED=0

# Quality selection: preferred frame height and duration tolerance (seconds).
PREF_HEIGHT=720
DURATION_EPS=0.0

# What to do with the losing file.
ACTION="print"
# Fall back to the current directory when HOME is unset, so `set -u` does not
# abort the script before --trash-dir could even be parsed.
TRASH_DIR="${HOME:-.}/.video_trash"
DRY=0
VERBOSE=0

# Directory-scan mode (enabled when SCAN_DIR is non-empty).
SCAN_DIR=""
RECURSIVE=0
DELIM="WoodmanCastingX"

# -------- helpers --------

# need CMD
#   Verify that CMD can be resolved with `command -v`. If it cannot, print
#   "Missing dependency: CMD" to stderr and exit the script with status 3.
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 3
  fi
}

# v MSG...
#   Verbose logger: when VERBOSE is 1, write "[LOG] MSG..." to stderr.
#   Always returns 0, so it is safe as the last command of a function
#   (previously it returned 1 whenever verbose logging was off).
v() {
  if [[ "${VERBOSE:-0}" == "1" ]]; then
    echo "[LOG]" "$@" >&2
  fi
  return 0
}

# die MSG...
#   Print "[ERR] MSG..." to stderr and terminate the script with status 1.
die() {
  echo "[ERR]" "$@" >&2
  exit 1
}

# Fail fast (exit 3 via need) when an external tool used by this script is
# unavailable. mktemp, head, cut and basename are used further down and are
# therefore checked as well.
for _dep in ffmpeg ffprobe awk grep stat sed tr find mktemp head cut basename; do
  need "$_dep"
done
unset _dep

# -------- array-based option parser --------

# _usage_error MSG...
#   Report a command-line usage problem on stderr and exit with status 2.
_usage_error() {
  echo "[ERR]" "$@" >&2
  exit 2
}

# _require_value OPTION [VALUE ...]
#   Called with the remaining argument list; aborts with a usage error when
#   OPTION is the last argument, i.e. its value is missing.
_require_value() {
  (( $# >= 2 )) || _usage_error "Option $1 requires a value"
}

# parse_args ARG...
#   Parse the command line into the configuration globals (SNAP_T, ACTION,
#   DRY, SCAN_DIR, ...). Non-option arguments are collected, in order, in the
#   global array REM_ARR for pair mode. ARGS keeps a copy of the raw arguments.
#
#   - Options that take a value abort with status 2 when the value is missing
#     (previously the setting was silently replaced by an empty string).
#   - Unknown "--options" abort with status 2 instead of being treated as
#     positional arguments, so a mistyped --dry-run can no longer be ignored.
#   - "--" ends option parsing; everything after it is positional.
#   - --help / -h prints the "# " comment lines of this script and exits 0.
parse_args() {
  ARGS=("$@")
  REM_ARR=()
  local opt
  while (( $# > 0 )); do
    opt="$1"
    case "$opt" in
      --snapshot-time)  _require_value "$@"; SNAP_T="$2";      shift 2 ;;
      --snapshot-scale) _require_value "$@"; SNAP_SCALE="$2";  shift 2 ;;
      --snapshot-ssim)  _require_value "$@"; SNAP_SSIM="$2";   shift 2 ;;
      --impl)           _require_value "$@"; IMPL="$2";        shift 2 ;;
      --impl-simple)    _require_value "$@"; IMPL_SIMPLE="$2"; shift 2 ;;
      --impl-advanced)  _require_value "$@"; IMPL_ADV="$2";    shift 2 ;;
      --impl-optional)  IMPL_REQUIRED=0; shift ;;
      --impl-required)  IMPL_REQUIRED=1; shift ;;
      --prefer-height)  _require_value "$@"; PREF_HEIGHT="$2";  shift 2 ;;
      --duration-eps)   _require_value "$@"; DURATION_EPS="$2"; shift 2 ;;
      --action)         _require_value "$@"; ACTION="$2";       shift 2 ;;
      --trash-dir)      _require_value "$@"; TRASH_DIR="$2";    shift 2 ;;
      --dry-run)        DRY=1; shift ;;
      --verbose)        VERBOSE=1; shift ;;
      --scan-dir)
        # Optional argument: use the next token unless it is empty or looks
        # like another option.
        if (( $# >= 2 )) && [[ -n "$2" && "$2" != --* ]]; then
          SCAN_DIR="$2"; shift 2
        else
          SCAN_DIR="."; shift
        fi
        ;;
      --recursive|-r)   RECURSIVE=1; shift ;;
      --delimiter)      _require_value "$@"; DELIM="$2"; shift 2 ;;
      --help|-h)
        grep -E '^# ' "$0" | sed 's/^# //'
        exit 0
        ;;
      --)
        shift
        REM_ARR+=("$@")
        break
        ;;
      --*)
        _usage_error "Unknown option: $opt (see --help)"
        ;;
      *)
        # Leave positional arguments for pair mode.
        REM_ARR+=("$opt"); shift
        ;;
    esac
  done
}

parse_args "$@"

# Log the effective configuration (shown only with --verbose), including the
# settings that decide whether any file is actually moved or deleted.
v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE DELIM='$DELIM' PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T ACTION=$ACTION DRY=$DRY"

# -------- core functions --------

# _probe_first FILE FFPROBE_ARG...
#   Run ffprobe on FILE with the given selection arguments and print only the
#   first line of its output (empty when ffprobe fails or prints nothing).
_probe_first() {
  local file="$1" out
  shift
  out=$(ffprobe -v error "$@" -of default=nw=1:nk=1 "$file") || out=""
  printf '%s\n' "${out%%$'\n'*}"
}

# probe_meta FILE
#   Print one line "width height duration codec size" for FILE.
#   Each field is guaranteed non-empty so that callers can split the line
#   with `read`: a missing or non-numeric width/height/duration/size becomes
#   0 and a missing codec name becomes "?". Previously an empty ffprobe
#   answer left a hole in the line and shifted all following fields.
probe_meta() {
  local f="$1" size w h dur vcodec

  # GNU stat first, BSD/macOS stat as fallback.
  size=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f" 2>/dev/null) || size=""

  w=$(_probe_first "$f" -select_streams v:0 -show_entries stream=width)
  h=$(_probe_first "$f" -select_streams v:0 -show_entries stream=height)
  dur=$(_probe_first "$f" -show_entries format=duration)
  vcodec=$(_probe_first "$f" -select_streams v:0 -show_entries stream=codec_name)

  [[ "$size" =~ ^[0-9]+$ ]] || size=0
  [[ "$w" =~ ^[0-9]+$ ]] || w=0
  [[ "$h" =~ ^[0-9]+$ ]] || h=0
  [[ "$dur" =~ ^[0-9]+([.][0-9]+)?$ ]] || dur=0
  [[ "$vcodec" =~ ^[^[:space:]]+$ ]] || vcodec="?"

  echo "$w $h $dur $vcodec $size"
}

# check_ok FILE
#   Validate FILE as a playable video. Returns 0 when it passes, 1 otherwise
#   (with a "BROKEN(...)" line on stderr naming the failed check):
#     1. ffprobe must report a positive integer width and height for the
#        first video stream;
#     2. if ffprobe reports a duration (not empty / "N/A"), it must be > 0;
#     3. ffmpeg must decode the whole file to the null muxer without error.
check_ok() {
  local f="$1" w h dur

  w=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=nw=1:nk=1 "$f" 2>/dev/null) || w=""
  h=$(ffprobe -v error -select_streams v:0 -show_entries stream=height -of default=nw=1:nk=1 "$f" 2>/dev/null) || h=""
  dur=$(ffprobe -v error -show_entries format=duration -of default=nw=1:nk=1 "$f" 2>/dev/null) || dur=""

  # Only the first output line is meaningful if ffprobe prints several.
  w="${w%%$'\n'*}"
  h="${h%%$'\n'*}"
  dur="${dur%%$'\n'*}"

  # Check the text with a regex before doing arithmetic on it: `-eq` inside
  # [[ ]] would arithmetic-evaluate arbitrary ffprobe output. 10# forces
  # base 10 so leading zeros are not read as octal.
  if [[ ! "$w" =~ ^[0-9]+$ || ! "$h" =~ ^[0-9]+$ ]] || (( 10#$w == 0 || 10#$h == 0 )); then
    echo "BROKEN(ffprobe: no valid video stream): $f" >&2
    return 1
  fi

  if [[ -n "$dur" && "$dur" != "N/A" ]]; then
    if ! awk -v d="$dur" 'BEGIN{exit !(d+0>0)}'; then
      echo "BROKEN(ffprobe: nonpositive duration): $f" >&2
      return 1
    fi
  fi

  # Full decode; -xerror makes ffmpeg stop with an error status on the first
  # decoding error. -nostdin keeps ffmpeg from reading the caller's stdin.
  if ! ffmpeg -nostdin -v error -xerror -nostats -i "$f" -f null - >/dev/null 2>&1; then
    echo "BROKEN(ffmpeg decode): $f" >&2
    return 1
  fi
  return 0
}

# snapshot_compare_ssim F1 F2 TIME SCALE
#   Grab one frame at TIME seconds from each file, scaled with the ffmpeg
#   filter "scale=SCALE,format=yuv420p", and compare the two images with
#   ffmpeg's ssim filter. Prints the SSIM "All" value (falling back to the
#   "Y" value) on stdout, or an empty line when a snapshot or the comparison
#   could not be produced. Always returns 0.
#
#   The body runs in its own subshell so that an EXIT trap can remove the
#   temporary directory on every exit path, including interruption.
snapshot_compare_ssim() (
  f1="$1"; f2="$2"; t="$3"; sc="$4"

  tmpd="$(mktemp -d)" || { echo ""; exit 0; }
  trap 'rm -rf -- "$tmpd"' EXIT
  s1="$tmpd/1.png"; s2="$tmpd/2.png"; log="$tmpd/cmp.log"

  # Best effort: a failed grab is detected below through the missing image.
  ffmpeg -nostdin -hide_banner -v error -y -i "$f1" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s1" || true
  ffmpeg -nostdin -hide_banner -v error -y -i "$f2" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s2" || true
  if [[ ! -s "$s1" || ! -s "$s2" ]]; then
    echo ""
    exit 0
  fi

  # The ssim filter reports its result on stderr at log level "info".
  ffmpeg -nostdin -hide_banner -v info -i "$s1" -i "$s2" -lavfi "ssim" -f null - >/dev/null 2>"$log" || true

  ssim="$(grep -Eo 'All:[0-9]+(\.[0-9]+)?' "$log" | head -n1 | cut -d: -f2)"
  if [[ -z "$ssim" ]]; then
    ssim="$(grep -Eo 'SSIM Y:[0-9]+(\.[0-9]+)?' "$log" | head -n1 | cut -d: -f2)"
  fi
  echo "$ssim"
)

# run_impl IMPL A B
#   Run the external comparison script for logging purposes only: IMPL_ADV
#   when IMPL is "advanced", IMPL_SIMPLE for any other value. The command
#   line and the first 12 lines of the script's combined output are written
#   to stderr.
#   When the script is not an executable regular file: dies (exit 1) if
#   IMPL_REQUIRED is 1, otherwise prints a notice and returns 0.
run_impl() {
  local which="$1" f1="$2" f2="$3" path

  if [[ "$which" == "advanced" ]]; then
    path="$IMPL_ADV"
  else
    path="$IMPL_SIMPLE"
  fi

  # -f as well as -x: a directory is usually "executable" too.
  if [[ ! -f "$path" || ! -x "$path" ]]; then
    if [[ "$IMPL_REQUIRED" -eq 1 ]]; then
      die "Requested --impl=$which but script not found/executable at $path"
    fi
    echo "[impl:$which] not found ($path) — skipping" >&2
    return 0
  fi

  echo "[impl:$which] $path \"$f1\" \"$f2\"" >&2
  "$path" "$f1" "$f2" 2>&1 | sed -n '1,12p' >&2
}

# score_file FILE
#   Print "tier duration size" for FILE, based on probe_meta:
#     tier      0 when the video height equals PREF_HEIGHT, otherwise 1
#     duration  as reported by probe_meta (0 when missing)
#     size      file size as reported by probe_meta (0 when missing)
#   A non-numeric height (or PREF_HEIGHT) yields tier 1 rather than an
#   arithmetic error.
score_file() {
  local f="$1" h dur size tier=1
  # probe_meta prints "w h dur codec size"; width and codec are not needed.
  read -r _ h dur _ size <<<"$(probe_meta "$f")"

  if [[ "$h" =~ ^[0-9]+$ && "$PREF_HEIGHT" =~ ^[0-9]+$ ]] \
    && (( 10#$h == 10#$PREF_HEIGHT )); then
    tier=0
  fi

  echo "$tier ${dur:-0} ${size:-0}"
}

# pick_winner A B
#   Decide which of the two files to keep and print "KEEP|DROP|why".
#   Criteria, in order:
#     1. the file whose height equals PREF_HEIGHT (tier 0 from score_file);
#     2. the longer duration, if the difference exceeds DURATION_EPS seconds;
#     3. the larger file size;
#     4. otherwise A.
#   Scores that are not plain numbers are treated as tier 1 / size 0 so the
#   arithmetic below cannot fail on malformed probe output.
pick_winner() {
  local a="$1" b="$2"
  local aTier aDur aSize bTier bDur bSize who

  read -r aTier aDur aSize <<<"$(score_file "$a")"
  read -r bTier bDur bSize <<<"$(score_file "$b")"
  v "Quality scores: A[tier=$aTier dur=$aDur size=$aSize] B[tier=$bTier dur=$bDur size=$bSize]"

  [[ "$aTier" =~ ^[0-9]+$ ]] || aTier=1
  [[ "$bTier" =~ ^[0-9]+$ ]] || bTier=1
  [[ "$aSize" =~ ^[0-9]+$ ]] || aSize=0
  [[ "$bSize" =~ ^[0-9]+$ ]] || bSize=0

  if (( aTier < bTier )); then
    echo "$a|$b|prefer ${PREF_HEIGHT}p (A)"
    return 0
  fi
  if (( bTier < aTier )); then
    echo "$b|$a|prefer ${PREF_HEIGHT}p (B)"
    return 0
  fi

  # Durations are fractional, so compare them in awk; "+0" forces numeric
  # comparison even when a value does not look like a number.
  who=$(awk -v A="$aDur" -v B="$bDur" -v eps="$DURATION_EPS" 'BEGIN{
    d = (A+0) - (B+0); e = eps+0
    if (d > e) print "A"; else if (-d > e) print "B"; else print "TIE"
  }')

  case "$who" in
    A) echo "$a|$b|longer duration (A)" ;;
    B) echo "$b|$a|longer duration (B)" ;;
    *)
      if (( aSize > bSize )); then
        echo "$a|$b|larger file size (A)"
      elif (( bSize > aSize )); then
        echo "$b|$a|larger file size (B)"
      else
        echo "$a|$b|tie-break (keep A)"
      fi
      ;;
  esac
  return 0
}

# act_on_loser LOSER KEEPER
#   Apply ACTION to the losing file and report on stdout:
#     print   only report which file is kept / dropped
#     move    move LOSER into TRASH_DIR (created on demand)
#     delete  remove LOSER
#   Any other ACTION prints a warning and only reports.
#   With DRY=1 nothing on disk is touched; in particular TRASH_DIR is no
#   longer created during a dry run.
#   Returns 1 (with "[ERR] ..." on stderr) when LOSER and KEEPER are the same
#   file, or when the move/delete itself fails; returns 0 otherwise.
act_on_loser() {
  local loser="$1" keeper="$2" base dest n

  # Never act when both names resolve to the same file: dropping the
  # "loser" would destroy the only copy.
  if [[ -e "$loser" && "$loser" -ef "$keeper" ]]; then
    echo "[ERR] Refusing to act: loser and keeper are the same file: $loser" >&2
    return 1
  fi

  case "$ACTION" in
    print)
      echo "[ACTION] Keep: $keeper"
      echo "[ACTION] Drop: $loser"
      ;;
    move)
      if [[ "$DRY" -eq 1 ]]; then
        echo "[ACTION] dry-run: mv \"$loser\" \"$TRASH_DIR/\""
      else
        if ! mkdir -p -- "$TRASH_DIR"; then
          echo "[ERR] Cannot create trash directory: $TRASH_DIR" >&2
          return 1
        fi
        # Do not overwrite a same-named file already in the trash; append
        # a numbered suffix instead.
        base="${loser##*/}"
        dest="$TRASH_DIR/$base"
        n=1
        while [[ -e "$dest" ]]; do
          dest="$TRASH_DIR/$base.~$n~"
          n=$((n + 1))
        done
        if ! mv -- "$loser" "$dest"; then
          echo "[ERR] Could not move to trash: $loser" >&2
          return 1
        fi
        if (( n == 1 )); then
          echo "[ACTION] moved to trash: $loser -> $TRASH_DIR/"
        else
          echo "[ACTION] moved to trash: $loser -> $dest"
        fi
      fi
      echo "[ACTION] kept: $keeper"
      ;;
    delete)
      if [[ "$DRY" -eq 1 ]]; then
        echo "[ACTION] dry-run: rm \"$loser\""
      else
        if ! rm -- "$loser"; then
          echo "[ERR] Could not delete: $loser" >&2
          return 1
        fi
        echo "[ACTION] deleted: $loser"
      fi
      echo "[ACTION] kept: $keeper"
      ;;
    *)
      echo "[WARN] Unknown --action='$ACTION' → printing only."
      echo "[ACTION] Keep: $keeper ; Drop: $loser"
      ;;
  esac
  return 0
}

# same_movie_or_skip A B
#   Compare one snapshot of A and B (taken at SNAP_T seconds, scaled with
#   SNAP_SCALE) and return 0 when their SSIM is >= SNAP_SSIM. Returns
#   non-zero when the SSIM is below the threshold or could not be computed
#   (no value, or not a plain number). Progress and the measured value are
#   written to stderr.
same_movie_or_skip() {
  local a="$1" b="$2" ssim

  echo "== Snapshot compare @${SNAP_T}s: ==" >&2
  ssim="$(snapshot_compare_ssim "$a" "$b" "$SNAP_T" "$SNAP_SCALE")"

  if [[ ! "$ssim" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "[WARN] Could not compute SSIM for: $a vs $b" >&2
    return 1
  fi

  printf "[INFO] SSIM(All) %s vs %s → %s\n" "${a##*/}" "${b##*/}" "$ssim" >&2
  # Fractional comparison is delegated to awk; its exit status is the result.
  awk -v s="$ssim" -v thr="$SNAP_SSIM" 'BEGIN{exit !(s+0 >= thr+0)}'
}

# ----- pair comparison driver -----

# compare_pair A B
#   Full pair-mode pipeline: validate both files, confirm they show the same
#   movie, run the optional external comparison, select the file to keep and
#   apply ACTION to the other one.
#   Exits the script with status 1 when a file is broken or the files are
#   not the same movie. Returns 0 on success and 1 when the action on the
#   loser failed.
#
#   pick_winner's "KEEP|DROP|why" answer is split on '|' only, so file names
#   containing spaces stay intact (names containing '|' are not supported).
compare_pair() {
  local A="$1" B="$2"
  local okA=0 okB=0 keep drop why

  echo "== Step 1: Validating files =="
  if check_ok "$A"; then okA=1; fi
  if check_ok "$B"; then okB=1; fi
  if (( okA == 0 || okB == 0 )); then
    echo "[FAIL] One or both files broken. A_ok=$okA B_ok=$okB" >&2
    exit 1
  fi
  echo "[OK] Both files decoded cleanly."

  echo
  echo "== Step 2: Snapshot compare =="
  if ! same_movie_or_skip "$A" "$B"; then
    echo "[FAIL] Files are not the same movie (SSIM < ${SNAP_SSIM})." >&2
    exit 1
  fi
  echo "[OK] Same movie."

  echo
  echo "== Step 3: External compare ($IMPL) =="
  run_impl "$IMPL" "$A" "$B"

  echo
  echo "== Step 4: Quality selection (prefer ${PREF_HEIGHT}p) =="
  IFS='|' read -r keep drop why <<<"$(pick_winner "$A" "$B")"
  echo "[DECISION] Keep: $keep"
  echo "[DECISION] Drop: $drop"
  echo "[REASON] $why"

  echo
  echo "== Step 5: Action =="
  if ! act_on_loser "$drop" "$keep"; then
    echo "[FAIL] Action on loser failed." >&2
    return 1
  fi

  echo
  echo "== Summary =="
  echo "Kept: $keep"
  echo "Dropped: $drop"
  # Report what actually happened; under --dry-run nothing was changed.
  if [[ "$DRY" -eq 1 ]]; then
    echo "(Dry-run only; no changes made)"
  elif [[ "$ACTION" == "move" ]]; then
    echo "(Moved loser to: $TRASH_DIR)"
  elif [[ "$ACTION" == "delete" ]]; then
    echo "(Loser was deleted)"
  fi
  return 0
}

# ----- directory scan helpers/drivers -----

# scan_and_group
#   Fill the global array FILES with the MP4 files (extension matched
#   case-insensitively) found in SCAN_DIR: the whole tree when RECURSIVE is
#   1, otherwise only SCAN_DIR itself. Paths are collected without parsing
#   `ls` and NUL-delimited from find, so any file name is handled.
scan_and_group() {
  local path
  FILES=()
  if [[ "$RECURSIVE" -eq 1 ]]; then
    while IFS= read -r -d '' path; do
      FILES+=("$path")
    done < <(find "$SCAN_DIR" -type f -iname '*.mp4' -print0)
  else
    # If nothing matches, the glob stays literal and is rejected by -f.
    for path in "$SCAN_DIR"/*; do
      if [[ -f "$path" && "${path,,}" == *.mp4 ]]; then
        FILES+=("$path")
      fi
    done
  fi
  return 0
}

# extract_key FILENAME
#   Print the part of FILENAME that precedes the first occurrence of DELIM,
#   where DELIM is matched case-insensitively and literally (glob characters
#   in DELIM have no special meaning). The prefix is printed in its original
#   case. Prints an empty line when FILENAME does not contain DELIM.
extract_key() {
  local name="$1"
  local lowered="${name,,}" needle="${DELIM,,}" prefix

  if [[ "$lowered" != *"$needle"* ]]; then
    echo ""
    return 0
  fi

  # Locate the delimiter in the lower-cased copy, then cut the same number
  # of characters from the original name.
  prefix="${lowered%%"$needle"*}"
  echo "${name:0:${#prefix}}"
}

# process_group FILE...
#   De-duplicate one group of files that share a name prefix. The first file
#   that passes check_ok becomes the current best; every later valid file is
#   compared against it. When the snapshot comparison says both are the same
#   movie, pick_winner chooses the keeper, act_on_loser is applied to the
#   other file, and the keeper becomes the new best. Broken files and files
#   that are a different movie are left in place.
#   Groups with fewer than two files are skipped. Always returns 0.
#
#   Notes:
#   - The file count is taken after the array is assigned; in a single
#     `local files=(...) n=${#files[@]}` the count is expanded before the
#     assignment takes effect.
#   - A broken first file no longer aborts the whole group.
#   - pick_winner's "KEEP|DROP|why" answer is split on '|' only, so file
#     names containing spaces stay intact ('|' in names is not supported).
process_group() {
  local files=("$@")
  local n=${#files[@]}
  local idx start=0 best="" cand keep drop why f

  if (( n < 2 )); then
    v "Group <2 files → skip"
    return 0
  fi

  echo
  echo "=== Group (${n} files): prefix before '${DELIM}' ==="
  echo "Files:"
  for f in "${files[@]}"; do
    echo " - $f"
  done

  # The first valid file is the initial reference.
  for (( idx = 0; idx < n; idx++ )); do
    if check_ok "${files[idx]}"; then
      best="${files[idx]}"
      start=$((idx + 1))
      break
    fi
    echo "[WARN] Skipping broken file: ${files[idx]}"
  done
  if [[ -z "$best" ]]; then
    echo "[WARN] No valid file in group; nothing to compare."
    return 0
  fi

  for (( idx = start; idx < n; idx++ )); do
    cand="${files[idx]}"
    if ! check_ok "$cand"; then
      echo "[WARN] Skipping broken file: $cand"
      continue
    fi

    echo
    echo "--- Compare: $(basename -- "$best") vs $(basename -- "$cand") ---"
    if ! same_movie_or_skip "$best" "$cand"; then
      echo "[SKIP] Snapshot says NOT same movie; leaving both in place."
      continue
    fi

    run_impl "$IMPL" "$best" "$cand"
    IFS='|' read -r keep drop why <<<"$(pick_winner "$best" "$cand")"
    echo "[DECISION] Keep: $keep"
    echo "[DECISION] Drop: $drop"
    echo "[REASON] $why"
    act_on_loser "$drop" "$keep"
    best="$keep"
  done

  echo
  echo "=== Group result: kept $(basename -- "$best") ==="
  return 0
}

# dir_mode
#   Directory-scan driver. Collects the MP4 files below SCAN_DIR (via
#   scan_and_group, which fills FILES), groups them by the key that
#   extract_key derives from each base name, and hands every group to
#   process_group. Files without the delimiter are not grouped.
#   Groups are processed in the order their key was first seen.
#   Dies when SCAN_DIR is not a directory; exits 0 when there is nothing
#   to do.
dir_mode() {
  local -A groups=()
  local -a order=() grpfiles=()
  local f base key

  [[ -n "${SCAN_DIR:-}" ]] || SCAN_DIR="."
  [[ -d "$SCAN_DIR" ]] || die "Not a directory: $SCAN_DIR"
  echo ">> Directory scan mode on: $SCAN_DIR (recursive=$RECURSIVE, delimiter='$DELIM')"

  scan_and_group
  if [[ ${#FILES[@]} -eq 0 ]]; then
    echo "No MP4 files found."
    exit 0
  fi

  for f in "${FILES[@]}"; do
    base="${f##*/}"
    key="$(extract_key "$base")"
    if [[ -z "$key" ]]; then
      v "No delimiter in: $base → skip grouping"
      continue
    fi
    # Remember the order in which keys first appear.
    if [[ -z "${groups[$key]+set}" ]]; then
      order+=("$key")
    fi
    # One path per line; the trailing newline is stripped before splitting.
    groups["$key"]+="$f"$'\n'
  done

  if [[ ${#groups[@]} -eq 0 ]]; then
    echo "No files with delimiter '$DELIM' found."
    exit 0
  fi

  for key in "${order[@]}"; do
    mapfile -t grpfiles <<<"${groups[$key]%$'\n'}"
    process_group "${grpfiles[@]}"
  done

  echo
  echo ">> Directory scan complete."
}

# -------- dispatch --------
# Directory-scan mode is selected by --scan-dir; otherwise the first two
# positional arguments are compared in pair mode.
if [[ -n "${SCAN_DIR:-}" ]]; then
  if (( ${#REM_ARR[@]} > 0 )); then
    echo "[WARN] Ignoring positional arguments in directory scan mode: ${REM_ARR[*]}" >&2
  fi
  dir_mode
  exit 0
fi

# Pair mode
if [[ ${#REM_ARR[@]} -lt 2 ]]; then
  echo "Usage (pair): $0 A.mp4 B.mp4 [options]" >&2
  echo " or (scan): $0 --scan-dir [DIR] [--recursive] [options]" >&2
  exit 2
fi
if (( ${#REM_ARR[@]} > 2 )); then
  echo "[WARN] Ignoring extra positional arguments: ${REM_ARR[*]:2}" >&2
fi

A="${REM_ARR[0]}"; B="${REM_ARR[1]}"
[[ -f "$A" ]] || die "File not found: $A"
[[ -f "$B" ]] || die "File not found: $B"
# Comparing a file with itself would always "match" and then drop the only
# copy, so refuse it outright.
if [[ "$A" -ef "$B" ]]; then
  die "A and B refer to the same file: $A"
fi

compare_pair "$A" "$B"