videocmp - new version
This commit is contained in:
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# videocmp_select.sh
|
# videocmp_select.sh — first-underscore grouping + daily log file + de-dup clusters + timing
|
||||||
#
|
#
|
||||||
# Modes:
|
# Modes:
|
||||||
# 1) Pair mode: compare A and B
|
# 1) Pair mode: compare A and B
|
||||||
@ -9,41 +9,36 @@
|
|||||||
# ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options]
|
# ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options]
|
||||||
# (DIR defaults to "." if omitted)
|
# (DIR defaults to "." if omitted)
|
||||||
#
|
#
|
||||||
# Examples of grouping by first token:
|
# Logging:
|
||||||
# Becky_California.mp4, Becky_California.1.mp4 → group key "Becky"
|
# By default, logs are written to: ~/log/videocmp_select.YYMMDD.log
|
||||||
# Bianca_Golden.mp4, Bianca_Golden_Cut.mp4 → group key "Bianca"
|
# Example: ~/log/videocmp_select.251014.log
|
||||||
# Emylia.mp4, Emylia_Argan.mp4, Emylia_Wish.mp4 → group key "Emylia"
|
# Override with: --log-file /path/to/file.log
|
||||||
#
|
#
|
||||||
# Pipeline:
|
# Compare helpers (paths):
|
||||||
# 1) Validate files (ffprobe fields + ffmpeg deep decode)
|
# Defaults point to /home/urban/dev/wcx_script
|
||||||
# 2) Confirm same movie via snapshot SSIM @ --snapshot-time (default 12s)
|
# --impl-simple PATH (default: /home/urban/dev/wcx_script/compare_simple.sh)
|
||||||
# 3) Optional: run external compare impl (simple/advanced) for logging
|
# --impl-advanced PATH (default: /home/urban/dev/wcx_script/compare_advanced.sh)
|
||||||
# 4) Pick preferred: prefer --prefer-height (default 720), then longer duration, then larger file
|
|
||||||
# 5) Act on loser: --action print|move|delete (with --dry-run)
|
|
||||||
#
|
#
|
||||||
# Common options:
|
# De-dup semantics (within each key/group):
|
||||||
# --snapshot-time SEC (default: 12)
|
# • Keep **one** representative for each set of files that are the **same movie**.
|
||||||
# --snapshot-scale WxH (default: 320:-1)
|
# • Two-stage "same movie":
|
||||||
# --snapshot-ssim THRESH (default: 0.97)
|
# 1) Fast path: identical signature (WxH, duration_ms, size) ⇒ same; no SSIM needed.
|
||||||
# --impl simple|advanced (default: simple) # logs only
|
# 2) Else: SSIM snapshot at --snapshot-time; SSIM ≥ --snapshot-ssim ⇒ same.
|
||||||
# --impl-simple PATH (default: ./compare_simple.sh)
|
# • After clustering, pick a single **best** (prefer height → duration → size) and drop the rest.
|
||||||
# --impl-advanced PATH (default: ./compare_advanced.sh)
|
|
||||||
# --impl-optional (default) warn if impl missing
|
|
||||||
# --impl-required error if chosen impl missing
|
|
||||||
# --prefer-height N (default: 720)
|
|
||||||
# --duration-eps SEC (default: 0.0)
|
|
||||||
# --action print|move|delete (default: print)
|
|
||||||
# --trash-dir PATH (default: $HOME/.video_trash)
|
|
||||||
# --dry-run
|
|
||||||
# --verbose
|
|
||||||
#
|
|
||||||
# Directory-scan options:
|
|
||||||
# --scan-dir [DIR] Enable directory mode (DIR optional; default ".")
|
|
||||||
# --recursive, -r Recurse into subfolders
|
|
||||||
# (Note: delimiter-based grouping is deprecated; this script now groups by first underscore token.)
|
|
||||||
#
|
#
|
||||||
# Exit codes:
|
# Exit codes:
|
||||||
# 0 success | 1 differ/broken | 2 usage | 3 missing dependency
|
# 0 success | 1 differ/broken | 2 usage | 3 missing dependency
|
||||||
|
|
||||||
|
# --- shell guard: require bash 4+ ---
# The first test deliberately uses POSIX `[ ... ]` rather than `[[ ... ]]`:
# it only matters when the file is being read by a non-bash shell
# (e.g. `sh videocmp_select.sh`), and such a shell may not implement the
# bash-only `[[` keyword, in which case the guard itself would fail and the
# re-exec below would never run.
if [ -z "${BASH_VERSION:-}" ]; then
  echo "[ERR] This script requires bash. Re-running with bash..." >&2
  exec bash "$0" "$@"
fi
# From here on we are running under bash; associative arrays need major
# version 4 or newer. Exit code 2 matches the documented "usage" status.
if (( ${BASH_VERSINFO[0]:-0} < 4 )); then
  echo "[ERR] bash 4+ required (associative arrays). You have: ${BASH_VERSION}" >&2
  exit 2
fi
|
||||||
|
|
||||||
set -u
|
set -u
|
||||||
|
|
||||||
# -------- defaults --------
|
# -------- defaults --------
|
||||||
@ -52,8 +47,8 @@ SNAP_SCALE="320:-1"
|
|||||||
SNAP_SSIM="0.97"
|
SNAP_SSIM="0.97"
|
||||||
|
|
||||||
IMPL="simple"
|
IMPL="simple"
|
||||||
IMPL_SIMPLE="./compare_simple.sh"
|
IMPL_SIMPLE="/home/urban/dev/wcx_script/compare_simple.sh"
|
||||||
IMPL_ADV="./compare_advanced.sh"
|
IMPL_ADV="/home/urban/dev/wcx_script/compare_advanced.sh"
|
||||||
IMPL_REQUIRED=0
|
IMPL_REQUIRED=0
|
||||||
|
|
||||||
PREF_HEIGHT=720
|
PREF_HEIGHT=720
|
||||||
@ -63,15 +58,37 @@ ACTION="print"
|
|||||||
TRASH_DIR="${HOME}/.video_trash"
|
TRASH_DIR="${HOME}/.video_trash"
|
||||||
DRY=0
|
DRY=0
|
||||||
VERBOSE=0
|
VERBOSE=0
|
||||||
|
DEBUG=0
|
||||||
|
|
||||||
SCAN_DIR=""
|
SCAN_DIR=""
|
||||||
RECURSIVE=0
|
RECURSIVE=0
|
||||||
|
|
||||||
|
# Fast-path signature config:
|
||||||
|
# duration is converted to integer milliseconds to avoid FP noise
|
||||||
|
DURATION_MS_PREC=0
|
||||||
|
|
||||||
|
# --- logging defaults ---
|
||||||
|
DATESTAMP=$(date +%y%m%d)
|
||||||
|
LOG_DIR="$HOME/log"
|
||||||
|
LOG_FILE="$LOG_DIR/videocmp_select.${DATESTAMP}.log"
|
||||||
|
|
||||||
# -------- helpers --------
|
# -------- helpers --------
|
||||||
need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 3; }; }
|
# need TOOL
# Verify that TOOL is resolvable via `command -v`; otherwise report it on
# stderr and terminate the script with exit status 3 (missing dependency).
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 3
  fi
}
|
||||||
v() { [[ $VERBOSE -eq 1 ]] && echo "[LOG]" "$@" >&2; }
|
# v MESSAGE...
# Verbose logger: when VERBOSE is 1, write "[LOG] MESSAGE..." to stderr;
# otherwise do nothing.
# Always returns 0. The previous `[[ ... ]] && echo` form returned 1 whenever
# verbose output was disabled, which leaked a failure status to any caller
# that ended with a v call and would abort the script under `set -e`.
v() {
  if [[ ${VERBOSE:-0} -eq 1 ]]; then
    echo "[LOG]" "$@" >&2
  fi
  return 0
}
|
||||||
|
# dbg MESSAGE...
# Debug logger: when DEBUG is 1, write "[DBG <date> <time>] MESSAGE..." to
# stderr; otherwise do nothing.
# Always returns 0 so that a disabled debug call never leaks a failure status
# to its caller (the previous `[[ ... ]] && printf` form returned 1 when
# DEBUG was off).
dbg() {
  if [[ ${DEBUG:-0} -eq 1 ]]; then
    # %(...)T with argument -1 makes the printf builtin format the current time.
    printf '[DBG %(%F %T)T] %s\n' -1 "$*" >&2
  fi
  return 0
}
|
||||||
die() { echo "[ERR]" "$@" >&2; exit 1; }
|
# die MESSAGE...
# Print "[ERR] MESSAGE..." on stderr and terminate the script with status 1.
die() {
  echo "[ERR]" "$@" >&2
  exit 1
}
|
||||||
|
|
||||||
|
# current time in milliseconds (portable-ish)
# Prints the wall-clock time as an integer number of milliseconds since the
# epoch. Resolution degrades to whole seconds when `date` has no %N support.
now_ms() {
  local ms s n
  # A `date` without %N support may still exit 0 and print the format letters
  # literally (e.g. "17000000003N"), so validate the text instead of trusting
  # the exit status alone.
  ms=$(date +%s%3N 2>/dev/null) || ms=""
  if [[ "$ms" =~ ^[0-9]+$ ]]; then
    echo "$ms"
    return 0
  fi
  s=$(date +%s)
  n=$(date +%N 2>/dev/null) || n=""
  # Non-numeric nanoseconds (e.g. a literal "N") mean %N is unsupported.
  [[ "$n" =~ ^[0-9]+$ ]] || n=0
  # 10# forces base 10: %N is zero-padded, and a leading 0 would otherwise make
  # the arithmetic parser read the value as octal (failing on digits 8/9).
  echo $(( s * 1000 + 10#$n / 1000000 ))
}
|
||||||
|
# delta in ms
|
||||||
|
# duration_ms START END
# Print END - START (both integer millisecond timestamps) followed by a newline.
duration_ms() {
  local delta
  delta=$(( $2 - $1 ))
  printf '%s\n' "$delta"
}
|
||||||
|
|
||||||
need ffmpeg; need ffprobe; need awk; need grep; need stat; need sed; need tr; need find
|
need ffmpeg; need ffprobe; need awk; need grep; need stat; need sed; need tr; need find
|
||||||
|
|
||||||
# -------- array-based option parser --------
|
# -------- array-based option parser --------
|
||||||
@ -81,6 +98,7 @@ i=0
|
|||||||
while (( i < ${#ARGS[@]} )); do
|
while (( i < ${#ARGS[@]} )); do
|
||||||
arg="${ARGS[i]}"
|
arg="${ARGS[i]}"
|
||||||
case "$arg" in
|
case "$arg" in
|
||||||
|
--log-file) LOG_FILE="${ARGS[i+1]:-}"; i=$((i+2));;
|
||||||
--snapshot-time) SNAP_T="${ARGS[i+1]:-}"; i=$((i+2));;
|
--snapshot-time) SNAP_T="${ARGS[i+1]:-}"; i=$((i+2));;
|
||||||
--snapshot-scale) SNAP_SCALE="${ARGS[i+1]:-}"; i=$((i+2));;
|
--snapshot-scale) SNAP_SCALE="${ARGS[i+1]:-}"; i=$((i+2));;
|
||||||
--snapshot-ssim) SNAP_SSIM="${ARGS[i+1]:-}"; i=$((i+2));;
|
--snapshot-ssim) SNAP_SSIM="${ARGS[i+1]:-}"; i=$((i+2));;
|
||||||
@ -95,8 +113,8 @@ while (( i < ${#ARGS[@]} )); do
|
|||||||
--trash-dir) TRASH_DIR="${ARGS[i+1]:-}"; i=$((i+2));;
|
--trash-dir) TRASH_DIR="${ARGS[i+1]:-}"; i=$((i+2));;
|
||||||
--dry-run) DRY=1; i=$((i+1));;
|
--dry-run) DRY=1; i=$((i+1));;
|
||||||
--verbose) VERBOSE=1; i=$((i+1));;
|
--verbose) VERBOSE=1; i=$((i+1));;
|
||||||
|
--debug) DEBUG=1; VERBOSE=1; i=$((i+1));;
|
||||||
--scan-dir)
|
--scan-dir)
|
||||||
# optional arg: use next token unless it looks like another option
|
|
||||||
next="${ARGS[i+1]:-}"
|
next="${ARGS[i+1]:-}"
|
||||||
if [[ -n "$next" && "$next" != --* ]]; then
|
if [[ -n "$next" && "$next" != --* ]]; then
|
||||||
SCAN_DIR="$next"; i=$((i+2))
|
SCAN_DIR="$next"; i=$((i+2))
|
||||||
@ -105,24 +123,36 @@ while (( i < ${#ARGS[@]} )); do
|
|||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
--recursive|-r) RECURSIVE=1; i=$((i+1));;
|
--recursive|-r) RECURSIVE=1; i=$((i+1));;
|
||||||
--delimiter|--delim) # backward-compat: ignore but warn once
|
|
||||||
echo "[WARN] --delimiter is deprecated; grouping now uses first underscore token." >&2
|
|
||||||
i=$((i+ (arg=="--delimiter"?2:1) )) ;;
|
|
||||||
--help|-h)
|
--help|-h)
|
||||||
grep -E '^# ' "$0" | sed 's/^# //'
|
grep -E '^# ' "$0" | sed 's/^# //' ; exit 0 ;;
|
||||||
exit 0 ;;
|
|
||||||
*)
|
*)
|
||||||
# leave positional for pair mode
|
|
||||||
REM_ARR+=("$arg"); i=$((i+1));;
|
REM_ARR+=("$arg"); i=$((i+1));;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T"
|
# initialize logging sink (after options so --log-file can override)
|
||||||
|
mkdir -p "$LOG_DIR" "$(dirname "$LOG_FILE")" 2>/dev/null || true
|
||||||
|
# send both stdout and stderr through tee to the log file
|
||||||
|
exec > >(tee -a "$LOG_FILE") 2>&1
|
||||||
|
|
||||||
|
echo "[INFO] Running with: ${BASH:-/bin/bash} ${BASH_VERSION}"
|
||||||
|
echo "[INFO] Logging to: $LOG_FILE"
|
||||||
|
v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T IMPL_SIMPLE=$IMPL_SIMPLE IMPL_ADV=$IMPL_ADV"
|
||||||
|
|
||||||
# -------- core functions --------
|
# -------- core functions --------
|
||||||
probe_meta() { # file -> "w h dur codec size"
|
|
||||||
|
# portable file size (GNU/BSD/BusyBox)
|
||||||
|
# get_size FILE
# Print the size of FILE in bytes, or 0 when it cannot be determined.
# Tries, in order: `stat -c %s` (GNU/BusyBox syntax), `stat -f %z` (BSD
# syntax), then `wc -c`. Each result is accepted only if it is purely
# numeric, so stray output from a mismatched stat flavour is never returned.
get_size() {
  local f="$1" s
  # -L: follow symlinks so a linked video reports the size of the real file
  # rather than the length of the link itself.
  if s=$(stat -L -c %s -- "$f" 2>/dev/null) && [[ "$s" =~ ^[0-9]+$ ]]; then
    echo "$s"
    return 0
  fi
  if s=$(stat -L -f %z -- "$f" 2>/dev/null) && [[ "$s" =~ ^[0-9]+$ ]]; then
    echo "$s"
    return 0
  fi
  # The braces silence the shell's own "No such file" message: a failing
  # `<"$f"` redirection is reported before a same-command 2>/dev/null applies.
  if { s=$(wc -c <"$f"); } 2>/dev/null; then
    s=${s//[[:space:]]/}  # some wc implementations pad the count with blanks
    if [[ "$s" =~ ^[0-9]+$ ]]; then
      echo "$s"
      return 0
    fi
  fi
  echo 0
}
|
||||||
|
|
||||||
|
probe_meta() { # file -> "w h dur_sec vcodec size bytes"
|
||||||
local f="$1" size w h dur vcodec
|
local f="$1" size w h dur vcodec
|
||||||
size=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")
|
size=$(get_size "$f")
|
||||||
w=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=nw=1:nk=1 "$f" || echo 0)
|
w=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=nw=1:nk=1 "$f" || echo 0)
|
||||||
h=$(ffprobe -v error -select_streams v:0 -show_entries stream=height -of default=nw=1:nk=1 "$f" || echo 0)
|
h=$(ffprobe -v error -select_streams v:0 -show_entries stream=height -of default=nw=1:nk=1 "$f" || echo 0)
|
||||||
dur=$(ffprobe -v error -show_entries format=duration -of default=nw=1:nk=1 "$f" || echo 0)
|
dur=$(ffprobe -v error -show_entries format=duration -of default=nw=1:nk=1 "$f" || echo 0)
|
||||||
@ -130,6 +160,17 @@ probe_meta() { # file -> "w h dur codec size"
|
|||||||
echo "$w $h $dur $vcodec $size"
|
echo "$w $h $dur $vcodec $size"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Convert seconds(float) → ms(int)
|
||||||
|
# sec_to_ms SECONDS
# Convert a (possibly fractional) number of seconds to integer milliseconds,
# rounding half up, and print it without a trailing newline.
# The value is handed to awk with -v instead of being spliced into the program
# text, so an empty or non-numeric duration (e.g. "N/A") yields 0 rather than
# an awk syntax/division error, and the argument can never be run as awk code.
sec_to_ms() {
  LC_ALL=C awk -v s="${1:-}" 'BEGIN{ printf("%d", s * 1000 + 0.5) }'
}
|
||||||
|
|
||||||
|
# Build a fast-path signature: width x height : duration_ms : size
|
||||||
|
# signature FILE
# Print the fast-path signature "<W>x<H>:<duration_ms>ms:<size>" for FILE.
# The five whitespace-separated fields are read from probe_meta's output and
# the duration field is converted with sec_to_ms. The codec field is read but
# not part of the signature.
signature() { # file -> sig string
  local path="$1"
  local width height seconds codec bytes millis
  read -r width height seconds codec bytes <<<"$(probe_meta "$path")"
  millis=$(sec_to_ms "$seconds")
  printf '%s\n' "${width}x${height}:${millis}ms:${bytes}"
}
|
||||||
|
|
||||||
check_ok() { # file -> 0 ok / 1 bad
|
check_ok() { # file -> 0 ok / 1 bad
|
||||||
local f="$1" w h dur
|
local f="$1" w h dur
|
||||||
w=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=nw=1:nk=1 "$f" 2>/dev/null || echo "")
|
w=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=nw=1:nk=1 "$f" 2>/dev/null || echo "")
|
||||||
@ -175,6 +216,7 @@ run_impl() { # impl, A, B
|
|||||||
|
|
||||||
score_file() { # file -> "tier720 dur size"
|
score_file() { # file -> "tier720 dur size"
|
||||||
local f="$1"
|
local f="$1"
|
||||||
|
local W H DUR VC SIZE
|
||||||
read -r W H DUR VC SIZE <<<"$(probe_meta "$f")"
|
read -r W H DUR VC SIZE <<<"$(probe_meta "$f")"
|
||||||
local tier=1; [[ "$H" -eq "$PREF_HEIGHT" ]] && tier=0
|
local tier=1; [[ "$H" -eq "$PREF_HEIGHT" ]] && tier=0
|
||||||
echo "$tier $DUR $SIZE"
|
echo "$tier $DUR $SIZE"
|
||||||
@ -182,6 +224,7 @@ score_file() { # file -> "tier720 dur size"
|
|||||||
|
|
||||||
pick_winner() { # A B -> "KEEP|DROP|why"
|
pick_winner() { # A B -> "KEEP|DROP|why"
|
||||||
local a="$1" b="$2"
|
local a="$1" b="$2"
|
||||||
|
local aTier aDur aSize bTier bDur bSize
|
||||||
read -r aTier aDur aSize <<<"$(score_file "$a")"
|
read -r aTier aDur aSize <<<"$(score_file "$a")"
|
||||||
read -r bTier bDur bSize <<<"$(score_file "$b")"
|
read -r bTier bDur bSize <<<"$(score_file "$b")"
|
||||||
v "Quality scores: A[tier=$aTier dur=$aDur size=$aSize] B[tier=$bTier dur=$bDur size=$bSize]"
|
v "Quality scores: A[tier=$aTier dur=$aDur size=$aSize] B[tier=$bTier dur=$bDur size=$bSize]"
|
||||||
@ -201,6 +244,13 @@ pick_winner() { # A B -> "KEEP|DROP|why"
|
|||||||
|
|
||||||
act_on_loser() { # loser keep
|
act_on_loser() { # loser keep
|
||||||
local loser="$1" keeper="$2"
|
local loser="$1" keeper="$2"
|
||||||
|
# Safety: ensure loser belongs to current group (by first token) to prevent cross-group actions
|
||||||
|
local lkey
|
||||||
|
lkey="$(first_underscore_key "$(basename -- "$loser")")"
|
||||||
|
if [[ -n "${CURRENT_GROUP_KEY:-}" && "$lkey" != "$CURRENT_GROUP_KEY" ]]; then
|
||||||
|
echo "[ACTION] skip: '$loser' not in current group '$CURRENT_GROUP_KEY' (lkey='$lkey')"; return 0; fi
|
||||||
|
if [[ ! -e "$loser" ]]; then
|
||||||
|
echo "[ACTION] skip: not found → $loser"; return 0; fi
|
||||||
case "$ACTION" in
|
case "$ACTION" in
|
||||||
print)
|
print)
|
||||||
echo "[ACTION] Keep: $keeper"
|
echo "[ACTION] Keep: $keeper"
|
||||||
@ -211,7 +261,7 @@ act_on_loser() { # loser keep
|
|||||||
if [[ $DRY -eq 1 ]]; then
|
if [[ $DRY -eq 1 ]]; then
|
||||||
echo "[ACTION] dry-run: mv \"$loser\" \"$TRASH_DIR/\""
|
echo "[ACTION] dry-run: mv \"$loser\" \"$TRASH_DIR/\""
|
||||||
else
|
else
|
||||||
mv -- "$loser" "$TRASH_DIR/" && echo "[ACTION] moved to trash: $loser -> $TRASH_DIR/"
|
mv -- "$loser" "$TRASH_DIR/" && echo "[ACTION] moved to trash: $loser -> $TRASH_DIR/" || echo "[ACTION] move failed: $loser"
|
||||||
fi
|
fi
|
||||||
echo "[ACTION] kept: $keeper"
|
echo "[ACTION] kept: $keeper"
|
||||||
;;
|
;;
|
||||||
@ -219,7 +269,7 @@ act_on_loser() { # loser keep
|
|||||||
if [[ $DRY -eq 1 ]]; then
|
if [[ $DRY -eq 1 ]]; then
|
||||||
echo "[ACTION] dry-run: rm \"$loser\""
|
echo "[ACTION] dry-run: rm \"$loser\""
|
||||||
else
|
else
|
||||||
rm -- "$loser" && echo "[ACTION] deleted: $loser"
|
rm -- "$loser" && echo "[ACTION] deleted: $loser" || echo "[ACTION] delete failed: $loser"
|
||||||
fi
|
fi
|
||||||
echo "[ACTION] kept: $keeper"
|
echo "[ACTION] kept: $keeper"
|
||||||
;;
|
;;
|
||||||
@ -246,8 +296,7 @@ scan_and_collect() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# New grouping rule: key = first token of basename (without extension) before the first underscore.
|
# key = first token of basename (without extension) before the first underscore; lowercased
|
||||||
# If there is no underscore, the whole basename (without extension) is the key.
|
|
||||||
first_underscore_key() {
|
first_underscore_key() {
|
||||||
local base extless key
|
local base extless key
|
||||||
base="$(basename -- "$1")"
|
base="$(basename -- "$1")"
|
||||||
@ -257,32 +306,154 @@ first_underscore_key() {
|
|||||||
else
|
else
|
||||||
key="$extless"
|
key="$extless"
|
||||||
fi
|
fi
|
||||||
echo "$key"
|
echo "${key,,}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# union helper: merge cluster B into A (robust to unset B)
|
||||||
|
# union_clusters CID_A CID_B
# Merge cluster CID_B into cluster CID_A.
# Operates on the associative arrays FILE_TO_CID (file -> cluster id) and
# CLUSTER_CONTENT (cluster id -> newline-separated file list), which are not
# declared here and must be visible in the calling scope.
# Every file listed under CID_B is re-pointed to CID_A and appended to CID_A's
# list, then the CID_B entry is removed. Merging a cluster with itself, or
# merging a CID_B that has no entry, is a no-op. Always returns 0.
union_clusters() { # cidA cidB -> merges B into A
  local A="$1" B="$2" line
  if [[ "$A" == "$B" ]]; then
    return 0
  fi
  # The "+x" expansion tests for existence of the key, not for a non-empty value.
  if [[ -z "${CLUSTER_CONTENT[$B]+x}" ]]; then
    dbg "union: skip, cluster B=$B not found"
    return 0
  fi
  # Make sure the target entry exists before appending to it.
  [[ -n "${CLUSTER_CONTENT[$A]+x}" ]] || CLUSTER_CONTENT["$A"]=""
  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      FILE_TO_CID["$line"]="$A"
      CLUSTER_CONTENT["$A"]+=$'\n'"$line"
    fi
  done < <(printf "%s\n" "${CLUSTER_CONTENT[$B]}" | sed '/^$/d')
  unset 'CLUSTER_CONTENT[$B]'
}
|
||||||
|
|
||||||
|
# cluster_group FILE...
# Partition the given files into clusters whose members are considered the
# same movie. Results are written into the associative arrays FILE_TO_CID,
# CLUSTER_CONTENT and SIG_CACHE, which are not declared here and are expected
# to be provided by the caller's scope.
#   1. Every file starts in its own cluster and gets its signature cached.
#   2. Every pair not yet in the same cluster is compared: equal cached
#      signatures merge immediately; otherwise same_movie_or_skip decides
#      (defined elsewhere; presumably the snapshot SSIM test, going by the
#      "[SSIM]" log tags).
# A "[TIME]" line is printed on stdout for each comparison.
cluster_group() { # files... -> create clusters so that "same movie" files share a cid
  local members=("$@")
  local total=${#members[@]}
  local next_cid=0

  # Use dynamic scoping: refer to the caller's local assoc arrays
  # shellcheck disable=SC2154
  : "${FILE_TO_CID[@]:-}" "${CLUSTER_CONTENT[@]:-}" "${SIG_CACHE[@]:-}"

  # Seed: one singleton cluster per file, numbered from 1.
  local path cid
  for path in "${members[@]}"; do
    next_cid=$(( next_cid + 1 ))
    cid="$next_cid"
    FILE_TO_CID["$path"]="$cid"
    CLUSTER_CONTENT["$cid"]+=$'\n'"$path"
    SIG_CACHE["$path"]="$(signature "$path")"
    dbg "init cid=$cid file=$path sig=${SIG_CACHE[$path]}"
  done

  local i j left right left_cid right_cid left_name right_name
  local started finished verdict
  for (( i = 0; i < total; i++ )); do
    left="${members[i]}"
    left_cid="${FILE_TO_CID[$left]}"
    for (( j = i + 1; j < total; j++ )); do
      right="${members[j]}"
      right_cid="${FILE_TO_CID[$right]}"
      # Already clustered together: nothing to compare.
      if [[ "$left_cid" == "$right_cid" ]]; then
        continue
      fi
      left_name="$(basename -- "$left")"
      right_name="$(basename -- "$right")"

      # Fast path: identical signatures (log timing)
      started=$(now_ms)
      if [[ "${SIG_CACHE[$left]}" == "${SIG_CACHE[$right]}" ]]; then
        finished=$(now_ms)
        echo "[TIME] fast-compare ${left_name} vs ${right_name}: $((finished-started)) ms"
        v "[FAST] Same signature ⇒ union: ${left_name} ~ ${right_name}"
        dbg "union(sig) A=${left}(cid=${left_cid}) B=${right}(cid=${right_cid})"
        union_clusters "$left_cid" "$right_cid"
        # Re-read the id of the left file's cluster after the merge.
        left_cid="${FILE_TO_CID[$left]}"
        continue
      fi

      # Slow path with timing; the [TIME] line is identical for both outcomes.
      started=$(now_ms)
      if same_movie_or_skip "$left" "$right"; then
        verdict="same"
      else
        verdict="different"
      fi
      finished=$(now_ms)
      echo "[TIME] ssim ${left_name} vs ${right_name}: $((finished-started)) ms"
      if [[ "$verdict" == "same" ]]; then
        v "[SSIM] Same movie ⇒ union: ${left_name} ~ ${right_name}"
        dbg "union(ssim) A=${left}(cid=${left_cid}) B=${right}(cid=${right_cid})"
        union_clusters "$left_cid" "$right_cid"
        left_cid="${FILE_TO_CID[$left]}"
      else
        v "[SSIM] Different movie: ${left_name} vs ${right_name}"
        dbg "different(ssim) A=${left} B=${right}"
      fi
    done
  done
}
|
||||||
|
|
||||||
|
# choose_best_in_cluster FILE...
# Print (without trailing newline) the preferred file among the arguments.
# The first file is the initial champion; each remaining candidate is put
# against the current champion with pick_winner, whose stdout is expected to
# be "KEEP|DROP|why". With no arguments an empty string is printed.
#
# The reply is split on '|' only. Splitting on whitespace (as the earlier
# `tr '|' ' '` form did) truncated any path containing a space, so the
# returned "best" no longer equalled any real file name.
choose_best_in_cluster() { # prints best file; args: files...
  local best="${1:-}" cand keep drop why
  shift || true
  for cand in "$@"; do
    keep="" drop="" why=""
    IFS='|' read -r keep drop why <<<"$(pick_winner "$best" "$cand")"
    v "[PICK] $why"
    # Only accept an answer that names one of the two contestants; anything
    # else (empty or garbled reply) leaves the current champion in place.
    if [[ "$keep" == "$best" || "$keep" == "$cand" ]]; then
      best="$keep"
    else
      echo "[WARN] pick_winner gave unusable result for '$best' vs '$cand'; keeping current best" >&2
    fi
  done
  printf "%s" "$best"
}
|
||||||
|
|
||||||
process_group() { # files...
|
process_group() { # files...
|
||||||
local files=("$@") n=${#files[@]}
|
local files=("$@") n=${#files[@]}
|
||||||
if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi
|
if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi
|
||||||
echo; echo "=== Group (${n} files): first-underscore key '$(first_underscore_key "${files[0]}")' ==="
|
local key group_t0 group_t1
|
||||||
|
key="$(first_underscore_key "${files[0]}")"
|
||||||
|
CURRENT_GROUP_KEY="$key"
|
||||||
|
group_t0=$(now_ms)
|
||||||
|
|
||||||
|
echo
|
||||||
|
printf '=== Group (%s files): key %s ===\n' "$n" "$key"
|
||||||
echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done
|
echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done
|
||||||
|
|
||||||
local best="${files[0]}"
|
# Filter out broken upfront
|
||||||
if ! check_ok "$best"; then echo "[WARN] Skipping broken file: $best"; return 0; fi
|
local okfiles=()
|
||||||
|
local f
|
||||||
|
for f in "${files[@]}"; do
|
||||||
|
if check_ok "$f"; then okfiles+=("$f"); else echo "[WARN] Skipping broken file: $f"; fi
|
||||||
|
done
|
||||||
|
if (( ${#okfiles[@]} < 2 )); then echo "[INFO] <2 valid files after validation → skip"; return 0; fi
|
||||||
|
|
||||||
for ((i=1;i<n;i++)); do
|
# Per-group cluster state (locals → no bleed across groups)
|
||||||
local cand="${files[i]}"
|
local -A FILE_TO_CID
|
||||||
if ! check_ok "$cand"; then echo "[WARN] Skipping broken file: $cand"; continue; fi
|
local -A CLUSTER_CONTENT
|
||||||
echo; echo "--- Compare: $(basename -- "$best") vs $(basename -- "$cand") ---"
|
local -A SIG_CACHE
|
||||||
if ! same_movie_or_skip "$best" "$cand"; then
|
|
||||||
echo "[SKIP] Snapshot says NOT same movie; leaving both in place."; continue; fi
|
# Always log signatures for transparency
|
||||||
run_impl "$IMPL" "$best" "$cand"
|
echo "Signatures (WxH:duration_ms:size):"
|
||||||
read -r keep drop why <<<"$(pick_winner "$best" "$cand" | tr '|' ' ')"
|
for f in "${okfiles[@]}"; do echo " - $(basename -- "$f") => $(signature "$f")"; done
|
||||||
echo "[DECISION] Keep: $keep"; echo "[DECISION] Drop: $drop"; echo "[REASON] $why"
|
|
||||||
act_on_loser "$drop" "$keep"
|
dbg "group='${key}' files=${#okfiles[@]}"
|
||||||
best="$keep"
|
|
||||||
|
# Build clusters of "same movie"
|
||||||
|
cluster_group "${okfiles[@]}"
|
||||||
|
|
||||||
|
# For each cluster, keep just one best and drop the rest
|
||||||
|
local cid content arr best keepers=()
|
||||||
|
for cid in "${!CLUSTER_CONTENT[@]}"; do
|
||||||
|
content="${CLUSTER_CONTENT[$cid]:-}"
|
||||||
|
mapfile -t arr < <(printf "%s" "$content" | sed '/^$/d')
|
||||||
|
dbg "cluster id=$cid size=${#arr[@]}"
|
||||||
|
if (( ${#arr[@]} == 0 )); then continue; fi
|
||||||
|
if (( ${#arr[@]} == 1 )); then keepers+=("${arr[0]}"); continue; fi
|
||||||
|
best="$(choose_best_in_cluster "${arr[@]}")"
|
||||||
|
keepers+=("$best")
|
||||||
|
# drop all others in the cluster
|
||||||
|
local x
|
||||||
|
for x in "${arr[@]}"; do
|
||||||
|
[[ "$x" == "$best" ]] && continue
|
||||||
|
echo
|
||||||
|
echo "[CLUSTER] $(basename -- "$x") is duplicate of cluster best $(basename -- "$best")"
|
||||||
|
act_on_loser "$x" "$best"
|
||||||
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
echo; echo "=== Group result: kept $(basename -- "$best") ==="
|
echo
|
||||||
|
echo "=== Group result: kept ${#keepers[@]} unique file(s) ==="
|
||||||
|
for f in "${keepers[@]}"; do echo " • $(basename -- "$f")"; done
|
||||||
|
group_t1=$(now_ms)
|
||||||
|
echo "[TIME] group '${key}' took $(duration_ms "$group_t0" "$group_t1") ms"
|
||||||
}
|
}
|
||||||
|
|
||||||
dir_mode() {
|
dir_mode() {
|
||||||
@ -305,7 +476,8 @@ dir_mode() {
|
|||||||
process_group "${grpfiles[@]}"
|
process_group "${grpfiles[@]}"
|
||||||
done
|
done
|
||||||
|
|
||||||
echo; echo ">> Directory scan complete."
|
echo
|
||||||
|
echo ">> Directory scan complete."
|
||||||
}
|
}
|
||||||
|
|
||||||
# ----- pair comparison driver -----
|
# ----- pair comparison driver -----
|
||||||
@ -316,15 +488,19 @@ compare_pair() { # A B
|
|||||||
if (( okA==0 || okB==0 )); then echo "[FAIL] One or both files broken. A_ok=$okA B_ok=$okB" >&2; exit 1; fi
|
if (( okA==0 || okB==0 )); then echo "[FAIL] One or both files broken. A_ok=$okA B_ok=$okB" >&2; exit 1; fi
|
||||||
echo "[OK] Both files decoded cleanly."
|
echo "[OK] Both files decoded cleanly."
|
||||||
|
|
||||||
echo; echo "== Step 2: Snapshot compare ==";
|
echo; echo "== Step 2: Same-movie test (fast path then SSIM) =="
|
||||||
if ! same_movie_or_skip "$A" "$B"; then
|
if [[ "$(signature "$A")" == "$(signature "$B")" ]]; then
|
||||||
echo "[FAIL] Files are not the same movie (SSIM < ${SNAP_SSIM})." >&2; exit 1
|
echo "[OK] Identical signature ⇒ treat as same movie"
|
||||||
|
else
|
||||||
|
if ! same_movie_or_skip "$A" "$B"; then
|
||||||
|
echo "[FAIL] Files are not the same movie (SSIM < ${SNAP_SSIM})." >&2; exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
echo "[OK] Same movie."
|
echo "[OK] Same movie."
|
||||||
|
|
||||||
echo; echo "== Step 3: External compare ($IMPL) =="; run_impl "$IMPL" "$A" "$B"
|
echo; echo "== Step 3: External compare ($IMPL) =="; run_impl "$IMPL" "$A" "$B"
|
||||||
|
|
||||||
echo; echo "== Step 4: Quality selection (prefer ${PREF_HEIGHT}p) ==";
|
echo; echo "== Step 4: Quality selection (prefer ${PREF_HEIGHT}p) =="
|
||||||
read -r keep drop why <<<"$(pick_winner "$A" "$B" | tr '|' ' ')"
|
read -r keep drop why <<<"$(pick_winner "$A" "$B" | tr '|' ' ')"
|
||||||
echo "[DECISION] Keep: $keep"; echo "[DECISION] Drop: $drop"; echo "[REASON] $why"
|
echo "[DECISION] Keep: $keep"; echo "[DECISION] Drop: $drop"; echo "[REASON] $why"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user