fix
This commit is contained in:
@ -5,10 +5,15 @@
|
|||||||
# 1) Pair mode: compare A and B
|
# 1) Pair mode: compare A and B
|
||||||
# ./videocmp_select.sh A.mp4 B.mp4 [options]
|
# ./videocmp_select.sh A.mp4 B.mp4 [options]
|
||||||
#
|
#
|
||||||
# 2) Directory scan mode: group by prefix before first "WoodmanCastingX" (case-insensitive)
|
# 2) Directory scan mode: group files by the first token before the first underscore in the basename
|
||||||
# ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options]
|
# ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options]
|
||||||
# (DIR defaults to "." if omitted)
|
# (DIR defaults to "." if omitted)
|
||||||
#
|
#
|
||||||
|
# Examples of grouping by first token:
|
||||||
|
# Becky_California.mp4, Becky_California.1.mp4 → group key "Becky"
|
||||||
|
# Bianca_Golden.mp4, Bianca_Golden_Cut.mp4 → group key "Bianca"
|
||||||
|
# Emylia.mp4, Emylia_Argan.mp4, Emylia_Wish.mp4 → group key "Emylia"
|
||||||
|
#
|
||||||
# Pipeline:
|
# Pipeline:
|
||||||
# 1) Validate files (ffprobe fields + ffmpeg deep decode)
|
# 1) Validate files (ffprobe fields + ffmpeg deep decode)
|
||||||
# 2) Confirm same movie via snapshot SSIM @ --snapshot-time (default 12s)
|
# 2) Confirm same movie via snapshot SSIM @ --snapshot-time (default 12s)
|
||||||
@ -35,7 +40,7 @@
|
|||||||
# Directory-scan options:
|
# Directory-scan options:
|
||||||
# --scan-dir [DIR] Enable directory mode (DIR optional; default ".")
|
# --scan-dir [DIR] Enable directory mode (DIR optional; default ".")
|
||||||
# --recursive, -r Recurse into subfolders
|
# --recursive, -r Recurse into subfolders
|
||||||
# --delimiter WORD (default: WoodmanCastingX) case-insensitive
|
# (Note: delimiter-based grouping is deprecated; this script now groups by first underscore token.)
|
||||||
#
|
#
|
||||||
# Exit codes:
|
# Exit codes:
|
||||||
# 0 success | 1 differ/broken | 2 usage | 3 missing dependency
|
# 0 success | 1 differ/broken | 2 usage | 3 missing dependency
|
||||||
@ -61,7 +66,6 @@ VERBOSE=0
|
|||||||
|
|
||||||
SCAN_DIR=""
|
SCAN_DIR=""
|
||||||
RECURSIVE=0
|
RECURSIVE=0
|
||||||
DELIM="WoodmanCastingX"
|
|
||||||
|
|
||||||
# -------- helpers --------
|
# -------- helpers --------
|
||||||
need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 3; }; }
|
need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 3; }; }
|
||||||
@ -101,7 +105,9 @@ while (( i < ${#ARGS[@]} )); do
|
|||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
--recursive|-r) RECURSIVE=1; i=$((i+1));;
|
--recursive|-r) RECURSIVE=1; i=$((i+1));;
|
||||||
--delimiter) DELIM="${ARGS[i+1]:-}"; i=$((i+2));;
|
--delimiter|--delim) # backward-compat: ignore but warn once
|
||||||
|
echo "[WARN] --delimiter is deprecated; grouping now uses first underscore token." >&2
|
||||||
|
i=$((i+ (arg=="--delimiter"?2:1) )) ;;
|
||||||
--help|-h)
|
--help|-h)
|
||||||
grep -E '^# ' "$0" | sed 's/^# //'
|
grep -E '^# ' "$0" | sed 's/^# //'
|
||||||
exit 0 ;;
|
exit 0 ;;
|
||||||
@ -111,7 +117,7 @@ while (( i < ${#ARGS[@]} )); do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE DELIM='$DELIM' PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T"
|
v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T"
|
||||||
|
|
||||||
# -------- core functions --------
|
# -------- core functions --------
|
||||||
probe_meta() { # file -> "w h dur codec size"
|
probe_meta() { # file -> "w h dur codec size"
|
||||||
@ -143,8 +149,8 @@ snapshot_compare_ssim() { # f1 f2 time scale -> "ssim" (empty if fail)
|
|||||||
local f1="$1" f2="$2" t="$3" sc="$4"
|
local f1="$1" f2="$2" t="$3" sc="$4"
|
||||||
local tmpd s1 s2 log ssim
|
local tmpd s1 s2 log ssim
|
||||||
tmpd="$(mktemp -d)"; s1="$tmpd/1.png"; s2="$tmpd/2.png"; log="$tmpd/cmp.log"
|
tmpd="$(mktemp -d)"; s1="$tmpd/1.png"; s2="$tmpd/2.png"; log="$tmpd/cmp.log"
|
||||||
ffmpeg -hide_banner -v error -y -i "$f1" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s1" || true
|
ffmpeg -hide_banner -v error -y -ss "$t" -i "$f1" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s1" || true
|
||||||
ffmpeg -hide_banner -v error -y -i "$f2" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s2" || true
|
ffmpeg -hide_banner -v error -y -ss "$t" -i "$f2" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s2" || true
|
||||||
if [[ ! -s "$s1" || ! -s "$s2" ]]; then rm -rf "$tmpd"; echo ""; return 0; fi
|
if [[ ! -s "$s1" || ! -s "$s2" ]]; then rm -rf "$tmpd"; echo ""; return 0; fi
|
||||||
ffmpeg -hide_banner -v info -i "$s1" -i "$s2" -lavfi "ssim" -f null - > /dev/null 2> "$log" || true
|
ffmpeg -hide_banner -v info -i "$s1" -i "$s2" -lavfi "ssim" -f null - > /dev/null 2> "$log" || true
|
||||||
ssim="$(grep -Eo 'All:[0-9]+(\.[0-9]+)?' "$log" | head -n1 | cut -d: -f2)"
|
ssim="$(grep -Eo 'All:[0-9]+(\.[0-9]+)?' "$log" | head -n1 | cut -d: -f2)"
|
||||||
@ -231,6 +237,77 @@ same_movie_or_skip() { # A B -> 0 if same (SSIM>=thr), else 1
|
|||||||
awk -v s="$ssim" -v thr="$SNAP_SSIM" 'BEGIN{exit !(s+0 >= thr+0)}'
|
awk -v s="$ssim" -v thr="$SNAP_SSIM" 'BEGIN{exit !(s+0 >= thr+0)}'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ----- directory scan helpers/drivers -----
|
||||||
|
scan_and_collect() {
|
||||||
|
if [[ $RECURSIVE -eq 1 ]]; then
|
||||||
|
mapfile -t FILES < <(find "$SCAN_DIR" -type f \( -iname '*.mp4' -o -iname '*.m4v' -o -iname '*.mov' -o -iname '*.mkv' \))
|
||||||
|
else
|
||||||
|
mapfile -t FILES < <(find "$SCAN_DIR" -maxdepth 1 -type f \( -iname '*.mp4' -o -iname '*.m4v' -o -iname '*.mov' -o -iname '*.mkv' \))
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# New grouping rule: key = first token of basename (without extension) before the first underscore.
|
||||||
|
# If there is no underscore, the whole basename (without extension) is the key.
|
||||||
|
first_underscore_key() {
|
||||||
|
local base extless key
|
||||||
|
base="$(basename -- "$1")"
|
||||||
|
extless="${base%.*}"
|
||||||
|
if [[ "$extless" == *_* ]]; then
|
||||||
|
key="${extless%%_*}"
|
||||||
|
else
|
||||||
|
key="$extless"
|
||||||
|
fi
|
||||||
|
echo "$key"
|
||||||
|
}
|
||||||
|
|
||||||
|
process_group() { # files...
|
||||||
|
local files=("$@") n=${#files[@]}
|
||||||
|
if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi
|
||||||
|
echo; echo "=== Group (${n} files): first-underscore key '$(first_underscore_key "${files[0]}")' ==="
|
||||||
|
echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done
|
||||||
|
|
||||||
|
local best="${files[0]}"
|
||||||
|
if ! check_ok "$best"; then echo "[WARN] Skipping broken file: $best"; return 0; fi
|
||||||
|
|
||||||
|
for ((i=1;i<n;i++)); do
|
||||||
|
local cand="${files[i]}"
|
||||||
|
if ! check_ok "$cand"; then echo "[WARN] Skipping broken file: $cand"; continue; fi
|
||||||
|
echo; echo "--- Compare: $(basename -- "$best") vs $(basename -- "$cand") ---"
|
||||||
|
if ! same_movie_or_skip "$best" "$cand"; then
|
||||||
|
echo "[SKIP] Snapshot says NOT same movie; leaving both in place."; continue; fi
|
||||||
|
run_impl "$IMPL" "$best" "$cand"
|
||||||
|
read -r keep drop why <<<"$(pick_winner "$best" "$cand" | tr '|' ' ')"
|
||||||
|
echo "[DECISION] Keep: $keep"; echo "[DECISION] Drop: $drop"; echo "[REASON] $why"
|
||||||
|
act_on_loser "$drop" "$keep"
|
||||||
|
best="$keep"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo; echo "=== Group result: kept $(basename -- "$best") ==="
|
||||||
|
}
|
||||||
|
|
||||||
|
dir_mode() {
|
||||||
|
[[ -n "${SCAN_DIR:-}" ]] || SCAN_DIR="."
|
||||||
|
[[ -d "$SCAN_DIR" ]] || die "Not a directory: $SCAN_DIR"
|
||||||
|
echo ">> Directory scan mode on: $SCAN_DIR (recursive=$RECURSIVE, grouping=first-underscore)"
|
||||||
|
scan_and_collect
|
||||||
|
if [[ ${#FILES[@]} -eq 0 ]]; then echo "No video files found."; exit 0; fi
|
||||||
|
|
||||||
|
declare -A groups
|
||||||
|
local f base key
|
||||||
|
for f in "${FILES[@]}"; do
|
||||||
|
base="$(basename -- "$f")"
|
||||||
|
key="$(first_underscore_key "$base")"
|
||||||
|
groups["$key"]+=$'\n'"$f"
|
||||||
|
done
|
||||||
|
|
||||||
|
for k in "${!groups[@]}"; do
|
||||||
|
IFS=$'\n' read -r -d '' -a grpfiles < <(printf "%s" "${groups[$k]}" | sed '/^$/d' | sort -u && printf '\0')
|
||||||
|
process_group "${grpfiles[@]}"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo; echo ">> Directory scan complete."
|
||||||
|
}
|
||||||
|
|
||||||
# ----- pair comparison driver -----
|
# ----- pair comparison driver -----
|
||||||
compare_pair() { # A B
|
compare_pair() { # A B
|
||||||
local A="$1" B="$2"
|
local A="$1" B="$2"
|
||||||
@ -259,75 +336,6 @@ compare_pair() { # A B
|
|||||||
[[ $DRY -eq 1 ]] && echo "(Dry-run only; no changes made)"
|
[[ $DRY -eq 1 ]] && echo "(Dry-run only; no changes made)"
|
||||||
}
|
}
|
||||||
|
|
||||||
# ----- directory scan helpers/drivers -----
|
|
||||||
scan_and_group() {
|
|
||||||
if [[ $RECURSIVE -eq 1 ]]; then
|
|
||||||
mapfile -t FILES < <(find "$SCAN_DIR" -type f \( -iname '*.mp4' \))
|
|
||||||
else
|
|
||||||
mapfile -t FILES < <(ls "$SCAN_DIR"/*.mp4 "$SCAN_DIR"/*.MP4 2>/dev/null || true)
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
extract_key() { # filename -> group key before first DELIM (CI), original case
|
|
||||||
local name="$1" low delimlow key
|
|
||||||
low="$(echo -n "$name" | tr '[:upper:]' '[:lower:]')"
|
|
||||||
delimlow="$(echo -n "$DELIM" | tr '[:upper:]' '[:lower:]')"
|
|
||||||
[[ "$low" == *"$delimlow"* ]] || { echo ""; return 0; }
|
|
||||||
key="${low%%"$delimlow"*}"
|
|
||||||
local cutlen=${#key}
|
|
||||||
echo "${name:0:$cutlen}"
|
|
||||||
}
|
|
||||||
|
|
||||||
process_group() { # files...
|
|
||||||
local files=("$@") n=${#files[@]}
|
|
||||||
if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi
|
|
||||||
echo; echo "=== Group (${n} files): prefix before '${DELIM}' ==="
|
|
||||||
echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done
|
|
||||||
|
|
||||||
local best="${files[0]}"
|
|
||||||
if ! check_ok "$best"; then echo "[WARN] Skipping broken file: $best"; return 0; fi
|
|
||||||
|
|
||||||
for ((i=1;i<n;i++)); do
|
|
||||||
local cand="${files[i]}"
|
|
||||||
if ! check_ok "$cand"; then echo "[WARN] Skipping broken file: $cand"; continue; fi
|
|
||||||
echo; echo "--- Compare: $(basename -- "$best") vs $(basename -- "$cand") ---"
|
|
||||||
if ! same_movie_or_skip "$best" "$cand"; then
|
|
||||||
echo "[SKIP] Snapshot says NOT same movie; leaving both in place."; continue; fi
|
|
||||||
run_impl "$IMPL" "$best" "$cand"
|
|
||||||
read -r keep drop why <<<"$(pick_winner "$best" "$cand" | tr '|' ' ')"
|
|
||||||
echo "[DECISION] Keep: $keep"; echo "[DECISION] Drop: $drop"; echo "[REASON] $why"
|
|
||||||
act_on_loser "$drop" "$keep"
|
|
||||||
best="$keep"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo; echo "=== Group result: kept $(basename -- "$best") ==="
|
|
||||||
}
|
|
||||||
|
|
||||||
dir_mode() {
|
|
||||||
[[ -n "${SCAN_DIR:-}" ]] || SCAN_DIR="."
|
|
||||||
[[ -d "$SCAN_DIR" ]] || die "Not a directory: $SCAN_DIR"
|
|
||||||
echo ">> Directory scan mode on: $SCAN_DIR (recursive=$RECURSIVE, delimiter='$DELIM')"
|
|
||||||
scan_and_group
|
|
||||||
if [[ ${#FILES[@]} -eq 0 ]]; then echo "No MP4 files found."; exit 0; fi
|
|
||||||
|
|
||||||
declare -A groups
|
|
||||||
for f in "${FILES[@]}"; do
|
|
||||||
base="$(basename -- "$f")"
|
|
||||||
key="$(extract_key "$base")"
|
|
||||||
[[ -z "$key" ]] && { v "No delimiter in: $base → skip grouping"; continue; }
|
|
||||||
groups["$key"]+=$'\n'"$f"
|
|
||||||
done
|
|
||||||
|
|
||||||
if [[ ${#groups[@]} -eq 0 ]]; then echo "No files with delimiter '$DELIM' found."; exit 0; fi
|
|
||||||
|
|
||||||
for k in "${!groups[@]}"; do
|
|
||||||
IFS=$'\n' read -r -d '' -a grpfiles < <(printf "%s" "${groups[$k]}" | sed '/^$/d' && printf '\0')
|
|
||||||
process_group "${grpfiles[@]}"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo; echo ">> Directory scan complete."
|
|
||||||
}
|
|
||||||
|
|
||||||
# -------- dispatch --------
|
# -------- dispatch --------
|
||||||
if [[ -n "${SCAN_DIR:-}" ]]; then
|
if [[ -n "${SCAN_DIR:-}" ]]; then
|
||||||
dir_mode
|
dir_mode
|
||||||
|
|||||||
Reference in New Issue
Block a user