From 6b3b45fabfdba371fbfd3c69bad2490a34a7854d Mon Sep 17 00:00:00 2001 From: Urban Date: Tue, 14 Oct 2025 15:14:08 +0200 Subject: [PATCH] fix --- videocmp_select.sh | 160 ++++++++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 76 deletions(-) diff --git a/videocmp_select.sh b/videocmp_select.sh index 65d70be..2d93ca9 100755 --- a/videocmp_select.sh +++ b/videocmp_select.sh @@ -5,10 +5,15 @@ # 1) Pair mode: compare A and B # ./videocmp_select.sh A.mp4 B.mp4 [options] # -# 2) Directory scan mode: group by prefix before first "WoodmanCastingX" (case-insensitive) +# 2) Directory scan mode: group files by the **first token before the first underscore** in the basename # ./videocmp_select.sh --scan-dir [DIR] [--recursive] [options] # (DIR defaults to "." if omitted) # +# Examples of grouping by first token: +# Becky_California.mp4, Becky_California.1.mp4 → group key "Becky" +# Bianca_Golden.mp4, Bianca_Golden_Cut.mp4 → group key "Bianca" +# Emylia.mp4, Emylia_Argan.mp4, Emylia_Wish.mp4 → group key "Emylia" +# # Pipeline: # 1) Validate files (ffprobe fields + ffmpeg deep decode) # 2) Confirm same movie via snapshot SSIM @ --snapshot-time (default 12s) @@ -35,7 +40,7 @@ # Directory-scan options: # --scan-dir [DIR] Enable directory mode (DIR optional; default ".") # --recursive, -r Recurse into subfolders -# --delimiter WORD (default: WoodmanCastingX) case-insensitive +# (Note: delimiter-based grouping is deprecated; this script now groups by first underscore token.) # # Exit codes: # 0 success | 1 differ/broken | 2 usage | 3 missing dependency @@ -61,7 +66,6 @@ VERBOSE=0 SCAN_DIR="" RECURSIVE=0 -DELIM="WoodmanCastingX" # -------- helpers -------- need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 3; }; } @@ -101,7 +105,9 @@ while (( i < ${#ARGS[@]} )); do fi ;; --recursive|-r) RECURSIVE=1; i=$((i+1));; - --delimiter) DELIM="${ARGS[i+1]:-}"; i=$((i+2));; + --delimiter|--delim) # backward-compat: ignore but warn once + echo "[WARN] --delimiter is deprecated; grouping now uses first underscore token." >&2 + i=$((i+ (arg=="--delimiter"?2:1) )) ;; --help|-h) grep -E '^# ' "$0" | sed 's/^# //' exit 0 ;; @@ -111,7 +117,7 @@ while (( i < ${#ARGS[@]} )); do esac done -v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE DELIM='$DELIM' PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T" +v "[DBG] Options: SCAN_DIR='${SCAN_DIR:-}' RECURSIVE=$RECURSIVE PREF_HEIGHT=$PREF_HEIGHT SNAP_T=$SNAP_T" # -------- core functions -------- probe_meta() { # file -> "w h dur codec size" @@ -143,8 +149,8 @@ snapshot_compare_ssim() { # f1 f2 time scale -> "ssim" (empty if fail) local f1="$1" f2="$2" t="$3" sc="$4" local tmpd s1 s2 log ssim tmpd="$(mktemp -d)"; s1="$tmpd/1.png"; s2="$tmpd/2.png"; log="$tmpd/cmp.log" - ffmpeg -hide_banner -v error -y -i "$f1" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s1" || true - ffmpeg -hide_banner -v error -y -i "$f2" -ss "$t" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s2" || true + ffmpeg -hide_banner -v error -y -ss "$t" -i "$f1" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s1" || true + ffmpeg -hide_banner -v error -y -ss "$t" -i "$f2" -frames:v 1 -vf "scale=$sc,format=yuv420p" "$s2" || true if [[ ! -s "$s1" || ! -s "$s2" ]]; then rm -rf "$tmpd"; echo ""; return 0; fi ffmpeg -hide_banner -v info -i "$s1" -i "$s2" -lavfi "ssim" -f null - > /dev/null 2> "$log" || true ssim="$(grep -Eo 'All:[0-9]+(\.[0-9]+)?' "$log" | head -n1 | cut -d: -f2)" @@ -231,6 +237,77 @@ same_movie_or_skip() { # A B -> 0 if same (SSIM>=thr), else 1 awk -v s="$ssim" -v thr="$SNAP_SSIM" 'BEGIN{exit !(s+0 >= thr+0)}' } +# ----- directory scan helpers/drivers ----- +scan_and_collect() { + if [[ $RECURSIVE -eq 1 ]]; then + mapfile -t FILES < <(find "$SCAN_DIR" -type f \( -iname '*.mp4' -o -iname '*.m4v' -o -iname '*.mov' -o -iname '*.mkv' \)) + else + mapfile -t FILES < <(find "$SCAN_DIR" -maxdepth 1 -type f \( -iname '*.mp4' -o -iname '*.m4v' -o -iname '*.mov' -o -iname '*.mkv' \)) + fi +} + +# New grouping rule: key = first token of basename (without extension) before the first underscore. +# If there is no underscore, the whole basename (without extension) is the key. +first_underscore_key() { + local base extless key + base="$(basename -- "$1")" + extless="${base%.*}" + if [[ "$extless" == *_* ]]; then + key="${extless%%_*}" + else + key="$extless" + fi + echo "$key" +} + +process_group() { # files... + local files=("$@") n=${#files[@]} + if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi + echo; echo "=== Group (${n} files): first-underscore key '$(first_underscore_key "${files[0]}")' ===" + echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done + + local best="${files[0]}" + if ! check_ok "$best"; then echo "[WARN] Skipping broken file: $best"; return 0; fi + + for ((i=1;i> Directory scan mode on: $SCAN_DIR (recursive=$RECURSIVE, grouping=first-underscore)" + scan_and_collect + if [[ ${#FILES[@]} -eq 0 ]]; then echo "No video files found."; exit 0; fi + + declare -A groups + local f base key + for f in "${FILES[@]}"; do + base="$(basename -- "$f")" + key="$(first_underscore_key "$base")" + groups["$key"]+=$'\n'"$f" + done + + for k in "${!groups[@]}"; do + IFS=$'\n' read -r -d '' -a grpfiles < <(printf "%s" "${groups[$k]}" | sed '/^$/d' | sort -u && printf '\0') + process_group "${grpfiles[@]}" + done + + echo; echo ">> Directory scan complete." +} + # ----- pair comparison driver ----- compare_pair() { # A B local A="$1" B="$2" @@ -259,75 +336,6 @@ compare_pair() { # A B [[ $DRY -eq 1 ]] && echo "(Dry-run only; no changes made)" } -# ----- directory scan helpers/drivers ----- -scan_and_group() { - if [[ $RECURSIVE -eq 1 ]]; then - mapfile -t FILES < <(find "$SCAN_DIR" -type f \( -iname '*.mp4' \)) - else - mapfile -t FILES < <(ls "$SCAN_DIR"/*.mp4 "$SCAN_DIR"/*.MP4 2>/dev/null || true) - fi -} - -extract_key() { # filename -> group key before first DELIM (CI), original case - local name="$1" low delimlow key - low="$(echo -n "$name" | tr '[:upper:]' '[:lower:]')" - delimlow="$(echo -n "$DELIM" | tr '[:upper:]' '[:lower:]')" - [[ "$low" == *"$delimlow"* ]] || { echo ""; return 0; } - key="${low%%"$delimlow"*}" - local cutlen=${#key} - echo "${name:0:$cutlen}" -} - -process_group() { # files... - local files=("$@") n=${#files[@]} - if (( n < 2 )); then v "Group <2 files → skip"; return 0; fi - echo; echo "=== Group (${n} files): prefix before '${DELIM}' ===" - echo "Files:"; for f in "${files[@]}"; do echo " - $f"; done - - local best="${files[0]}" - if ! check_ok "$best"; then echo "[WARN] Skipping broken file: $best"; return 0; fi - - for ((i=1;i> Directory scan mode on: $SCAN_DIR (recursive=$RECURSIVE, delimiter='$DELIM')" - scan_and_group - if [[ ${#FILES[@]} -eq 0 ]]; then echo "No MP4 files found."; exit 0; fi - - declare -A groups - for f in "${FILES[@]}"; do - base="$(basename -- "$f")" - key="$(extract_key "$base")" - [[ -z "$key" ]] && { v "No delimiter in: $base → skip grouping"; continue; } - groups["$key"]+=$'\n'"$f" - done - - if [[ ${#groups[@]} -eq 0 ]]; then echo "No files with delimiter '$DELIM' found."; exit 0; fi - - for k in "${!groups[@]}"; do - IFS=$'\n' read -r -d '' -a grpfiles < <(printf "%s" "${groups[$k]}" | sed '/^$/d' && printf '\0') - process_group "${grpfiles[@]}" - done - - echo; echo ">> Directory scan complete." -} - # -------- dispatch -------- if [[ -n "${SCAN_DIR:-}" ]]; then dir_mode