diff --git a/rename_and_filter.sh b/rename_and_filter.sh index f32448a..8a1baa4 100755 --- a/rename_and_filter.sh +++ b/rename_and_filter.sh @@ -2,49 +2,49 @@ # ============================================================================= # rename_and_filter.sh # -# Version: 1.3.1 -# Last updated: 2025-10-04 +# Version: 1.9.0 +# Last updated: 2025-11-13 (camelCase- & siffersplit borttagen) # # Summary # ------- -# Given a directory, a comma-separated file of reserved words, and an optional -# file extension filter: -# 1) Removes any filename tokens (separated by "_") that match reserved words -# (case-insensitive, exact token matches). -# - EXTRA: If a reserved word appears as the last token (e.g., "..._good"), -# it is removed even if it's only delimited on the left side. -# 2) Replaces one or more spaces with a single underscore "_". -# 3) Collapses multiple "_" into a single "_", and trims leading/trailing "_". +# Döper om filer i en katalog baserat på en kommaseparerad lista med spärrade +# ord (reserved words). Matchning sker mot _tokens_ i basnamnet (skiljetecken +# "_"), med **exakt token-matchning** (case-insensitivt). Inga substring- +# regler och ingen automatisk camelCase/siffersplit. # -# Notes: -# - No files are deleted; only renaming occurs. -# - Processes files only in the given directory (non-recursive). -# - Collision handling: if target name exists, suffix "_N" before the extension. -# - Reserved words match only against the basename tokens (extension excluded). +# Regler (AUKTORITATIV ORDNING) +# ----------------------------- +# OBS: Numreringen återspeglar **exakt tillämpningsordning** i koden. Om en +# ny regel införs mellan t.ex. 3 och 4 ska **alla efterföljande regler +# renumreras**. Referera alltid till regler som "Regel N". # -# Extension filter: -# - Default: mp4 -# - "*" : all files -# - "jpg" : only .jpg (with or without leading dot accepted, e.g. "jpg" or ".jpg") +# Regler: +# 1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord +# (case-insensitivt). +# 2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett +# längd (t.ex. 2, 22, 123). +# 3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_", +# kollapsa multipla "_" och dela på "_". +# 4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma +# ledande/efterföljande "_" i basnamnet. Extension bibehålls. +# 5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före +# extension. # -# Logging: -# RENAME: "old_name.ext" => "new_name.ext" -# NOCHANGE (dry-run): "file.ext" (nothing to do) +# Noter: +# - Ingen fil raderas; endast rename. +# - Icke-rekursiv som standard; använd --recursive för att gå ned i underkataloger. +# - Extensionfilter: en, flera (kommaseparerade) eller "*" för alla filer. # -# Usage -# ----- -# ./rename_and_filter.sh [--dry-run] [extension] +# Exempel +# ------- +# # Torrkörning (default mp4) +# ./rename_and_filter.sh --dry-run ./videos reserved_words.csv # -# Examples -# -------- -# # Dry-run on mp4 files (default): -# ./rename_and_filter.sh --dry-run ./videos ./reserved_words.csv +# # Alla filer rekursivt +# ./rename_and_filter.sh -n -r ./media reserved_words.csv "*" # -# # All files: -# ./rename_and_filter.sh ./media ./reserved_words.csv "*" -# -# # Only jpg: -# ./rename_and_filter.sh ./pictures ./reserved_words.csv jpg +# # jpg & png +# ./rename_and_filter.sh -n ./pics reserved_words.csv jpg,png # # Exit codes # ---------- @@ -53,87 +53,87 @@ # 2 Requires bash >= 4 # ============================================================================= -set -euo pipefail +# Viktigt: vi kör INTE set -e, för att undvika aborter mitt i loopen. +# Vi behåller -u och pipefail för rimlig säkerhet. +set -uo pipefail +SCRIPT_VERSION="1.9.0" -# Requires bash 4 for associative arrays +# Bash 4 krävs för assoc. arrayer & case-conversion if [ "${BASH_VERSINFO:-0}" -lt 4 ]; then echo "This script requires bash >= 4." >&2 exit 2 fi DRY_RUN=0 +RECURSIVE=0 +MIN_LEN=0 +VERBOSE=0 print_help() { - sed -n '2,160p' "$0" | sed 's/^# \{0,1\}//' + sed -n '2,200p' "$0" | sed 's/^# \{0,1\}//' +} + +print_rules() { + cat <<'RULES' +Regler (auktoritativ ordning) +1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord (case-insensitivt). +2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett längd (t.ex. 2, 22, 123). +3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_", kollapsa multipla "_" och dela på "_". +4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma ledande/efterföljande "_" i basnamnet. Extension bibehålls. +5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före extension. +RULES } log() { printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" } -rename_file() { - local src="$1" - local dst="$2" - - # Guard: identical path, nothing to do - if [[ "$src" == "$dst" ]]; then - if (( DRY_RUN )); then - # CHANGE: add `--` so basename doesn't treat dash-prefixed names as options - log "NOCHANGE (dry-run): \"$(basename -- "$src")\" (nothing to do)" - fi - return 0 - fi - - local base ext candidate n - base="${dst##*/}" - ext="" - if [[ "$base" == *.* ]]; then - ext=".${base##*.}" - base="${base%.*}" - fi - candidate="$dst" - n=1 - # Avoid collisions (unless it's the same file, already handled above) - while [[ -e "$candidate" && "$src" != "$candidate" ]]; do - candidate="$(dirname "$dst")/${base}_$n$ext" - ((n++)) - done - - if (( DRY_RUN )); then - # CHANGE: add `--` for safe logging - log "RENAME: \"$(basename -- "$src")\" => \"$(basename -- "$candidate")\"" - else - mv -v -- "$src" "$candidate" >/dev/null - # CHANGE: add `--` for safe logging - log "RENAME: \"$(basename -- "$src")\" => \"$(basename -- "$candidate")\"" - fi -} - # --- Argument parsing --- -if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then - print_help - exit 0 -fi +ARGS=() +while (("$#")); do + case "${1}" in + -h|--help) print_help; exit 0;; + --print-rules) print_rules; exit 0;; + -n|--dry-run) DRY_RUN=1; shift;; + -r|--recursive) RECURSIVE=1; shift;; + -v|--verbose) VERBOSE=1; shift;; + --min-len) MIN_LEN=${2:-0}; shift 2;; + --) shift; break;; + -* ) echo "Unknown option: $1" >&2; exit 1;; + * ) ARGS+=("$1"); shift;; + esac +done -if [[ "${1:-}" == "-n" || "${1:-}" == "--dry-run" ]]; then - DRY_RUN=1 - shift -fi - -if [[ $# -lt 2 || $# -gt 3 ]]; then - echo "Error: Provide [extension]. Use --help for info." >&2 +# Positional args +if (( ${#ARGS[@]} < 2 || ${#ARGS[@]} > 3 )); then + echo "Error: Provide [extension|list|*]. Use --help for info." >&2 exit 1 fi -DIR="$1" -WORD_FILE="$2" -EXT="${3:-mp4}" +DIR="${ARGS[0]}" +WORD_FILE="${ARGS[1]}" +EXT_ARG="${ARGS[2]:-mp4}" -# Normalize EXT: strip leading dot if present -if [[ "$EXT" == .* ]]; then - EXT="${EXT#.}" -fi +# Normalize extensions list into a csv string (lowercase, strip leading dots) +normalize_ext_list() { + local arg="$1" + local -a out=() + if [[ "$arg" == "*" ]]; then + echo "*" + return 0 + fi + IFS=',' read -r -a tmp <<< "$arg" + for e in "${tmp[@]}"; do + [[ -z "$e" ]] && continue + e="${e#.}" + out+=("${e,,}") + done + (IFS=','; echo "${out[*]}") +} +EXT_LIST_STR="$(normalize_ext_list "$EXT_ARG")" + +# Validate inputs if [[ ! -d "$DIR" ]]; then echo "Error: Directory does not exist: $DIR" >&2 exit 1 @@ -143,67 +143,102 @@ if [[ ! -f "$WORD_FILE" ]]; then exit 1 fi -# --- Load reserved words into an associative set --- +# --- Load reserved words (to associative set) --- declare -A RESERVED=() # Allow comma and newlines as separators -# shellcheck disable=SC2002 -mapfile -t _tokens < <(cat "$WORD_FILE" | tr ',\r\n' '\n') +mapfile -t _tokens < <(tr ',\r\n' '\n' < "$WORD_FILE") for raw in "${_tokens[@]}"; do # trim whitespace - w="${raw#"${raw%%[![:space:]]*}"}" # ltrim - w="${w%"${w##*[![:space:]]}"}" # rtrim + w="${raw#"${raw%%[![:space:]]*}"}" + w="${w%"${w##*[![:space:]]}"}" [[ -z "$w" ]] && continue - RESERVED["${w,,}"]=1 + lw="${w,,}" # case-insensitive + lw="${lw//-/_}" # normalisera även ordlista: '-' → '_' + RESERVED["$lw"]=1 done -log "Processing directory: $DIR (dry-run=$DRY_RUN, extension=${EXT})" -log "Reserved words loaded: ${#RESERVED[@]}" +# --- Helpers --- +matches_extension() { + local file="$1" ext_list="$2" base ext lc + [[ "$ext_list" == "*" ]] && return 0 + base="$(basename "$file")" + [[ "$base" != *.* ]] && return 1 + ext="${base##*.}"; lc="${ext,,}" + IFS=',' read -r -a arr <<< "$ext_list" + for e in "${arr[@]}"; do + if [[ "$lc" == "$e" ]]; then return 0; fi + done + return 1 +} -# --- Normalize (rules 2 & 3) --- normalize_name() { - local name="$1" - # Replace one-or-more spaces with single underscore - name="$(printf '%s' "$name" | sed -E 's/[[:space:]]+/_/g')" - # Collapse multiple underscores + local name="$1" base ext + # strip_reserved har redan gjort whitespace → "_" och en första kollaps name="$(printf '%s' "$name" | sed -E 's/_+/_/g')" - # Trim leading/trailing underscores from the basename (keep extension) - local base ext - base="$name" - ext="" + base="$name"; ext="" if [[ "$base" == *.* ]]; then ext=".${base##*.}" base="${base%.*}" fi - base="${base##_}" - base="${base%%_}" + base="${base#_}" + base="${base%_}" printf '%s%s' "$base" "$ext" } -# --- Remove reserved tokens from basename (plus end-token rule) --- +candidate_name() { + local dst="$1" src="$2" base ext candidate n + base="${dst##*/}"; ext="" + if [[ "$base" == *.* ]]; then + ext=".${base##*.}" + base="${base%.*}" + fi + candidate="$dst"; n=1 + while [[ -e "$candidate" && "$src" != "$candidate" ]]; do + candidate="$(dirname "$dst")/${base}_$n$ext" + ((n++)) + done + printf '%s' "$candidate" +} + strip_reserved() { - local name="$1" - local base ext token lw + local name="$1" base ext token lw local -a parts new_parts=() - # CHANGE: pass `--` to basename for dash-prefixed names - base="$(basename -- "$name")" - ext="" + base="$(basename "$name")"; ext="" if [[ "$base" == *.* ]]; then ext=".${base##*.}" base="${base%.*}" fi - # Pre-normalize for tokenization + # Pre-normalisering & tokenisering: + # - ersätt '-' → '_' + # - ersätt all whitespace → '_' + # - kollapsa multipla '_' + # - dela på '_' local norm_base - norm_base="$(printf '%s' "$base" | sed -E 's/[[:space:]]+/_/g' | sed -E 's/_+/_/g')" + norm_base="$(printf '%s' "$base" \ + | sed -E 's/-/_/g' \ + | sed -E 's/[[:space:]]+/_/g' \ + | sed -E 's/_+/_/g')" IFS='_' read -r -a parts <<< "$norm_base" + for token in "${parts[@]}"; do [[ -z "$token" ]] && continue lw="${token,,}" - if [[ -n "${RESERVED[$lw]:-}" ]]; then + + # (Regel 2) slopa rena siffertokens + if [[ "$token" =~ ^[[:digit:]]+$ ]]; then + (( VERBOSE )) && log "DEBUG: drop numeric token '$token'" continue fi + + # (Regel 1) exakt token = reserverat ord (case-insensitivt) + if [[ -n "${RESERVED[$lw]:-}" ]]; then + (( VERBOSE )) && log "DEBUG: drop reserved token '$token'" + continue + fi + new_parts+=("$token") done @@ -214,63 +249,94 @@ strip_reserved() { new_base="$(IFS=_; echo "${new_parts[*]}")" fi - # Extra end-token rule (defensive): if last token is reserved, drop it - if [[ -n "$new_base" ]]; then - local last last_lc - last="${new_base##*_}" - last_lc="${last,,}" - if [[ -n "${RESERVED[$last_lc]:-}" ]]; then - if [[ "$new_base" == *_* ]]; then - new_base="${new_base%_*}" - else - new_base="" - fi - fi - fi - - # Fallback if everything disappeared - if [[ -z "$new_base" ]]; then - new_base="untitled" - fi - + [[ -z "$new_base" ]] && new_base="untitled" printf '%s%s' "$new_base" "$ext" } -# --- File pattern based on EXT --- +# --- Walk files --- shopt -s nullglob -pattern="*" -if [[ "$EXT" != "*" ]]; then - pattern="*.$EXT" -fi -# --- Main loop --- -for path in "$DIR"/$pattern; do - [[ -f "$path" ]] || continue +VISITED=0 +RENAMED=0 +UNCHANGED=0 +COLLISIONED=0 - orig_basename="$(basename -- "$path")" +log "rename_and_filter.sh version $SCRIPT_VERSION" +log "Processing directory: $DIR (dry-run=$DRY_RUN, recursive=$RECURSIVE, min-len=$MIN_LEN, extensions=$EXT_LIST_STR, verbose=$VERBOSE)" +log "Reserved words loaded: ${#RESERVED[@]}" - # NEW RULE (minimal): remove ALL leading dashes before processing - clean_basename="$orig_basename" - while [[ "$clean_basename" == -* ]]; do - clean_basename="${clean_basename#-}" - done +process_one() { + local path="$1" + [[ -f "$path" ]] || return 0 - # 1) remove reserved tokens from basename (on the cleaned name) - stripped="$(strip_reserved "$clean_basename")" + if ! matches_extension "$path" "$EXT_LIST_STR"; then + if (( DRY_RUN && VERBOSE )); then + log "SKIP (ext): $path" + fi + return 0 + fi - # 2 & 3) normalize (spaces -> "_", collapse "_", trim) + ((VISITED++)) + if (( DRY_RUN || VERBOSE )); then + log "Checking file: $path" + fi + + local orig_basename stripped new_name bn target cand + orig_basename="$(basename "$path")" + stripped="$(strip_reserved "$orig_basename")" new_name="$(normalize_name "$stripped")" - # If nothing changed, do not attempt to rename at all - if [[ "$new_name" == "$orig_basename" ]]; then - if (( DRY_RUN )); then - log "NOCHANGE (dry-run): \"${orig_basename}\" (nothing to do)" + if (( VERBOSE )); then + log "DEBUG: orig='${orig_basename}' stripped='${stripped}' normalized='${new_name}'" + fi + + bn="$new_name" + [[ "$bn" == *.* ]] && bn="${bn%.*}" + if (( MIN_LEN > 0 )) && (( ${#bn} < MIN_LEN )); then + if (( DRY_RUN || VERBOSE )); then + log "NOCHANGE: \"${orig_basename}\" (too short after normalize)" fi - continue + ((UNCHANGED++)) + return 0 + fi + + if [[ "$new_name" == "$orig_basename" ]]; then + if (( DRY_RUN || VERBOSE )); then + log "NOCHANGE: \"${orig_basename}\" (nothing to do)" + fi + ((UNCHANGED++)) + return 0 fi target="$(dirname "$path")/$new_name" - rename_file "$path" "$target" -done + cand="$(candidate_name "$target" "$path")" + if [[ "$cand" != "$target" ]]; then + ((COLLISIONED++)) + fi + if (( DRY_RUN )); then + log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\"" + else + mv -- "$path" "$cand" + log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\"" + fi + ((RENAMED++)) +} + +# --- Huvudloop --- +if (( RECURSIVE )); then + log "[DEBUG] Enter recursive loop (find '$DIR')" + while IFS= read -r -d '' path; do + log "[DEBUG] raw candidate (recursive): $path" + process_one "$path" + done < <(find "$DIR" -type f -print0) +else + log "[DEBUG] Enter non-recursive loop over '$DIR'/*" + for path in "$DIR"/*; do + log "[DEBUG] raw candidate: $path" + process_one "$path" + done +fi + +log "Summary: visited=$VISITED, renamed=$RENAMED, unchanged=$UNCHANGED, collisions_adjusted=$COLLISIONED" log "Done."