rename and filter

This commit is contained in:
2025-11-13 14:14:43 +01:00
parent 77d6eca004
commit 3234a19be0

View File

@ -2,49 +2,49 @@
# ============================================================================= # =============================================================================
# rename_and_filter.sh # rename_and_filter.sh
# #
# Version: 1.3.1 # Version: 1.9.0
# Last updated: 2025-10-04 # Last updated: 2025-11-13 (camelCase- & siffersplit borttagen)
# #
# Summary # Summary
# ------- # -------
# Given a directory, a comma-separated file of reserved words, and an optional # Döper om filer i en katalog baserat på en kommaseparerad lista med spärrade
# file extension filter: # ord (reserved words). Matchning sker mot _tokens_ i basnamnet (skiljetecken
# 1) Removes any filename tokens (separated by "_") that match reserved words # "_"), med **exakt token-matchning** (case-insensitivt). Inga substring-
# (case-insensitive, exact token matches). # regler och ingen automatisk camelCase/siffersplit.
# - EXTRA: If a reserved word appears as the last token (e.g., "..._good"),
# it is removed even if it's only delimited on the left side.
# 2) Replaces one or more spaces with a single underscore "_".
# 3) Collapses multiple "_" into a single "_", and trims leading/trailing "_".
# #
# Notes: # Regler (AUKTORITATIV ORDNING)
# - No files are deleted; only renaming occurs. # -----------------------------
# - Processes files only in the given directory (non-recursive). # OBS: Numreringen återspeglar **exakt tillämpningsordning** i koden. Om en
# - Collision handling: if target name exists, suffix "_N" before the extension. # ny regel införs mellan t.ex. 3 och 4 ska **alla efterföljande regler
# - Reserved words match only against the basename tokens (extension excluded). # renumreras**. Referera alltid till regler som "Regel N".
# #
# Extension filter: # Regler:
# - Default: mp4 # 1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord
# - "*" : all files # (case-insensitivt).
# - "jpg" : only .jpg (with or without leading dot accepted, e.g. "jpg" or ".jpg") # 2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett
# längd (t.ex. 2, 22, 123).
# 3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_",
# kollapsa multipla "_" och dela på "_".
# 4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma
# ledande/efterföljande "_" i basnamnet. Extension bibehålls.
# 5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före
# extension.
# #
# Logging: # Noter:
# RENAME: "old_name.ext" => "new_name.ext" # - Ingen fil raderas; endast rename.
# NOCHANGE (dry-run): "file.ext" (nothing to do) # - Icke-rekursiv som standard; använd --recursive för att gå ned i underkataloger.
# - Extensionfilter: en, flera (kommaseparerade) eller "*" för alla filer.
# #
# Usage # Exempel
# ----- # -------
# ./rename_and_filter.sh [--dry-run] <directory> <reserved_words.csv> [extension] # # Torrkörning (default mp4)
# ./rename_and_filter.sh --dry-run ./videos reserved_words.csv
# #
# Examples # # Alla filer rekursivt
# -------- # ./rename_and_filter.sh -n -r ./media reserved_words.csv "*"
# # Dry-run on mp4 files (default):
# ./rename_and_filter.sh --dry-run ./videos ./reserved_words.csv
# #
# # All files: # # jpg & png
# ./rename_and_filter.sh ./media ./reserved_words.csv "*" # ./rename_and_filter.sh -n ./pics reserved_words.csv jpg,png
#
# # Only jpg:
# ./rename_and_filter.sh ./pictures ./reserved_words.csv jpg
# #
# Exit codes # Exit codes
# ---------- # ----------
@ -53,87 +53,87 @@
# 2 Requires bash >= 4 # 2 Requires bash >= 4
# ============================================================================= # =============================================================================
set -euo pipefail # Viktigt: vi kör INTE set -e, för att undvika aborter mitt i loopen.
# Vi behåller -u och pipefail för rimlig säkerhet.
set -uo pipefail
SCRIPT_VERSION="1.9.0"
# Requires bash 4 for associative arrays # Bash 4 krävs för assoc. arrayer & case-conversion
if [ "${BASH_VERSINFO:-0}" -lt 4 ]; then if [ "${BASH_VERSINFO:-0}" -lt 4 ]; then
echo "This script requires bash >= 4." >&2 echo "This script requires bash >= 4." >&2
exit 2 exit 2
fi fi
DRY_RUN=0 DRY_RUN=0
RECURSIVE=0
MIN_LEN=0
VERBOSE=0
print_help() { print_help() {
sed -n '2,160p' "$0" | sed 's/^# \{0,1\}//' sed -n '2,200p' "$0" | sed 's/^# \{0,1\}//'
}
print_rules() {
cat <<'RULES'
Regler (auktoritativ ordning)
1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord (case-insensitivt).
2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett längd (t.ex. 2, 22, 123).
3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_", kollapsa multipla "_" och dela på "_".
4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma ledande/efterföljande "_" i basnamnet. Extension bibehålls.
5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före extension.
RULES
} }
log() { log() {
printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
} }
#######################################
# Rename a file to a target path without ever overwriting an existing entry.
# If the target is taken, "_N" (N = 1, 2, ...) is inserted before the
# extension until a free name is found.
# Globals:   DRY_RUN (read) - non-zero: only log what would happen
# Arguments: $1 - current path of the file
#            $2 - desired new path
# Outputs:   log lines via log(); the mv failure message goes to stderr
# Returns:   0 on success or when there is nothing to do, 1 if mv fails
#######################################
rename_file() {
  local src="$1"
  local dst="$2"
  local dir base ext candidate n

  # Split the target's last path component into stem and extension so the
  # collision suffix lands before the extension.
  base="${dst##*/}"
  ext=""
  if [[ "$base" == *.* ]]; then
    ext=".${base##*.}"
    base="${base%.*}"
  fi

  # Loop-invariant; `--` keeps dash-prefixed paths from being read as options.
  dir="$(dirname -- "$dst")"

  # Find a free name. `-e` follows symlinks and is false for a dangling one,
  # so `-L` is tested as well to avoid clobbering a broken link.
  candidate="$dst"
  n=1
  while [[ "$candidate" != "$src" ]] \
    && [[ -e "$candidate" || -L "$candidate" ]]; do
    candidate="${dir}/${base}_${n}${ext}"
    n=$((n + 1))
  done

  # Either src and dst were identical from the start, or the first free
  # suffixed name is the source itself: there is nothing to rename.
  if [[ "$candidate" == "$src" ]]; then
    if (( DRY_RUN )); then
      log "NOCHANGE (dry-run): \"$(basename -- "$src")\" (nothing to do)"
    fi
    return 0
  fi

  if (( ! DRY_RUN )); then
    # Only report a rename that actually happened.
    if ! mv -- "$src" "$candidate"; then
      log "ERROR: could not rename \"$src\" to \"$candidate\"" >&2
      return 1
    fi
  fi
  log "RENAME: \"$(basename -- "$src")\" => \"$(basename -- "$candidate")\""
}
# --- Argument parsing --- # --- Argument parsing ---
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then ARGS=()
print_help while (("$#")); do
exit 0 case "${1}" in
fi -h|--help) print_help; exit 0;;
--print-rules) print_rules; exit 0;;
-n|--dry-run) DRY_RUN=1; shift;;
-r|--recursive) RECURSIVE=1; shift;;
-v|--verbose) VERBOSE=1; shift;;
--min-len) MIN_LEN=${2:-0}; shift 2;;
--) shift; break;;
-* ) echo "Unknown option: $1" >&2; exit 1;;
* ) ARGS+=("$1"); shift;;
esac
done
if [[ "${1:-}" == "-n" || "${1:-}" == "--dry-run" ]]; then # Positional args
DRY_RUN=1 if (( ${#ARGS[@]} < 2 || ${#ARGS[@]} > 3 )); then
shift echo "Error: Provide <directory> <reserved_words.csv> [extension|list|*]. Use --help for info." >&2
fi
if [[ $# -lt 2 || $# -gt 3 ]]; then
echo "Error: Provide <directory> <reserved_words.csv> [extension]. Use --help for info." >&2
exit 1 exit 1
fi fi
DIR="$1" DIR="${ARGS[0]}"
WORD_FILE="$2" WORD_FILE="${ARGS[1]}"
EXT="${3:-mp4}" EXT_ARG="${ARGS[2]:-mp4}"
# Normalize EXT: strip leading dot if present # Normalize extensions list into a csv string (lowercase, strip leading dots)
if [[ "$EXT" == .* ]]; then normalize_ext_list() {
EXT="${EXT#.}" local arg="$1"
fi local -a out=()
if [[ "$arg" == "*" ]]; then
echo "*"
return 0
fi
IFS=',' read -r -a tmp <<< "$arg"
for e in "${tmp[@]}"; do
[[ -z "$e" ]] && continue
e="${e#.}"
out+=("${e,,}")
done
(IFS=','; echo "${out[*]}")
}
EXT_LIST_STR="$(normalize_ext_list "$EXT_ARG")"
# Validate inputs
if [[ ! -d "$DIR" ]]; then if [[ ! -d "$DIR" ]]; then
echo "Error: Directory does not exist: $DIR" >&2 echo "Error: Directory does not exist: $DIR" >&2
exit 1 exit 1
@ -143,67 +143,102 @@ if [[ ! -f "$WORD_FILE" ]]; then
exit 1 exit 1
fi fi
# --- Load reserved words into an associative set --- # --- Load reserved words (to associative set) ---
declare -A RESERVED=() declare -A RESERVED=()
# Allow comma and newlines as separators # Allow comma and newlines as separators
# shellcheck disable=SC2002 mapfile -t _tokens < <(tr ',\r\n' '\n' < "$WORD_FILE")
mapfile -t _tokens < <(cat "$WORD_FILE" | tr ',\r\n' '\n')
for raw in "${_tokens[@]}"; do for raw in "${_tokens[@]}"; do
# trim whitespace # trim whitespace
w="${raw#"${raw%%[![:space:]]*}"}" # ltrim w="${raw#"${raw%%[![:space:]]*}"}"
w="${w%"${w##*[![:space:]]}"}" # rtrim w="${w%"${w##*[![:space:]]}"}"
[[ -z "$w" ]] && continue [[ -z "$w" ]] && continue
RESERVED["${w,,}"]=1 lw="${w,,}" # case-insensitive
lw="${lw//-/_}" # normalisera även ordlista: '-' → '_'
RESERVED["$lw"]=1
done done
log "Processing directory: $DIR (dry-run=$DRY_RUN, extension=${EXT})" # --- Helpers ---
log "Reserved words loaded: ${#RESERVED[@]}" matches_extension() {
local file="$1" ext_list="$2" base ext lc
[[ "$ext_list" == "*" ]] && return 0
base="$(basename "$file")"
[[ "$base" != *.* ]] && return 1
ext="${base##*.}"; lc="${ext,,}"
IFS=',' read -r -a arr <<< "$ext_list"
for e in "${arr[@]}"; do
if [[ "$lc" == "$e" ]]; then return 0; fi
done
return 1
}
# --- Normalize (rules 2 & 3) ---
normalize_name() { normalize_name() {
local name="$1" local name="$1" base ext
# Replace one-or-more spaces with single underscore # strip_reserved har redan gjort whitespace → "_" och en första kollaps
name="$(printf '%s' "$name" | sed -E 's/[[:space:]]+/_/g')"
# Collapse multiple underscores
name="$(printf '%s' "$name" | sed -E 's/_+/_/g')" name="$(printf '%s' "$name" | sed -E 's/_+/_/g')"
# Trim leading/trailing underscores from the basename (keep extension) base="$name"; ext=""
local base ext
base="$name"
ext=""
if [[ "$base" == *.* ]]; then if [[ "$base" == *.* ]]; then
ext=".${base##*.}" ext=".${base##*.}"
base="${base%.*}" base="${base%.*}"
fi fi
base="${base##_}" base="${base#_}"
base="${base%%_}" base="${base%_}"
printf '%s%s' "$base" "$ext" printf '%s%s' "$base" "$ext"
} }
# --- Remove reserved tokens from basename (plus end-token rule) --- candidate_name() {
local dst="$1" src="$2" base ext candidate n
base="${dst##*/}"; ext=""
if [[ "$base" == *.* ]]; then
ext=".${base##*.}"
base="${base%.*}"
fi
candidate="$dst"; n=1
while [[ -e "$candidate" && "$src" != "$candidate" ]]; do
candidate="$(dirname "$dst")/${base}_$n$ext"
((n++))
done
printf '%s' "$candidate"
}
strip_reserved() { strip_reserved() {
local name="$1" local name="$1" base ext token lw
local base ext token lw
local -a parts new_parts=() local -a parts new_parts=()
# CHANGE: pass `--` to basename for dash-prefixed names base="$(basename "$name")"; ext=""
base="$(basename -- "$name")"
ext=""
if [[ "$base" == *.* ]]; then if [[ "$base" == *.* ]]; then
ext=".${base##*.}" ext=".${base##*.}"
base="${base%.*}" base="${base%.*}"
fi fi
# Pre-normalize for tokenization # Pre-normalisering & tokenisering:
# - ersätt '-' → '_'
# - ersätt all whitespace → '_'
# - kollapsa multipla '_'
# - dela på '_'
local norm_base local norm_base
norm_base="$(printf '%s' "$base" | sed -E 's/[[:space:]]+/_/g' | sed -E 's/_+/_/g')" norm_base="$(printf '%s' "$base" \
| sed -E 's/-/_/g' \
| sed -E 's/[[:space:]]+/_/g' \
| sed -E 's/_+/_/g')"
IFS='_' read -r -a parts <<< "$norm_base" IFS='_' read -r -a parts <<< "$norm_base"
for token in "${parts[@]}"; do for token in "${parts[@]}"; do
[[ -z "$token" ]] && continue [[ -z "$token" ]] && continue
lw="${token,,}" lw="${token,,}"
if [[ -n "${RESERVED[$lw]:-}" ]]; then
# (Regel 2) slopa rena siffertokens
if [[ "$token" =~ ^[[:digit:]]+$ ]]; then
(( VERBOSE )) && log "DEBUG: drop numeric token '$token'"
continue continue
fi fi
# (Regel 1) exakt token = reserverat ord (case-insensitivt)
if [[ -n "${RESERVED[$lw]:-}" ]]; then
(( VERBOSE )) && log "DEBUG: drop reserved token '$token'"
continue
fi
new_parts+=("$token") new_parts+=("$token")
done done
@ -214,63 +249,94 @@ strip_reserved() {
new_base="$(IFS=_; echo "${new_parts[*]}")" new_base="$(IFS=_; echo "${new_parts[*]}")"
fi fi
# Extra end-token rule (defensive): if last token is reserved, drop it [[ -z "$new_base" ]] && new_base="untitled"
if [[ -n "$new_base" ]]; then
local last last_lc
last="${new_base##*_}"
last_lc="${last,,}"
if [[ -n "${RESERVED[$last_lc]:-}" ]]; then
if [[ "$new_base" == *_* ]]; then
new_base="${new_base%_*}"
else
new_base=""
fi
fi
fi
# Fallback if everything disappeared
if [[ -z "$new_base" ]]; then
new_base="untitled"
fi
printf '%s%s' "$new_base" "$ext" printf '%s%s' "$new_base" "$ext"
} }
# --- File pattern based on EXT --- # --- Walk files ---
shopt -s nullglob shopt -s nullglob
pattern="*"
if [[ "$EXT" != "*" ]]; then
pattern="*.$EXT"
fi
# --- Main loop --- VISITED=0
for path in "$DIR"/$pattern; do RENAMED=0
[[ -f "$path" ]] || continue UNCHANGED=0
COLLISIONED=0
orig_basename="$(basename -- "$path")" log "rename_and_filter.sh version $SCRIPT_VERSION"
log "Processing directory: $DIR (dry-run=$DRY_RUN, recursive=$RECURSIVE, min-len=$MIN_LEN, extensions=$EXT_LIST_STR, verbose=$VERBOSE)"
log "Reserved words loaded: ${#RESERVED[@]}"
# NEW RULE (minimal): remove ALL leading dashes before processing process_one() {
clean_basename="$orig_basename" local path="$1"
while [[ "$clean_basename" == -* ]]; do [[ -f "$path" ]] || return 0
clean_basename="${clean_basename#-}"
done
# 1) remove reserved tokens from basename (on the cleaned name) if ! matches_extension "$path" "$EXT_LIST_STR"; then
stripped="$(strip_reserved "$clean_basename")" if (( DRY_RUN && VERBOSE )); then
log "SKIP (ext): $path"
fi
return 0
fi
# 2 & 3) normalize (spaces -> "_", collapse "_", trim) ((VISITED++))
if (( DRY_RUN || VERBOSE )); then
log "Checking file: $path"
fi
local orig_basename stripped new_name bn target cand
orig_basename="$(basename "$path")"
stripped="$(strip_reserved "$orig_basename")"
new_name="$(normalize_name "$stripped")" new_name="$(normalize_name "$stripped")"
# If nothing changed, do not attempt to rename at all if (( VERBOSE )); then
if [[ "$new_name" == "$orig_basename" ]]; then log "DEBUG: orig='${orig_basename}' stripped='${stripped}' normalized='${new_name}'"
if (( DRY_RUN )); then
log "NOCHANGE (dry-run): \"${orig_basename}\" (nothing to do)"
fi fi
continue
bn="$new_name"
[[ "$bn" == *.* ]] && bn="${bn%.*}"
if (( MIN_LEN > 0 )) && (( ${#bn} < MIN_LEN )); then
if (( DRY_RUN || VERBOSE )); then
log "NOCHANGE: \"${orig_basename}\" (too short after normalize)"
fi
((UNCHANGED++))
return 0
fi
if [[ "$new_name" == "$orig_basename" ]]; then
if (( DRY_RUN || VERBOSE )); then
log "NOCHANGE: \"${orig_basename}\" (nothing to do)"
fi
((UNCHANGED++))
return 0
fi fi
target="$(dirname "$path")/$new_name" target="$(dirname "$path")/$new_name"
rename_file "$path" "$target" cand="$(candidate_name "$target" "$path")"
done if [[ "$cand" != "$target" ]]; then
((COLLISIONED++))
fi
if (( DRY_RUN )); then
log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\""
else
mv -- "$path" "$cand"
log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\""
fi
((RENAMED++))
}
# --- Main loop ---
# Feeds every candidate path to process_one, then prints the counters.
# [DEBUG] lines are emitted only when VERBOSE is set, matching how the other
# debug output in this script is gated.
if (( RECURSIVE )); then
  if (( VERBOSE )); then
    log "[DEBUG] Enter recursive loop (find '$DIR')"
  fi
  # Snapshot the complete file list before touching anything: process_one
  # renames files, and renaming entries while find is still walking the same
  # tree can make find report a renamed file a second time, which would
  # inflate the visited/unchanged counters.
  _found_paths=()
  while IFS= read -r -d '' path; do
    _found_paths+=("$path")
  done < <(find "$DIR" -type f -print0)
  # Index loop: expanding an empty array with "${arr[@]}" trips `set -u` on
  # bash < 4.4, whereas ${#arr[@]} is safe.
  for (( _i = 0; _i < ${#_found_paths[@]}; _i++ )); do
    path="${_found_paths[_i]}"
    if (( VERBOSE )); then
      log "[DEBUG] raw candidate (recursive): $path"
    fi
    process_one "$path"
  done
else
  if (( VERBOSE )); then
    log "[DEBUG] Enter non-recursive loop over '$DIR'/*"
  fi
  # The glob is expanded once, before the first iteration, so renames done
  # by process_one cannot feed back into this loop.
  for path in "$DIR"/*; do
    if (( VERBOSE )); then
      log "[DEBUG] raw candidate: $path"
    fi
    process_one "$path"
  done
fi
log "Summary: visited=$VISITED, renamed=$RENAMED, unchanged=$UNCHANGED, collisions_adjusted=$COLLISIONED"
log "Done." log "Done."