rename and filter
This commit is contained in:
@ -2,49 +2,49 @@
|
||||
# =============================================================================
|
||||
# rename_and_filter.sh
|
||||
#
|
||||
# Version: 1.3.1
|
||||
# Last updated: 2025-10-04
|
||||
# Version: 1.9.0
|
||||
# Last updated: 2025-11-13 (camelCase- & siffersplit borttagen)
|
||||
#
|
||||
# Summary
|
||||
# -------
|
||||
# Given a directory, a comma-separated file of reserved words, and an optional
|
||||
# file extension filter:
|
||||
# 1) Removes any filename tokens (separated by "_") that match reserved words
|
||||
# (case-insensitive, exact token matches).
|
||||
# - EXTRA: If a reserved word appears as the last token (e.g., "..._good"),
|
||||
# it is removed even if it's only delimited on the left side.
|
||||
# 2) Replaces one or more spaces with a single underscore "_".
|
||||
# 3) Collapses multiple "_" into a single "_", and trims leading/trailing "_".
|
||||
# Döper om filer i en katalog baserat på en kommaseparerad lista med spärrade
|
||||
# ord (reserved words). Matchning sker mot _tokens_ i basnamnet (skiljetecken
|
||||
# "_"), med **exakt token-matchning** (case-insensitivt). Inga substring-
|
||||
# regler och ingen automatisk camelCase/siffersplit.
|
||||
#
|
||||
# Notes:
|
||||
# - No files are deleted; only renaming occurs.
|
||||
# - Processes files only in the given directory (non-recursive).
|
||||
# - Collision handling: if target name exists, suffix "_N" before the extension.
|
||||
# - Reserved words match only against the basename tokens (extension excluded).
|
||||
# Regler (AUKTORITATIV ORDNING)
|
||||
# -----------------------------
|
||||
# OBS: Numreringen återspeglar **exakt tillämpningsordning** i koden. Om en
|
||||
# ny regel införs mellan t.ex. 3 och 4 ska **alla efterföljande regler
|
||||
# renumreras**. Referera alltid till regler som "Regel N".
|
||||
#
|
||||
# Extension filter:
|
||||
# - Default: mp4
|
||||
# - "*" : all files
|
||||
# - "jpg" : only .jpg (with or without leading dot accepted, e.g. "jpg" or ".jpg")
|
||||
# Regler:
|
||||
# 1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord
|
||||
# (case-insensitivt).
|
||||
# 2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett
|
||||
# längd (t.ex. 2, 22, 123).
|
||||
# 3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_",
|
||||
# kollapsa multipla "_" och dela på "_".
|
||||
# 4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma
|
||||
# ledande/efterföljande "_" i basnamnet. Extension bibehålls.
|
||||
# 5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före
|
||||
# extension.
|
||||
#
|
||||
# Logging:
|
||||
# RENAME: "old_name.ext" => "new_name.ext"
|
||||
# NOCHANGE (dry-run): "file.ext" (nothing to do)
|
||||
# Noter:
|
||||
# - Ingen fil raderas; endast rename.
|
||||
# - Icke-rekursiv som standard; använd --recursive för att gå ned i underkataloger.
|
||||
# - Extensionfilter: en, flera (kommaseparerade) eller "*" för alla filer.
|
||||
#
|
||||
# Usage
|
||||
# -----
|
||||
# ./rename_and_filter.sh [--dry-run] <directory> <reserved_words.csv> [extension]
|
||||
# Exempel
|
||||
# -------
|
||||
# # Torrkörning (default mp4)
|
||||
# ./rename_and_filter.sh --dry-run ./videos reserved_words.csv
|
||||
#
|
||||
# Examples
|
||||
# --------
|
||||
# # Dry-run on mp4 files (default):
|
||||
# ./rename_and_filter.sh --dry-run ./videos ./reserved_words.csv
|
||||
# # Alla filer rekursivt
|
||||
# ./rename_and_filter.sh -n -r ./media reserved_words.csv "*"
|
||||
#
|
||||
# # All files:
|
||||
# ./rename_and_filter.sh ./media ./reserved_words.csv "*"
|
||||
#
|
||||
# # Only jpg:
|
||||
# ./rename_and_filter.sh ./pictures ./reserved_words.csv jpg
|
||||
# # jpg & png
|
||||
# ./rename_and_filter.sh -n ./pics reserved_words.csv jpg,png
|
||||
#
|
||||
# Exit codes
|
||||
# ----------
|
||||
@ -53,87 +53,87 @@
|
||||
# 2 Requires bash >= 4
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
# Viktigt: vi kör INTE set -e, för att undvika aborter mitt i loopen.
|
||||
# Vi behåller -u och pipefail för rimlig säkerhet.
|
||||
set -uo pipefail
|
||||
SCRIPT_VERSION="1.9.0"
|
||||
|
||||
# Requires bash 4 for associative arrays
|
||||
# Bash 4 krävs för assoc. arrayer & case-conversion
|
||||
if [ "${BASH_VERSINFO:-0}" -lt 4 ]; then
|
||||
echo "This script requires bash >= 4." >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
DRY_RUN=0
|
||||
RECURSIVE=0
|
||||
MIN_LEN=0
|
||||
VERBOSE=0
|
||||
|
||||
#######################################
# Print the usage text, i.e. the comment header at the top of the script.
# Arguments:
#   $1 - (optional) path of the script to read; defaults to "$0"
# Outputs:
#   The header on stdout, with the leading "#" and one optional space removed.
#######################################
print_help() {
  local script="${1:-$0}"

  # Line 1 is skipped (presumably the shebang). Output stops at the first
  # line that is not a comment, so the help text never spills into the code
  # that follows the header, no matter how long the header grows. A fixed
  # line range (e.g. 2-200) silently goes stale whenever the header changes.
  awk '
    NR == 1 { next }
    !/^#/   { exit }
    { sub(/^# ?/, ""); print }
  ' < "$script"
}
|
||||
|
||||
#######################################
# Print the authoritative, ordered list of renaming rules.
# The rule text is user-facing output and is emitted verbatim.
# Outputs:
#   Six lines on stdout: a title followed by rules 1-5.
#######################################
print_rules() {
  # Quoted delimiter: the text is literal, nothing inside is expanded.
  cat <<'RULES'
Regler (auktoritativ ordning)
1) [strip_reserved] Ta bort tokens som exakt matchar ett spärrat ord (case-insensitivt).
2) [strip_reserved] Ta bort tokens som består av enbart siffror, oavsett längd (t.ex. 2, 22, 123).
3) [strip_reserved] Normalisera basnamnet: ersätt "-" → "_", whitespace → "_", kollapsa multipla "_" och dela på "_".
4) [normalize_name] Finjustera: kollapsa ev. kvarvarande multipla "_", trimma ledande/efterföljande "_" i basnamnet. Extension bibehålls.
5) [candidate_name] Krockhantering: om målnamn finns, suffix "_N" före extension.
RULES
}
|
||||
|
||||
#######################################
# Write a timestamped log line.
# Arguments:
#   $* - message text (joined with spaces)
# Outputs:
#   "[YYYY-MM-DD HH:MM:SS] message" on stdout when called from the main
#   shell; on stderr when called from inside a subshell.
#######################################
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"

  # Helpers in this script return their result on stdout and are called via
  # command substitution (e.g. stripped="$(strip_reserved ...)"). A log line
  # written to stdout from inside such a call would be captured as part of
  # the result and end up in the new filename. BASH_SUBSHELL is > 0 there,
  # so divert the line to stderr in that case.
  if (( BASH_SUBSHELL > 0 )); then
    printf '[%s] %s\n' "$stamp" "$*" >&2
  else
    printf '[%s] %s\n' "$stamp" "$*"
  fi
}
|
||||
|
||||
rename_file() {
|
||||
local src="$1"
|
||||
local dst="$2"
|
||||
|
||||
# Guard: identical path, nothing to do
|
||||
if [[ "$src" == "$dst" ]]; then
|
||||
if (( DRY_RUN )); then
|
||||
# CHANGE: add `--` so basename doesn't treat dash-prefixed names as options
|
||||
log "NOCHANGE (dry-run): \"$(basename -- "$src")\" (nothing to do)"
|
||||
fi
|
||||
return 0
|
||||
fi
|
||||
|
||||
local base ext candidate n
|
||||
base="${dst##*/}"
|
||||
ext=""
|
||||
if [[ "$base" == *.* ]]; then
|
||||
ext=".${base##*.}"
|
||||
base="${base%.*}"
|
||||
fi
|
||||
candidate="$dst"
|
||||
n=1
|
||||
# Avoid collisions (unless it's the same file, already handled above)
|
||||
while [[ -e "$candidate" && "$src" != "$candidate" ]]; do
|
||||
candidate="$(dirname "$dst")/${base}_$n$ext"
|
||||
((n++))
|
||||
# --- Argument parsing ---
ARGS=()

#######################################
# Split the command line into option flags and positional arguments.
# Globals written:
#   DRY_RUN, RECURSIVE, VERBOSE, MIN_LEN, ARGS
# Arguments:
#   "$@" - the script's command line
# Exits:
#   0 after printing help or rules; 1 on an unknown option or an invalid or
#   missing --min-len value.
#######################################
parse_args() {
  ARGS=()
  while (( $# > 0 )); do
    case "$1" in
      -h|--help) print_help; exit 0 ;;
      --print-rules) print_rules; exit 0 ;;
      -n|--dry-run) DRY_RUN=1; shift ;;
      -r|--recursive) RECURSIVE=1; shift ;;
      -v|--verbose) VERBOSE=1; shift ;;
      --min-len)
        # "shift 2" with a single remaining argument fails WITHOUT shifting;
        # because the script does not run under "set -e" that would spin
        # this loop forever. Require and validate the value explicitly.
        if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "Error: --min-len requires a non-negative integer argument." >&2
          exit 1
        fi
        # Force base 10 so values like "08" are not parsed as octal later.
        MIN_LEN=$(( 10#$2 ))
        shift 2
        ;;
      --)
        # Everything after "--" is positional, including dash-prefixed names.
        shift
        ARGS+=("$@")
        break
        ;;
      -*) echo "Unknown option: $1" >&2; exit 1 ;;
      *) ARGS+=("$1"); shift ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
if (( DRY_RUN )); then
|
||||
# CHANGE: add `--` for safe logging
|
||||
log "RENAME: \"$(basename -- "$src")\" => \"$(basename -- "$candidate")\""
|
||||
else
|
||||
mv -v -- "$src" "$candidate" >/dev/null
|
||||
# CHANGE: add `--` for safe logging
|
||||
log "RENAME: \"$(basename -- "$src")\" => \"$(basename -- "$candidate")\""
|
||||
fi
|
||||
}
|
||||
|
||||
# --- Argument parsing ---
|
||||
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
|
||||
print_help
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "${1:-}" == "-n" || "${1:-}" == "--dry-run" ]]; then
|
||||
DRY_RUN=1
|
||||
shift
|
||||
fi
|
||||
|
||||
if [[ $# -lt 2 || $# -gt 3 ]]; then
|
||||
echo "Error: Provide <directory> <reserved_words.csv> [extension]. Use --help for info." >&2
|
||||
# Positional args
|
||||
if (( ${#ARGS[@]} < 2 || ${#ARGS[@]} > 3 )); then
|
||||
echo "Error: Provide <directory> <reserved_words.csv> [extension|list|*]. Use --help for info." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DIR="$1"
|
||||
WORD_FILE="$2"
|
||||
EXT="${3:-mp4}"
|
||||
DIR="${ARGS[0]}"
|
||||
WORD_FILE="${ARGS[1]}"
|
||||
EXT_ARG="${ARGS[2]:-mp4}"
|
||||
|
||||
# Normalize EXT: strip leading dot if present
|
||||
if [[ "$EXT" == .* ]]; then
|
||||
EXT="${EXT#.}"
|
||||
# Normalize extensions list into a csv string (lowercase, strip leading dots)
|
||||
#######################################
# Normalize an extension filter into a canonical comma-separated string.
# Arguments:
#   $1 - "*" (all files) or a comma-separated list such as "jpg,.PNG"
# Outputs:
#   "*" unchanged, or the list lowercased with surrounding whitespace and one
#   leading dot removed from each entry; empty entries are dropped.
#######################################
normalize_ext_list() {
  local arg="$1"
  local item joined=""
  local -a raw_items=()

  if [[ "$arg" == "*" ]]; then
    printf '%s\n' "*"
    return 0
  fi

  IFS=',' read -r -a raw_items <<< "$arg"

  # The ${arr[@]+...} form keeps "set -u" from aborting on an empty array in
  # bash versions before 4.4.
  for item in ${raw_items[@]+"${raw_items[@]}"}; do
    # Trim surrounding whitespace so "jpg, png" works as expected.
    item="${item#"${item%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    item="${item#.}"
    [[ -z "$item" ]] && continue
    joined+="${joined:+,}${item,,}"
  done

  printf '%s\n' "$joined"
}
|
||||
|
||||
EXT_LIST_STR="$(normalize_ext_list "$EXT_ARG")"
|
||||
|
||||
# Validate inputs
|
||||
if [[ ! -d "$DIR" ]]; then
|
||||
echo "Error: Directory does not exist: $DIR" >&2
|
||||
exit 1
|
||||
@ -143,67 +143,102 @@ if [[ ! -f "$WORD_FILE" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --- Load reserved words into an associative set ---
|
||||
# --- Load reserved words (to associative set) ---
|
||||
declare -A RESERVED=()
|
||||
# Allow comma and newlines as separators
|
||||
# shellcheck disable=SC2002
|
||||
mapfile -t _tokens < <(cat "$WORD_FILE" | tr ',\r\n' '\n')
|
||||
mapfile -t _tokens < <(tr ',\r\n' '\n' < "$WORD_FILE")
|
||||
for raw in "${_tokens[@]}"; do
|
||||
# trim whitespace
|
||||
w="${raw#"${raw%%[![:space:]]*}"}" # ltrim
|
||||
w="${w%"${w##*[![:space:]]}"}" # rtrim
|
||||
w="${raw#"${raw%%[![:space:]]*}"}"
|
||||
w="${w%"${w##*[![:space:]]}"}"
|
||||
[[ -z "$w" ]] && continue
|
||||
RESERVED["${w,,}"]=1
|
||||
lw="${w,,}" # case-insensitive
|
||||
lw="${lw//-/_}" # normalisera även ordlista: '-' → '_'
|
||||
RESERVED["$lw"]=1
|
||||
done
|
||||
|
||||
log "Processing directory: $DIR (dry-run=$DRY_RUN, extension=${EXT})"
|
||||
log "Reserved words loaded: ${#RESERVED[@]}"
|
||||
# --- Helpers ---
|
||||
#######################################
# Decide whether a file's extension is in the allowed list.
# Arguments:
#   $1 - file path
#   $2 - "*" or a comma-separated list of lowercase extensions (no dots)
# Returns:
#   0 if the list is "*" or the file's (lowercased) extension is listed,
#   1 otherwise (including files without any "." in their name).
#######################################
matches_extension() {
  local file="$1" ext_list="$2"
  local base lc wanted_ext
  local -a wanted=()

  [[ "$ext_list" == "*" ]] && return 0
  [[ -n "$ext_list" ]] || return 1

  # Parameter expansion instead of basename(1): no fork per file, and names
  # starting with "-" cannot be mistaken for options.
  base="${file##*/}"
  [[ "$base" == *.* ]] || return 1

  lc="${base##*.}"
  lc="${lc,,}"

  IFS=',' read -r -a wanted <<< "$ext_list"
  for wanted_ext in "${wanted[@]}"; do
    # Quoted right-hand side: literal comparison, not a glob match.
    [[ "$lc" == "$wanted_ext" ]] && return 0
  done
  return 1
}
|
||||
|
||||
# --- Normalize (rules 2 & 3) ---
|
||||
#######################################
# Final clean-up of a filename (rule 4): collapse runs of "_" into a single
# "_" and trim one leading/trailing "_" from the basename.
# Whitespace handling is expected to have happened already in strip_reserved.
# Arguments:
#   $1 - filename (no directory part), e.g. "__foo___bar_.mp4"
# Outputs:
#   The cleaned name on stdout without a trailing newline; the text after
#   the last "." is treated as the extension and re-attached.
#######################################
normalize_name() {
  local name="$1" base ext=""

  # Collapse multiple underscores without spawning sed.
  while [[ "$name" == *__* ]]; do
    name="${name//__/_}"
  done

  base="$name"
  if [[ "$base" == *.* ]]; then
    ext=".${base##*.}"
    base="${base%.*}"
  fi

  # After the collapse there is at most one "_" at either end.
  base="${base#_}"
  base="${base%_}"

  printf '%s%s' "$base" "$ext"
}
|
||||
|
||||
# --- Remove reserved tokens from basename (plus end-token rule) ---
|
||||
#######################################
# Collision handling (rule 5): pick a free target path for a rename.
# Arguments:
#   $1 - desired destination path
#   $2 - source path (a destination equal to the source is never a collision)
# Outputs:
#   The destination itself if it is free, otherwise "<base>_N<ext>" in the
#   same directory with the smallest N >= 1 that is free. No trailing newline.
#######################################
candidate_name() {
  local dst="$1" src="$2"
  local dir base ext="" candidate n=1

  base="${dst##*/}"
  if [[ "$base" == *.* ]]; then
    ext=".${base##*.}"
    base="${base%.*}"
  fi

  # The directory does not change between attempts; compute it once.
  dir="$(dirname -- "$dst")"
  candidate="$dst"

  # -e follows symlinks and is false for a dangling link, which mv would
  # still replace; -L catches that case so no directory entry is clobbered.
  while [[ ( -e "$candidate" || -L "$candidate" ) && "$src" != "$candidate" ]]; do
    candidate="${dir}/${base}_${n}${ext}"
    n=$(( n + 1 ))
  done

  printf '%s' "$candidate"
}
|
||||
|
||||
strip_reserved() {
|
||||
local name="$1"
|
||||
local base ext token lw
|
||||
local name="$1" base ext token lw
|
||||
local -a parts new_parts=()
|
||||
|
||||
# CHANGE: pass `--` to basename for dash-prefixed names
|
||||
base="$(basename -- "$name")"
|
||||
ext=""
|
||||
base="$(basename "$name")"; ext=""
|
||||
if [[ "$base" == *.* ]]; then
|
||||
ext=".${base##*.}"
|
||||
base="${base%.*}"
|
||||
fi
|
||||
|
||||
# Pre-normalize for tokenization
|
||||
# Pre-normalisering & tokenisering:
|
||||
# - ersätt '-' → '_'
|
||||
# - ersätt all whitespace → '_'
|
||||
# - kollapsa multipla '_'
|
||||
# - dela på '_'
|
||||
local norm_base
|
||||
norm_base="$(printf '%s' "$base" | sed -E 's/[[:space:]]+/_/g' | sed -E 's/_+/_/g')"
|
||||
norm_base="$(printf '%s' "$base" \
|
||||
| sed -E 's/-/_/g' \
|
||||
| sed -E 's/[[:space:]]+/_/g' \
|
||||
| sed -E 's/_+/_/g')"
|
||||
|
||||
IFS='_' read -r -a parts <<< "$norm_base"
|
||||
|
||||
for token in "${parts[@]}"; do
|
||||
[[ -z "$token" ]] && continue
|
||||
lw="${token,,}"
|
||||
if [[ -n "${RESERVED[$lw]:-}" ]]; then
|
||||
|
||||
# (Regel 2) slopa rena siffertokens
|
||||
if [[ "$token" =~ ^[[:digit:]]+$ ]]; then
|
||||
(( VERBOSE )) && log "DEBUG: drop numeric token '$token'"
|
||||
continue
|
||||
fi
|
||||
|
||||
# (Regel 1) exakt token = reserverat ord (case-insensitivt)
|
||||
if [[ -n "${RESERVED[$lw]:-}" ]]; then
|
||||
(( VERBOSE )) && log "DEBUG: drop reserved token '$token'"
|
||||
continue
|
||||
fi
|
||||
|
||||
new_parts+=("$token")
|
||||
done
|
||||
|
||||
@ -214,63 +249,94 @@ strip_reserved() {
|
||||
new_base="$(IFS=_; echo "${new_parts[*]}")"
|
||||
fi
|
||||
|
||||
# Extra end-token rule (defensive): if last token is reserved, drop it
|
||||
if [[ -n "$new_base" ]]; then
|
||||
local last last_lc
|
||||
last="${new_base##*_}"
|
||||
last_lc="${last,,}"
|
||||
if [[ -n "${RESERVED[$last_lc]:-}" ]]; then
|
||||
if [[ "$new_base" == *_* ]]; then
|
||||
new_base="${new_base%_*}"
|
||||
else
|
||||
new_base=""
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fallback if everything disappeared
|
||||
if [[ -z "$new_base" ]]; then
|
||||
new_base="untitled"
|
||||
fi
|
||||
|
||||
[[ -z "$new_base" ]] && new_base="untitled"
|
||||
printf '%s%s' "$new_base" "$ext"
|
||||
}
|
||||
|
||||
# --- File pattern based on EXT ---
|
||||
# --- Walk files ---
|
||||
shopt -s nullglob
|
||||
pattern="*"
|
||||
if [[ "$EXT" != "*" ]]; then
|
||||
pattern="*.$EXT"
|
||||
|
||||
VISITED=0
|
||||
RENAMED=0
|
||||
UNCHANGED=0
|
||||
COLLISIONED=0
|
||||
|
||||
log "rename_and_filter.sh version $SCRIPT_VERSION"
|
||||
log "Processing directory: $DIR (dry-run=$DRY_RUN, recursive=$RECURSIVE, min-len=$MIN_LEN, extensions=$EXT_LIST_STR, verbose=$VERBOSE)"
|
||||
log "Reserved words loaded: ${#RESERVED[@]}"
|
||||
|
||||
process_one() {
|
||||
local path="$1"
|
||||
[[ -f "$path" ]] || return 0
|
||||
|
||||
if ! matches_extension "$path" "$EXT_LIST_STR"; then
|
||||
if (( DRY_RUN && VERBOSE )); then
|
||||
log "SKIP (ext): $path"
|
||||
fi
|
||||
return 0
|
||||
fi
|
||||
|
||||
# --- Main loop ---
|
||||
for path in "$DIR"/$pattern; do
|
||||
[[ -f "$path" ]] || continue
|
||||
((VISITED++))
|
||||
if (( DRY_RUN || VERBOSE )); then
|
||||
log "Checking file: $path"
|
||||
fi
|
||||
|
||||
orig_basename="$(basename -- "$path")"
|
||||
|
||||
# NEW RULE (minimal): remove ALL leading dashes before processing
|
||||
clean_basename="$orig_basename"
|
||||
while [[ "$clean_basename" == -* ]]; do
|
||||
clean_basename="${clean_basename#-}"
|
||||
done
|
||||
|
||||
# 1) remove reserved tokens from basename (on the cleaned name)
|
||||
stripped="$(strip_reserved "$clean_basename")"
|
||||
|
||||
# 2 & 3) normalize (spaces -> "_", collapse "_", trim)
|
||||
local orig_basename stripped new_name bn target cand
|
||||
orig_basename="$(basename "$path")"
|
||||
stripped="$(strip_reserved "$orig_basename")"
|
||||
new_name="$(normalize_name "$stripped")"
|
||||
|
||||
# If nothing changed, do not attempt to rename at all
|
||||
if [[ "$new_name" == "$orig_basename" ]]; then
|
||||
if (( DRY_RUN )); then
|
||||
log "NOCHANGE (dry-run): \"${orig_basename}\" (nothing to do)"
|
||||
if (( VERBOSE )); then
|
||||
log "DEBUG: orig='${orig_basename}' stripped='${stripped}' normalized='${new_name}'"
|
||||
fi
|
||||
continue
|
||||
|
||||
bn="$new_name"
|
||||
[[ "$bn" == *.* ]] && bn="${bn%.*}"
|
||||
if (( MIN_LEN > 0 )) && (( ${#bn} < MIN_LEN )); then
|
||||
if (( DRY_RUN || VERBOSE )); then
|
||||
log "NOCHANGE: \"${orig_basename}\" (too short after normalize)"
|
||||
fi
|
||||
((UNCHANGED++))
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [[ "$new_name" == "$orig_basename" ]]; then
|
||||
if (( DRY_RUN || VERBOSE )); then
|
||||
log "NOCHANGE: \"${orig_basename}\" (nothing to do)"
|
||||
fi
|
||||
((UNCHANGED++))
|
||||
return 0
|
||||
fi
|
||||
|
||||
target="$(dirname "$path")/$new_name"
|
||||
rename_file "$path" "$target"
|
||||
done
|
||||
cand="$(candidate_name "$target" "$path")"
|
||||
if [[ "$cand" != "$target" ]]; then
|
||||
((COLLISIONED++))
|
||||
fi
|
||||
|
||||
if (( DRY_RUN )); then
|
||||
log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\""
|
||||
else
|
||||
mv -- "$path" "$cand"
|
||||
log "RENAME: \"$(basename "$path")\" => \"$(basename "$cand")\""
|
||||
fi
|
||||
((RENAMED++))
|
||||
}
|
||||
|
||||
# --- Main loop ---
# Feed every candidate path to process_one: all regular files below DIR in
# recursive mode (NUL-delimited from find, so any filename is safe), or the
# direct entries of DIR otherwise. The "[DEBUG]" trace lines are emitted only
# with --verbose, in line with the other DEBUG output in this script.
if (( RECURSIVE )); then
  if (( VERBOSE )); then
    log "[DEBUG] Enter recursive loop (find '$DIR')"
  fi
  while IFS= read -r -d '' path; do
    if (( VERBOSE )); then
      log "[DEBUG] raw candidate (recursive): $path"
    fi
    process_one "$path"
  done < <(find "$DIR" -type f -print0)
else
  if (( VERBOSE )); then
    log "[DEBUG] Enter non-recursive loop over '$DIR'/*"
  fi
  for path in "$DIR"/*; do
    if (( VERBOSE )); then
      log "[DEBUG] raw candidate: $path"
    fi
    process_one "$path"
  done
fi
|
||||
|
||||
log "Summary: visited=$VISITED, renamed=$RENAMED, unchanged=$UNCHANGED, collisions_adjusted=$COLLISIONED"
|
||||
log "Done."
|
||||
|
||||
Reference in New Issue
Block a user