#!/usr/bin/env bash
#
# compare_advanced.sh file1.mp4 file2.mp4
#
# Estimates whether two video files carry the same content. Uses ffmpeg's
# "signature" filter when the local build provides it, and otherwise falls
# back to comparing hashes of sampled frames.
#
# Exit status: 2 on usage errors or missing input files, 127 when a
# required tool is not installed.

set -euo pipefail

f1="${1:-}"; f2="${2:-}"
if [[ -z "$f1" || -z "$f2" ]]; then
  echo "Usage: $0 <file1> <file2>" >&2
  exit 2
fi

for f in "$f1" "$f2"; do
  [[ -f "$f" ]] || { echo "Not found: $f" >&2; exit 2; }
done

# A leading "-" would be parsed as an option by stat/ffprobe when the name is
# passed positionally, so anchor such names to the current directory.
if [[ "$f1" == -* ]]; then f1="./$f1"; fi
if [[ "$f2" == -* ]]; then f2="./$f2"; fi

# Fail early with a clear message instead of a confusing error further down.
for tool in ffmpeg ffprobe; do
  command -v "$tool" >/dev/null 2>&1 \
    || { echo "Required tool not found: $tool" >&2; exit 127; }
done

#######################################
# Report whether the local ffmpeg build provides the "signature" filter.
# Arguments: none
# Returns:   0 if `ffmpeg -filters` lists a filter named exactly "signature",
#            non-zero otherwise (including when ffmpeg cannot be run).
#######################################
have_signature_filter() {
  local filters

  # Capture the listing first: piping straight into `grep -q` lets grep exit
  # early, which can kill ffmpeg with SIGPIPE and, under `pipefail`, turn a
  # successful match into a failure.
  filters=$(ffmpeg -hide_banner -filters 2>/dev/null) || return 1

  # Listing rows look like "<flags> <name> <in->out> <description>"; the
  # filter name is the second whitespace-separated token on the line.
  grep -qE '^[[:space:]]*[^[:space:]]+[[:space:]]+signature[[:space:]]' <<<"$filters"
}

#######################################
# Pretty print a quick one-line summary of a media file.
# Arguments: $1 - path to the media file
# Outputs:   one line on stdout with the file size in bytes, the container
#            duration in seconds and the first video stream's codec,
#            dimensions and frame rate. Values ffprobe cannot report are
#            printed as 0 / empty instead of aborting the script.
#######################################
summary() {
  local f="$1"
  local size dur v vcodec w h rfr fps

  # GNU stat first, BSD/macOS stat as the fallback.
  size=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")

  dur=$(ffprobe -v error -show_entries format=duration -of default=nw=1:nk=1 "$f" || echo "")
  # Normalise through awk in the C locale: a non-numeric value such as "N/A"
  # becomes 0.000, and a decimal-comma locale cannot reject "12.345".
  dur=$(LC_ALL=C awk -v d="$dur" 'BEGIN{printf "%.3f", d+0}')

  # Tolerate a failing probe here just like the duration probe above.
  v=$(ffprobe -v error -select_streams v:0 \
        -show_entries stream=codec_name,width,height,r_frame_rate \
        -of csv=p=0 "$f" | head -n1) || v=""
  IFS=',' read -r vcodec w h rfr <<<"$v"

  # r_frame_rate is normally a rational such as 30000/1001; a missing or zero
  # denominator yields 0 instead of a division-by-zero error.
  fps=$(LC_ALL=C awk -v r="$rfr" 'BEGIN{
    n = split(r, parts, "/")
    if (n >= 2) val = (parts[2] + 0 == 0) ? 0 : parts[1] / parts[2]
    else val = r + 0
    printf "%.3f", val
  }')

  printf ' Size: %s bytes | Duration: %ss | Video: %s, %sx%s @ %s fps\n' \
    "$size" "$dur" "$vcodec" "$w" "$h" "$fps"
}

if have_signature_filter; then
  # Use ffmpeg's signature filter (robust against re-encode/resize).
  tmpdir=$(mktemp -d)
  trap 'rm -rf -- "${tmpdir:?}"' EXIT

  log="$tmpdir/compare.log"

  echo "Computing and comparing video fingerprints (signature filter)..."
  # The filter cannot read previously exported signature files back in as
  # inputs. It only compares when both videos are fed to a single filter
  # instance (nb_inputs=2) with a detect mode enabled; the outcome is logged
  # at info level on stderr, which is captured in $log.
  sig_status=0
  ffmpeg -nostdin -hide_banner -v info -i "$f1" -i "$f2" \
    -filter_complex "[0:v:0][1:v:0]signature=nb_inputs=2:detectmode=full[sig]" \
    -map "[sig]" -f null - 2>"$log" || sig_status=$?

  matches=""
  whole=0
  nomatch=0
  if (( sig_status == 0 )); then
    # Expected log lines (NOTE(review): wording taken from the filter's log
    # messages; confirm against the ffmpeg build in use):
    #   matching of video 0 at <t> and 1 at <t>, <N> frames matching
    #   whole video matching
    #   no matching of video 0 and 1
    matches=$(sed -nE 's/.*matching of video [0-9]+ at .* and [0-9]+ at .*, ([0-9]+) frames matching.*/\1/p' "$log" \
      | sort -n | tail -n1)
    if grep -q 'whole video matching' "$log"; then whole=1; fi
    if grep -q 'no matching of video' "$log"; then nomatch=1; fi
  fi

  echo
  echo "Quick summaries:"
  echo "File 1: $f1"; summary "$f1"
  echo "File 2: $f2"; summary "$f2"
  echo

  if (( whole )); then
    echo "Matching frames (signature): ${matches:-all} (whole video matching)"
    verdict="Very likely same content"
  elif [[ -n "$matches" ]]; then
    echo "Matching frames (signature): ${matches}"
    verdict=$(awk -v m="$matches" 'BEGIN{ if(m+0 >= 50) print "Very likely same content"; else if(m+0 >= 10) print "Possibly same"; else print "Likely different"; }')
  elif (( nomatch )); then
    echo "Matching frames (signature): 0"
    verdict="Likely different"
  else
    # $tmpdir is removed by the EXIT trap, so keep a copy of the log at a
    # path that still exists after the script has finished.
    kept_log=$(mktemp "${TMPDIR:-/tmp}/compare_advanced.XXXXXX")
    cp -- "$log" "$kept_log"
    echo "Could not parse a similarity score from FFmpeg output; check log at: $kept_log"
    verdict="Inconclusive (check logs)"
  fi

  echo "Verdict: $verdict"
  exit 0
fi

# ---- Fallback heuristic (no signature filter) ----
# Sample 1 frame every 10s, normalize to small grayscale, hash each frame,
# compare overlap. The hashes are exact, so two frames only count as equal
# when their scaled grayscale PNGs are byte-identical.

echo "Signature filter not available; using frame-sampling heuristic..."
tmpdir=$(mktemp -d)
# ${tmpdir:?} refuses to run `rm -rf` on an empty path.
trap 'rm -rf -- "${tmpdir:?}"' EXIT

#######################################
# Sample frames from a video and write one SHA-256 hash per sampled frame.
# Globals:   tmpdir (read) - existing scratch directory
# Arguments: $1 - path to the video file
#            $2 - base name; hashes are written to "$tmpdir/<base>.hashes"
# Outputs:   "$tmpdir/<base>.hashes", one hex digest per line in frame order
#            (empty when ffmpeg produced no frames)
# Returns:   non-zero if ffmpeg fails
#######################################
extract_hashes() {
  local f="$1" base="$2"
  local out="$tmpdir/${base}.hashes"
  local -a hasher
  local frame

  # GNU coreutils ships sha256sum; macOS/BSD typically only has shasum.
  if command -v sha256sum >/dev/null 2>&1; then
    hasher=(sha256sum)
  else
    hasher=(shasum -a 256)
  fi

  # One frame every 10s, written as small grayscale PNGs. Hashing ffmpeg's
  # piped output directly would yield a single digest for the whole stream,
  # so frames go to numbered files and are hashed one by one.
  rm -f -- "$tmpdir/${base}"_*.png
  ffmpeg -nostdin -v error -i "$f" \
    -vf "fps=1/10,scale=160:90,format=gray" \
    "$tmpdir/${base}_%05d.png" || return

  : > "$out"
  for frame in "$tmpdir/${base}"_*.png; do
    # With no frames the glob stays literal; skip it instead of hashing a
    # non-existent file.
    [[ -e "$frame" ]] || continue
    "${hasher[@]}" < "$frame" | awk '{print $1}' >> "$out"
  done

  rm -f -- "$tmpdir/${base}"_*.png
}

extract_hashes "$f1" A
extract_hashes "$f2" B

A="$tmpdir/A.hashes"; B="$tmpdir/B.hashes"

# Count with awk rather than `wc -l`, whose output is space-padded on BSD.
countA=$(awk 'END{print NR}' "$A"); countB=$(awk 'END{print NR}' "$B")

# comm needs both inputs sorted under the collation it uses itself; pin the
# C locale on every stage so the result does not depend on the environment.
common=$(LC_ALL=C comm -12 <(LC_ALL=C sort "$A") <(LC_ALL=C sort "$B") | awk 'END{print NR}')

# Jaccard index = |A and B| / |A or B|, with |A or B| = a + b - common.
jaccard=$(awk -v c="$common" -v a="$countA" -v b="$countB" 'BEGIN{u=a+b-c; if(u==0) print 0; else printf "%.3f", c/u}')

echo
echo "Quick summaries:"
echo "File 1: $f1"; summary "$f1"
echo "File 2: $f2"; summary "$f2"
echo
echo "Frame-sampling similarity:"
echo " Frames hashed: file1=$countA, file2=$countB"
echo " Common hashes: $common"
echo " Jaccard index: $jaccard"

if (( countA == 0 || countB == 0 )); then
  # Nothing was sampled from at least one file, so a score of 0 carries no
  # evidence either way.
  verdict="Inconclusive (no frames could be sampled)"
else
  verdict=$(awk -v j="$jaccard" 'BEGIN{
    if(j+0 >= 0.80) print "Very likely same content";
    else if(j+0 >= 0.50) print "Possibly same content (edits/recodes)";
    else print "Likely different";
  }')
fi
echo "Verdict: $verdict"