Files
FileDownloader/download.sh
Urban Modig 89d353cba4 init
2025-09-29 17:21:05 +02:00

240 lines
5.2 KiB
Bash

#!/bin/bash
# Index file of already-downloaded post links, one link per line.
# Its directory is derived from base_url with the first two '/'-separated
# fields (the scheme and the empty field of "//") cut away.
#
# NOTE(review): in the code visible in this file, base_url is only assigned
# later (in the collection branch). Unless it is inherited from the
# environment it is empty here and the path collapses to
# /storage/disk1/X/idx/blog//idx for every run -- confirm this is intended.
#
# base_url is quoted so that URLs containing '?' or '*' are not subject to
# globbing or word-splitting.
idx="/storage/disk1/X/idx/blog/$(printf '%s\n' "$base_url" | cut -d'/' -f3-)/idx"

# File that lock() opens on fd 200 and flock()s to keep a single instance running.
LOCK_FILE="/tmp/my_script.lock"

## Setup idx
# Directory that holds the index file.
directory_path="$(dirname "$idx")"

# Create the directory when missing.
if [[ ! -d "$directory_path" ]]; then
  mkdir -p "$directory_path"
fi

# Create an empty index when missing; an existing file is left untouched
# (including its modification time).
if [[ ! -e "$idx" ]]; then
  touch "$idx"
fi
# Take the single-instance lock.
# Globals:  LOCK_FILE (read) - opened for writing on file descriptor 200.
# Outputs:  progress / failure message on stdout.
# Exits:    terminates the script with status 1 when the lock cannot be
#           acquired without blocking (another process holds it).
lock() {
  echo "Creating lock.."
  exec 200>"$LOCK_FILE"
  # The failure branch has to run in the current shell: inside ( ... ) the
  # `exit 1` would only leave the subshell and the script would continue
  # without holding the lock.
  if ! flock -n 200; then
    echo "Lock failed - process exist!!"
    exit 1
  fi
}
# Release the lock held on file descriptor 200.
# Does nothing when fd 200 is not open, which is the case when this is
# reached before lock() has run (e.g. through _exit during option parsing);
# calling flock on the closed descriptor would only print an error.
# Outputs: progress message on stdout when a lock is released.
unlock() {
  # Probe the descriptor: duplicating a closed fd fails, and the shell's
  # complaint is discarded by the surrounding group redirection.
  if ! { true >&200; } 2>/dev/null; then
    return 0
  fi
  echo "Removing lock.."
  # Release the lock
  flock -u 200
}
# Release the lock, then terminate the script.
# Arguments: $1 - exit status to terminate with
_exit() {
  local code=$1
  unlock
  # Left unquoted on purpose: with no argument this degrades to a bare
  # `exit`, which reports the status of the preceding unlock call.
  exit $code
}
# Ask the local resolver service for the title and media URL of a post.
# Arguments: $1 - post path; it is appended verbatim to https://sxyprn.com
# Outputs:   the raw response body on stdout. Per a sample response that was
#            recorded here earlier, the expected shape is a two-element array
#            of strings: ["<title>","<media url>"].
# Returns:   0 on success, 1 when curl fails (diagnostic goes to stderr).
get_link_for_item() {
  local item=$1
  local content

  # POST the absolute post URL as plain text to the service on localhost:3000.
  content=$(curl -s -X POST localhost:3000 -H "Content-Type: text/plain" --data "https://sxyprn.com$item") || {
    # stderr, not stdout: callers capture stdout as the response body, so an
    # error message written there would be mistaken for a response.
    echo "Failed to download the URL: $item" >&2
    return 1
  }

  printf '%s\n' "$content"
}
# Write a message to stdout, prefixed with the current local time
# formatted as YYYY-MM-DD HH:MM:SS.
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date '+%F %H:%M:%S')
  printf '%s %s\n' "$stamp" "$1"
}
# Download a listing page and print the item links found on it, one per line.
# Arguments: $1 - URL of the page to fetch
# Outputs:   for every tag containing class='js-pop', the second
#            single-quote-delimited field of that tag with anything from the
#            first '?' onwards removed (presumably the href value -- this
#            assumes href is the first single-quoted attribute of the tag).
# Returns:   1 when curl fails (message logged to stderr), otherwise the
#            status of the extraction pipeline.
get_items() {
  local url=$1
  local content

  content=$(curl -s "$url") || {
    # Log to stderr: stdout is captured by the caller as the list of links,
    # so a log line written there would be processed as if it were a post.
    log "Failed to download the URL: $url" >&2
    return 1
  }

  # Break the markup after every '>' so each tag sits on its own line
  # (relies on sed accepting \n in the replacement, as GNU sed does).
  printf '%s\n' "$content" \
    | sed 's/>/>\n/g' \
    | grep "class='js-pop'" \
    | awk -F"'" '{print $2}' \
    | cut -d'?' -f1
}
# Tell whether a link is already recorded in the index file.
# Globals:   idx (read) - path of the index file
# Arguments: $1 - link to look for; must equal a whole line, compared literally
# Returns:   0 when the link is present, 1 otherwise
link_exists_in_file() {
  local needle="$1"
  grep -Fxq "$needle" "$idx" && return 0
  return 1
}
# Download a URL into "<sanitized name>.mp4" in the current directory.
# Arguments: $1 - URL to download
#            $2 - desired name; cut to its first 50 characters, then every
#                 character outside [a-zA-Z0-9_.-] is replaced with '_'
# Returns:   0 on success, 1 when curl reports a failure
download_and_save_link() {
  local url="$1"
  local filename="$2"
  local truncated="${filename:0:50}"
  local sanitized="${truncated//[^a-zA-Z0-9_.-]/_}"

  log "Downloading $url with name $sanitized"

  # --fail makes curl return non-zero on HTTP error responses. Without it an
  # error page would be stored as the .mp4 and reported as a success, and the
  # caller would then mark the post as done.
  if ! curl --fail -L -o "$sanitized.mp4" "$url"; then
    log "Curl failed to download the URL: $url"
    return 1
  fi
}
# Regex for the resolver response: an array of exactly two double-quoted
# strings, ["<first>","<second>"]. do_post uses capture 1 as the file name
# and capture 2 as the download URL.
pattern='^\["([^"]*)","([^"]*)"\]$'

# Process a single post: resolve it, download it and record it in the index.
# Globals:   pattern (read), idx (appended to on success)
# Arguments: $1 - post link as stored in the index. get_link_for_item
#                 prepends the site origin, so this is a site-relative path
#                 such as /post/<id>.html.
# Outputs:   progress messages via log
do_post() {
  local line="$1"
  local result title media_url

  # Nothing to do for blank input; resolving it would only query the bare
  # site origin.
  if [[ -z "$line" ]]; then
    return 0
  fi

  if link_exists_in_file "$line"; then
    log "Link already exists: $line"
    return 0
  fi

  log "Resolving link $line"
  result="$(get_link_for_item "$line")"

  # Anything that is not the expected two-element array is reported and skipped.
  if [[ ! $result =~ $pattern ]]; then
    log "no match $result"
    return 0
  fi
  title="${BASH_REMATCH[1]}"
  media_url="${BASH_REMATCH[2]}"

  if download_and_save_link "$media_url" "$title"; then
    log "Download success - updating index"
    # Only successfully downloaded posts are indexed, so failures are
    # retried on the next run.
    printf '%s\n' "$line" >> "$idx"
  else
    log "Download failed"
  fi
}
# Run do_post for every post in a newline-separated list.
# Arguments: $1 - post links, one per line
do_list_of_posts() {
  local links="$1"
  local line

  while IFS= read -r line; do
    # A here-string always yields at least one (possibly empty) line, so an
    # empty list would otherwise trigger a call with an empty link.
    [[ -n "$line" ]] || continue
    do_post "$line"
  done <<< "$links"
}
# Process one listing page of a collection: extract its post links and
# handle each of them.
# Arguments: $1 - URL of the listing page
# Returns:   1 when the page's items could not be fetched, otherwise the
#            status of do_list_of_posts
do_collection_page() {
  local current_url="$1"
  local links

  log "Current page: $current_url"

  # Download and parse out items. On failure nothing is processed, so
  # whatever get_items printed is never treated as a list of posts.
  if ! links="$(get_items "$current_url")"; then
    log "Skipping page, could not fetch items: $current_url"
    return 1
  fi

  do_list_of_posts "$links"
}
# Report that the mode options were combined and terminate with status 1
# through _exit.
usage() {
  printf '%s\n' "Options -c, -p and -f cannot be used together."
  _exit 1
}
# Command-line options. -c, -p and -f select the mode and exclude each other.
#   -c <url>   scrape a collection (listing pages)
#   -p <post>  process a single post
#   -f <file>  process a file containing one post per line
#   -d <arg>   enable shell tracing (set -x)
#   -n <arg>   sets no_op
# NOTE(review): the option string declares -d and -n as taking an argument,
# yet neither branch reads OPTARG -- confirm whether they were meant to be
# plain flags.
collection=""
post=""
file=""
debug=0

while getopts ":n:d:c:p:f:" opt; do
  case "$opt" in
    d)
      set -x
      debug=1
      ;;
    n)
      # No op
      no_op="1"
      ;;
    c)
      # Collection. An OPTARG equal to another mode switch means that switch
      # was swallowed as this option's argument.
      if [[ -n "$post" || -n "$file" || "$OPTARG" == "-p" || "$OPTARG" == "-f" ]]; then
        usage
      fi
      collection="$OPTARG"
      ;;
    p)
      # post
      if [[ -n "$collection" || -n "$file" || "$OPTARG" == "-c" || "$OPTARG" == "-f" ]]; then
        usage
      fi
      post="$OPTARG"
      ;;
    f)
      # file of posts
      if [[ -n "$collection" || -n "$post" || "$OPTARG" == "-c" || "$OPTARG" == "-p" ]]; then
        usage
      fi
      file="$OPTARG"
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      _exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument."
      _exit 1
      ;;
  esac
done
echo "debug: $debug"

# From here on only one instance may run; lock is expected to stop the
# script when another one holds the lock.
lock

# Collection: walk the listing pages of the collection.
if [[ -n "$collection" ]]; then
  echo "Scrapping collection.."
  # NOTE(review): base_url is first assigned here, after the index path at
  # the top of the script has already been derived from it -- confirm.
  base_url="$collection"
  ## Ten pages: the first is the bare URL, the following ones are addressed
  ## with ?page=30, 60, ... 270.
  for ((i = 0; i < 10; i++)); do
    if ((i == 0)); then
      current_url="$base_url"
    else
      current_url="${base_url}?page=$((i * 30))"
    fi
    #Do collection page
    do_collection_page "$current_url"
    # Pause between page requests.
    sleep 10
  done
fi

# Single post
if [[ -n "$post" ]]; then
  echo "Scrapping post.."
  do_post "$post"
fi

# File of posts
if [[ -n "$file" ]]; then
  echo "Scrapping file"
  if [[ -e "$file" ]]; then
    # `|| [[ -n "$line" ]]` keeps the last line when the file does not end
    # with a newline; read returns non-zero for it but still fills $line.
    while IFS= read -r line || [[ -n "$line" ]]; do
      do_post "$line"
    done < "$file"
  else
    echo "File not found: $file"
    # A missing input file is an error: release the lock and report failure
    # instead of finishing with status 0.
    _exit 1
  fi
fi

unlock