#!/bin/bash

# NOTE(review): $base_url is not set yet at this point -- it is only assigned
# much later (collection mode), so the host/path segment below expands to the
# empty string and idx always becomes ".../blog//idx". Confirm whether this
# assignment was meant to run after option parsing.
idx="/storage/disk1/X/idx/blog/$(echo "$base_url" | cut -d'/' -f3-)/idx"

# Lock file used by lock()/unlock() to keep a single instance running.
LOCK_FILE="/tmp/my_script.lock"

## Setup idx

# Ensure the directory holding the index file exists.
# mkdir -p is a no-op when the directory is already there, so the
# original "[ ! -d ] && mkdir" guard was redundant.
directory_path="$(dirname "$idx")"
mkdir -p "$directory_path"

# Ensure the index file itself exists so later grep/append calls succeed.
if [ ! -e "$idx" ]; then
    touch "$idx"
fi
|
|
|
|
# Acquire an exclusive, non-blocking lock on $LOCK_FILE via fd 200 so that
# only one instance of the script runs at a time. Terminates the script
# when another instance already holds the lock.
lock(){
    echo "Creating lock.."
    exec 200>"$LOCK_FILE"
    # BUGFIX: the original used `|| ( echo ... && exit 1 )` -- the
    # parentheses spawn a subshell, so `exit 1` only left the subshell and
    # a second instance kept running. `{ ...; }` runs in the current shell
    # and really aborts the script; the message now goes to stderr.
    flock -n 200 || { echo "Lock failed - process exists!!" >&2; exit 1; }
}
|
|
|
|
# Release the exclusive flock held on fd 200 (opened by lock()).
# The descriptor itself stays open until the script exits.
unlock(){
    echo "Removing lock.."
    # Release the lock
    flock -u 200
}
|
|
# Release the lock, then terminate the script.
# $1 - exit status (defaults to 0 when omitted; current callers pass 1).
_exit(){
    unlock
    # Quote and default the status: the original unquoted `exit $1` fell
    # back to the previous command's status when called without an argument.
    exit "${1:-0}"
}
|
|
|
|
# Resolve a post path (e.g. /post/653e2c6329e1c.html) by POSTing the full
# site URL to the local resolver service on port 3000 and echoing whatever
# it answers. Returns 1 (with a message on stdout) when the request fails.
#
# Example reply (a JSON-style ["title","video-url"] pair):
# ["MomPov E233 ... on SexyPorn","https://sxyprn.com/cdn8/.../x86v...vid"]
get_link_for_item(){
    local item=$1
    local response

    response=$(curl -s -X POST localhost:3000 -H "Content-Type: text/plain" --data "https://sxyprn.com$item") || {
        echo "Failed to download the URL: $item"
        return 1
    }

    echo "$response"
}
|
|
|
|
# Print "$1" prefixed with a "YYYY-MM-DD HH:MM:SS" timestamp.
# The original `echo $(date ...) "$message"` left the command substitution
# unquoted and word-split; printf with quoted operands is safe for any
# message content (including -n, backslashes, multiple spaces).
log(){
    local message=$1
    printf '%s %s\n' "$(date '+%F %H:%M:%S')" "$message"
}
|
|
|
|
# Fetch a collection page and print the post paths found on it, one per
# line: the first single-quoted attribute value of every tag carrying
# class='js-pop', with any query string stripped.
# Returns 1 (logging the failure) when the download fails.
get_items(){
    local url=$1
    local content
    # (the unused `local links` from the original was removed)

    content=$(curl -s "$url") || {
        log "Failed to download the URL: $url"
        return 1
    }

    # Split the HTML after every '>' so each tag sits on its own line,
    # keep the js-pop anchors, take the text between the first pair of
    # single quotes (the href), and drop everything from '?' onward.
    echo "$content" | sed 's/>/>\n/g' | grep "class='js-pop'" | awk -F"'" '{print $2}' | cut -d'?' -f1
}
|
|
|
|
# Succeed (status 0) when $1 appears as a whole line in the index file
# $idx; fail (status 1) otherwise.
link_exists_in_file() {
    local candidate="$1"

    # -F fixed string, -x whole line, -q no output -- exit status only.
    grep -Fxq "$candidate" "$idx" && return 0

    return 1
}
|
|
|
|
# Download $1 to the current directory, naming the file after the title $2:
# the title is cut to 50 characters, every character outside
# [a-zA-Z0-9_.-] becomes '_', and ".mp4" is appended.
# Returns 1 (logging the failure) when curl cannot fetch the URL.
download_and_save_link() {
    local url="$1"
    local title="$2"
    local short="${title:0:50}"
    local safe_name="${short//[^a-zA-Z0-9_.-]/_}"

    log "Downloading $url with name $safe_name"

    if ! curl -L -o "$safe_name.mp4" "$url"; then
        log "Curl failed to download the URL: $url"
        return 1
    fi
}
|
|
|
|
# Matches the resolver's reply: a JSON-style pair ["title","video-url"].
# After [[ $result =~ $pattern ]], BASH_REMATCH[1] holds the title and
# BASH_REMATCH[2] the direct download URL (used by do_post).
pattern='^\["([^"]*)","([^"]*)"\]$'
|
# Fetch post ie https://sxyprn.com/post/6545eda2cb76e.html
|
|
# Process a single post path (e.g. /post/6545eda2cb76e.html): skip it when
# it is already recorded in the index, otherwise resolve it to a
# title/video-url pair, download the video and append the path to $idx on
# success.
do_post(){
    local line="$1"
    # BUGFIX: result/val1/val2 were assigned without `local` and leaked
    # into the global scope on every call.
    local result val1 val2

    if link_exists_in_file "$line"; then
        log "Link already exists: $line"
    else
        log "Resolving link $line"
        result="$(get_link_for_item "$line")"

        # The resolver must answer with ["title","video-url"]; anything
        # else is treated as a failed resolution.
        if [[ $result =~ $pattern ]]; then
            val1="${BASH_REMATCH[1]}"
            val2="${BASH_REMATCH[2]}"
            if download_and_save_link "$val2" "$val1"; then
                log "Download success - updating index"
                # Only index the post once the download succeeded, so a
                # failed download will be retried on the next run.
                echo "$line" >> "$idx"
            else
                log "Download failed"
            fi
        else
            log "no match $result"
        fi
    fi
}
|
|
|
|
# Iterate over rows of posts
|
|
# Run do_post for every line of the newline-separated list given in $1.
do_list_of_posts(){
    local post_list="$1"
    local post

    # IFS= and -r preserve leading/trailing whitespace and backslashes.
    while IFS= read -r post; do
        do_post "$post"
    done <<< "$post_list"
}
|
|
|
|
# Fetch a page of posts ie https://sxyprn.com/Woodman-Casting-X.html
|
|
# Fetch one collection page (e.g. https://sxyprn.com/Woodman-Casting-X.html),
# extract the post links on it and process each one.
do_collection_page(){
    local current_url="$1"
    # BUGFIX: `links` was implicitly global in the original; declare it
    # local like every other function-scoped variable in this script.
    local links

    log "Current page: $current_url"

    # Download and parse out items
    links="$(get_items "$current_url")"

    do_list_of_posts "$links"
}
|
|
|
|
# Report that -c, -p and -f are mutually exclusive and abort with status 1.
usage() {
    # Diagnostics belong on stderr so they don't pollute captured stdout.
    echo "Options -c, -p and -f cannot be used together." >&2
    _exit 1
}
|
|
|
|
# --- Command-line parsing -------------------------------------------------
# -c URL   scrape a whole collection page (paginated)
# -p PATH  scrape a single post
# -f FILE  scrape every post path listed in FILE (one per line)
# -d ARG   enable shell tracing / debug output
# -n ARG   no-op
collection=""
post=""
file=""
debug=0
# NOTE(review): the optstring declares n and d as *argument-taking* options
# ("n:" / "d:"), yet their handlers ignore OPTARG and act as plain flags --
# as written, callers must pass a dummy value (e.g. "-d 1"). Confirm whether
# plain "nd" flags were intended.
while getopts ":n:d:c:p:f:" opt; do
    case $opt in
        d)
            set -x
            debug=1
            ;;
        n)
            # No op
            no_op="1"
            ;;
        c)
            # Collection
            # The OPTARG comparisons catch the case where the next option
            # letter was swallowed as this option's argument (e.g. "-c -p"),
            # in addition to plain mutual exclusion with -p/-f.
            if [ -n "$post" ] || [ -n "$file" ] || [ "$OPTARG" == "-p" ] || [ "$OPTARG" == "-f" ]; then
                usage
            fi
            collection="$OPTARG"
            ;;
        p)
            # post
            if [ -n "$collection" ] || [ -n "$file" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-f" ]; then
                usage
            fi
            post="$OPTARG"
            ;;
        f)
            # file of posts
            if [ -n "$collection" ] || [ -n "$post" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-p" ]; then
                usage
            fi
            file="$OPTARG"
            ;;
        \?)
            echo "Invalid option: -$OPTARG"
            _exit 1
            ;;
        :)
            echo "Option -$OPTARG requires an argument."
            _exit 1
            ;;
    esac
done
|
|
|
|
echo "debug: $debug"
# Take the single-instance lock before doing any work; released at the end.
lock

# Collection
if [ -n "$collection" ]; then
    echo "Scrapping collection.."
    # NOTE(review): $base_url is assigned only here, but $idx (which embeds
    # the host/path of $base_url) was already computed at the top of the
    # script -- so the per-collection index path never takes effect.
    # Verify the intended ordering.
    base_url="$collection"
    ## For pages 1 to x
    # Page 0 is the bare URL; subsequent pages are addressed in steps of
    # 30 posts via the ?page= query parameter.
    for ((i=0; i<10; i++)); do
        if [ $i -eq 0 ]; then
            current_url="$base_url"
        else
            current_url="${base_url}?page=$((i * 30))"
        fi

        #Do collection page
        do_collection_page "$current_url"
        # Pause between page fetches to go easy on the remote server.
        sleep 10
    done
fi

# Single post
if [ -n "$post" ]; then
    echo "Scrapping post.."
    do_post "$post"
fi

# File of posts
if [ -n "$file" ]; then
    echo "Scrapping file"
    # Check if the file exists
    if [ -e "$file" ]; then
        # Open the file for reading
        while IFS= read -r line; do
            # Process each line here, for example, echo it
            do_post "$line"
        done < "$file"
    else
        echo "File not found: $file"
    fi
fi

unlock