init
This commit is contained in:
239
download.sh
Normal file
239
download.sh
Normal file
@ -0,0 +1,239 @@
|
||||
#!/bin/bash

# Index file: one already-downloaded post link per line.
# The sub-directory is everything after the second "/" of base_url
# (for "https://host/path" that is "host/path").
# NOTE(review): nothing before this line assigns base_url, so unless it is
# inherited from the environment the expansion is empty and the path becomes
# ".../blog//idx" - confirm whether a per-site index was intended.
idx="/storage/disk1/X/idx/blog/$(printf '%s\n' "$base_url" | cut -d'/' -f3-)/idx"

# File that lock() opens on fd 200 and flocks.
LOCK_FILE="/tmp/my_script.lock"

## Setup idx
# Directory that holds the index file.
directory_path="$(dirname "$idx")"

# mkdir -p succeeds when the directory already exists, so no pre-check is
# needed. Abort when it cannot be created: without an index every post would
# look new and nothing could be recorded.
if ! mkdir -p "$directory_path"; then
  echo "Cannot create index directory: $directory_path" >&2
  exit 1
fi

# Create the index file only when it is missing (an existing file is left
# untouched, including its timestamp).
if [[ ! -e "$idx" ]]; then
  if ! touch "$idx"; then
    echo "Cannot create index file: $idx" >&2
    exit 1
  fi
fi
|
||||
|
||||
# Acquire the single-instance lock.
# Globals:  LOCK_FILE (read); file descriptor 200 is opened on that file.
# Outputs:  progress message on stdout, failure message on stderr.
# Exits the whole script with status 1 when the lock is already held.
lock() {
  echo "Creating lock.."
  exec 200>"$LOCK_FILE"
  if ! flock -n 200; then
    # This must run in the current shell: inside a ( ... ) subshell "exit"
    # would only leave the subshell and the script would carry on unlocked.
    echo "Lock failed - process exist!!" >&2
    exit 1
  fi
}
|
||||
|
||||
# Release the lock taken by lock().
# Outputs: progress message on stdout.
# Safe to call when no lock was taken: _exit (and therefore usage) can run
# during option parsing, before lock() has opened file descriptor 200.
unlock() {
  echo "Removing lock.."
  # Probe whether fd 200 is open; the redirection fails quietly when it is not.
  if { true >&200; } 2>/dev/null; then
    # Release the lock
    flock -u 200
    # Close the descriptor so a later call is a harmless no-op.
    exec 200>&-
  fi
}
|
||||
# Release the lock, then terminate the script.
# Arguments: $1 - exit status; when omitted, the status of unlock is used.
_exit() {
  unlock
  local unlock_status=$?
  # Quoted so the argument always reaches "exit" as a single word; an
  # unquoted multi-word value makes "exit" refuse to run ("too many
  # arguments") and the script would keep going.
  exit "${1:-$unlock_status}"
}
|
||||
|
||||
# Resolve a post link through the helper service on localhost:3000.
# Arguments: $1 - site-relative post link (it is appended to the site origin).
# Outputs:   the service's response body on stdout; errors on stderr.
# Returns:   1 when curl itself fails, otherwise 0.
# The reply is presumably a two-element JSON-style array of the form
# ["<title>","<media url>"] - that is the shape do_post matches against.
get_link_for_item() {
  local item=$1
  local content

  # POST the absolute post URL as plain text and capture the reply.
  if ! content=$(curl -s -X POST localhost:3000 -H "Content-Type: text/plain" --data "https://sxyprn.com$item"); then
    # stdout is the caller's data channel, so the diagnostic goes to stderr.
    echo "Failed to download the URL: $item" >&2
    return 1
  fi

  printf '%s\n' "$content"
}
|
||||
|
||||
# Print a message prefixed with the current local time (YYYY-MM-DD HH:MM:SS).
# Arguments: $1 - message text (further arguments are ignored).
# Outputs:   one line on stdout.
log() {
  local message=$1
  local stamp

  # Kept in a quoted variable: an unquoted $(date ...) is split on IFS, which
  # mangles the timestamp whenever IFS contains ":" or "-".
  stamp=$(date '+%F %H:%M:%S')
  printf '%s %s\n' "$stamp" "$message"
}
|
||||
|
||||
# Download a listing page and print the post links found on it.
# Arguments: $1 - URL of the listing page.
# Outputs:   one link per line on stdout, query string removed.
# Returns:   1 when curl fails; otherwise the status of the extraction pipeline.
get_items() {
  local url=$1
  local content

  # Use curl to download the URL and store its content in the 'content' variable
  if ! content=$(curl -s "$url"); then
    # Callers capture stdout as the list of links, so the log line has to go
    # to stderr or it would be processed as if it were a post link.
    log "Failed to download the URL: $url" >&2
    return 1
  fi

  # Put every tag on its own line, keep the tags carrying class='js-pop',
  # take the first single-quoted value of each and drop anything after "?".
  printf '%s\n' "$content" \
    | sed 's/>/>\n/g' \
    | grep "class='js-pop'" \
    | awk -F"'" '{print $2}' \
    | cut -d'?' -f1
}
|
||||
|
||||
# Check whether a link is already recorded in the index file.
# Globals:   idx (read) - path of the index file.
# Arguments: $1 - link to look for.
# Returns:   0 when a line of the index equals the link exactly, 1 otherwise
#            (including when grep reports an error).
link_exists_in_file() {
  local link_to_check="$1"

  # -F: literal string, -x: whole-line match, -q: no output.
  # "--" ends option parsing so a link starting with "-" is still a pattern.
  if grep -Fxq -- "$link_to_check" "$idx"; then
    return 0
  fi
  return 1
}
|
||||
|
||||
# Download a media URL into "<name>.mp4" in the current directory.
# Arguments: $1 - URL to download.
#            $2 - title used for the file name: cut to 50 characters, then
#                 every character outside [a-zA-Z0-9_.-] becomes "_".
# Outputs:   progress through log.
# Returns:   1 when curl fails (now including HTTP error responses), else 0.
download_and_save_link() {
  local url="$1"
  local filename="$2"
  local truncated="${filename:0:50}"
  local sanitized="${truncated//[^a-zA-Z0-9_.-]/_}"

  log "Downloading $url with name $sanitized"

  # --fail makes curl exit non-zero on HTTP errors; without it an error page
  # is saved as the .mp4, curl reports success and the caller records the
  # post as downloaded.
  if ! curl --fail -L -o "$sanitized.mp4" "$url"; then
    log "Curl failed to download the URL: $url"
    return 1
  fi
}
|
||||
|
||||
# Expected shape of a resolver reply: ["<first>","<second>"].
# Capture group 1 is used as the file title, group 2 as the download URL.
pattern='^\["([^"]*)","([^"]*)"\]$'

# Process one post link (e.g. /post/6545eda2cb76e.html): skip it when it is
# already in the index, otherwise resolve it, download the media and record
# the link in the index on success.
# Globals:   idx (appended to), pattern (read).
# Arguments: $1 - post link exactly as it is stored in the index.
# Outputs:   progress through log.
do_post() {
  local line="$1"
  # Kept local so repeated calls do not leak state into the global scope.
  local result title media_url

  # An empty link would be resolved as the bare site root; ignore it.
  if [[ -z "$line" ]]; then
    return 0
  fi

  if link_exists_in_file "$line"; then
    log "Link already exists: $line"
    return 0
  fi

  log "Resolving link $line"
  result="$(get_link_for_item "$line")"

  # Test if the string matches the pattern
  if [[ ! $result =~ $pattern ]]; then
    log "no match $result"
    return 0
  fi

  title="${BASH_REMATCH[1]}"
  media_url="${BASH_REMATCH[2]}"
  if download_and_save_link "$media_url" "$title"; then
    log "Download success - updating index"
    echo "$line" >> "$idx"
  else
    log "Download failed"
  fi
}
|
||||
|
||||
# Run do_post for every line of a newline-separated list of post links.
# Arguments: $1 - the list, one link per line.
do_list_of_posts() {
  local links="$1"
  # Local so the loop does not overwrite a global "line" used by callers.
  local line

  # IFS= and -r keep each line exactly as it is (whitespace and backslashes).
  while IFS= read -r line; do
    do_post "$line"
  done <<< "$links"
}
|
||||
|
||||
# Fetch a page of posts ie https://sxyprn.com/Woodman-Casting-X.html
# and process every post link found on it.
# Arguments: $1 - URL of the listing page.
# Returns:   1 when the page could not be fetched, 0 when it lists no posts,
#            otherwise the status of do_list_of_posts.
do_collection_page() {
  local current_url="$1"
  local links

  log "Current page: $current_url"

  # Download and parse out items. When get_items fails, whatever it printed
  # is not a list of links, so it must not be handed on.
  if ! links="$(get_items "$current_url")"; then
    return 1
  fi

  # Nothing found: avoid passing a single empty "link" down the chain.
  if [[ -z "$links" ]]; then
    return 0
  fi

  do_list_of_posts "$links"
}
|
||||
|
||||
# Report that mutually exclusive options were combined, then leave through
# _exit with status 1.
# Outputs: the message on stderr, since it is a diagnostic and not a result.
usage() {
  echo "Options -c, -p and -f cannot be used together." >&2
  _exit 1
}
|
||||
|
||||
# Command-line state. -c, -p and -f each select a mode and exclude one another.
debug=0
file=""
post=""
collection=""

# The leading ":" puts getopts in silent mode: a missing argument arrives as
# opt=":" and an unknown option as opt="?", both with the letter in OPTARG.
# NOTE(review): the option string declares -n and -d as taking an argument
# ("n:", "d:") although neither handler reads OPTARG - confirm whether they
# were meant to be plain flags.
while getopts ":n:d:c:p:f:" opt; do
  case $opt in
    d)
      # Trace every command from here on.
      set -x
      debug=1
      ;;
    n)
      # No op
      no_op="1"
      ;;
    c)
      # Collection. Also refuse when the argument is itself one of the other
      # mode options, i.e. the real argument was left out.
      if [[ -n "$post" || -n "$file" || "$OPTARG" == "-p" || "$OPTARG" == "-f" ]]; then
        usage
      fi
      collection="$OPTARG"
      ;;
    p)
      # Single post
      if [[ -n "$collection" || -n "$file" || "$OPTARG" == "-c" || "$OPTARG" == "-f" ]]; then
        usage
      fi
      post="$OPTARG"
      ;;
    f)
      # File of posts
      if [[ -n "$collection" || -n "$post" || "$OPTARG" == "-c" || "$OPTARG" == "-p" ]]; then
        usage
      fi
      file="$OPTARG"
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      _exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument."
      _exit 1
      ;;
  esac
done
|
||||
|
||||
echo "debug: $debug"
lock

# Overall result; set to 1 when a requested input cannot be processed.
status=0

# Collection
if [[ -n "$collection" ]]; then
  echo "Scrapping collection.."
  base_url="$collection"

  page_count=10 # number of listing pages to walk
  page_step=30  # increment of the "page" query value from one page to the next
  page_delay=10 # seconds to wait between two page requests

  ## For pages 1 to x
  for ((i = 0; i < page_count; i++)); do
    # The first page is the bare URL; later ones carry ?page=<i * step>.
    if ((i == 0)); then
      current_url="$base_url"
    else
      current_url="${base_url}?page=$((i * page_step))"
    fi

    #Do collection page
    do_collection_page "$current_url"

    # Pause between requests only; nothing follows the last page.
    if ((i < page_count - 1)); then
      sleep "$page_delay"
    fi
  done
fi

# Single post
if [[ -n "$post" ]]; then
  echo "Scrapping post.."
  do_post "$post"
fi

# File of posts
if [[ -n "$file" ]]; then
  echo "Scrapping file"
  if [[ -e "$file" ]]; then
    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline;
    # read returns non-zero for it although it did fill the variable.
    while IFS= read -r line || [[ -n "$line" ]]; do
      do_post "$line"
    done < "$file"
  else
    echo "File not found: $file" >&2
    status=1
  fi
fi

unlock
exit "$status"
|
||||
Reference in New Issue
Block a user