#!/bin/bash
#
# Scrape posts from sxyprn.com: resolve each post's direct video URL via a
# local resolver service (HTTP POST to localhost:3000) and download it with
# curl. Already-downloaded posts are recorded in an index file so reruns
# skip them; a flock-based lock file prevents concurrent instances.
#
# Usage:
#   script -c <collection_url>   scrape a paginated collection (10 pages max)
#   script -p <post_path>        scrape a single post (e.g. /post/xxx.html)
#   script -f <file>             scrape every post path listed in a file
#   script -d                    enable shell tracing (set -x)
#   script -n                    no-op flag (currently unused)

# NOTE(review): $base_url is referenced here but is only assigned later, in
# collection mode — unless the caller exports base_url, the host component
# of this path is empty. Confirm whether that is intentional.
idx="/storage/disk1/X/idx/blog/$(echo $base_url | cut -d'/' -f3-)/idx"
LOCK_FILE="/tmp/my_script.lock"

## Setup idx — ensure the index file and its parent directory exist.
directory_path="$(dirname "$idx")"
if [ ! -d "$directory_path" ]; then
  mkdir -p "$directory_path"
fi
if [ ! -e "$idx" ]; then
  touch "$idx"
fi

# Acquire an exclusive, non-blocking lock on $LOCK_FILE via fd 200.
# Exits the whole script if another instance already holds the lock.
lock(){
  echo "Creating lock.."
  exec 200>"$LOCK_FILE"
  # BUGFIX: the original used a subshell `( echo ... && exit 1 )`, so the
  # `exit` only terminated the subshell and the script carried on WITHOUT
  # the lock. A brace group runs in the current shell and really exits.
  flock -n 200 || { echo "Lock failed - another process is running!!"; exit 1; }
}

# Release the lock held on fd 200.
unlock(){
  echo "Removing lock.."
  flock -u 200
}

# Release the lock and exit with the given status.
# NOTE(review): may be called (via usage) before lock() has opened fd 200;
# flock then complains to stderr but the exit still proceeds.
_exit(){
  unlock
  exit "$1"
}

# Resolve a post path to its video metadata by POSTing the full post URL to
# the local resolver service. Prints the service's response, which for a
# successful resolve looks like:
#   ["<title text>","<direct video url>"]
get_link_for_item(){
  local item=$1
  local content
  content=$(curl -s -X POST localhost:3000 -H "Content-Type: text/plain" --data "https://sxyprn.com$item") || {
    echo "Failed to download the URL: $item"
    return 1
  }
  echo "$content"
}

# Log a timestamped message to stdout.
log(){
  local message=$1
  echo "$(date '+%F %H:%M:%S')" "$message"
}

# Download a collection page and print the post paths it links to, one per
# line: hrefs of anchors carrying class='js-pop', query strings stripped.
get_items(){
  local url=$1
  local content
  content=$(curl -s "$url") || {
    log "Failed to download the URL: $url"
    return 1
  }
  # Split tags onto separate lines, keep js-pop anchors, take the href
  # (2nd single-quote-delimited field), drop any ?query suffix.
  echo "$content" | sed 's/>/>\n/g' | grep "class='js-pop'" | awk -F"'" '{print $2}' | cut -d'?' -f1
}

# Return 0 if the exact link already appears as a full line in the index.
link_exists_in_file() {
  local link_to_check="$1"
  grep -Fxq "$link_to_check" "$idx"
}

# Download $1 and save it as "<sanitized first 50 chars of $2>.mp4".
download_and_save_link() {
  local url="$1"
  local filename="$2"
  local truncated="${filename:0:50}"
  # Replace anything outside [a-zA-Z0-9_.-] so the title is filesystem-safe.
  local sanitized="${truncated//[^a-zA-Z0-9_.-]/_}"
  log "Downloading $url with name $sanitized"
  if ! curl -L -o "$sanitized.mp4" "$url"; then
    log "Curl failed to download the URL: $url"
    return 1
  fi
}

# Shape of a successful resolver response: ["<title>","<video url>"]
pattern='^\["([^"]*)","([^"]*)"\]$'

# Process a single post path (e.g. /post/6545eda2cb76e.html): skip if already
# indexed, otherwise resolve, download, and append it to the index.
do_post(){
  local line="$1"
  if link_exists_in_file "$line"; then
    log "Link already exists: $line"
  else
    log "Resolving link $line"
    result="$(get_link_for_item "$line")"
    if [[ $result =~ $pattern ]]; then
      val1="${BASH_REMATCH[1]}"   # title
      val2="${BASH_REMATCH[2]}"   # direct video URL
      if download_and_save_link "$val2" "$val1"; then
        log "Download success - updating index"
        echo "$line" >> "$idx"
      else
        log "Download failed"
      fi
    else
      log "no match $result"
    fi
  fi
}

# Run do_post for each line of a newline-separated list of post paths.
do_list_of_posts(){
  local links="$1"
  while IFS= read -r line; do
    do_post "$line"
  done <<< "$links"
}

# Scrape one collection page (e.g. https://sxyprn.com/Woodman-Casting-X.html):
# extract its post links and process each.
do_collection_page(){
  local current_url="$1"
  log "Current page: $current_url"
  links="$(get_items "$current_url")"
  do_list_of_posts "$links"
}

usage() {
  echo "Options -c, -p and -f cannot be used together."
  _exit 1
}

collection=""
post=""
file=""
debug=0

# BUGFIX: the original optstring ":n:d:c:p:f:" declared -n and -d as
# argument-taking, yet their handlers never read $OPTARG — a bare -d or -n
# always fell into the ':' "requires an argument" branch, making debug mode
# unreachable as a plain flag. They are flags: no trailing colon.
while getopts ":ndc:p:f:" opt; do
  case $opt in
    d)
      set -x
      debug=1
      ;;
    n)
      # No op (placeholder; value currently unused)
      no_op="1"
      ;;
    c)
      # Collection — reject if -p/-f already given, or a flag was passed
      # where the collection URL was expected.
      if [ -n "$post" ] || [ -n "$file" ] || [ "$OPTARG" == "-p" ] || [ "$OPTARG" == "-f" ]; then
        usage
      fi
      collection="$OPTARG"
      ;;
    p)
      # Single post — mutually exclusive with -c/-f.
      if [ -n "$collection" ] || [ -n "$file" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-f" ]; then
        usage
      fi
      post="$OPTARG"
      ;;
    f)
      # File of posts — mutually exclusive with -c/-p.
      if [ -n "$collection" ] || [ -n "$post" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-p" ]; then
        usage
      fi
      file="$OPTARG"
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      _exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument."
      _exit 1
      ;;
  esac
done

echo "debug: $debug"

lock

# Collection mode: walk up to 10 pages; page offsets step by 30 items.
if [ -n "$collection" ]; then
  echo "Scraping collection.."
  base_url="$collection"
  for ((i=0; i<10; i++)); do
    if [ $i -eq 0 ]; then
      current_url="$base_url"
    else
      current_url="${base_url}?page=$((i * 30))"
    fi
    do_collection_page "$current_url"
    sleep 10   # be polite between page fetches
  done
fi

# Single-post mode
if [ -n "$post" ]; then
  echo "Scraping post.."
  do_post "$post"
fi

# File mode: one post path per line.
if [ -n "$file" ]; then
  echo "Scraping file"
  if [ -e "$file" ]; then
    while IFS= read -r line; do
      do_post "$line"
    done < "$file"
  else
    echo "File not found: $file"
  fi
fi

unlock