#!/bin/bash

# NOTE(review): $base_url is not set yet at this point -- it is only assigned
# much later (collection mode), so the host/path segment below expands to the
# empty string and idx always becomes ".../blog//idx". Confirm whether this
# assignment was meant to run after option parsing.
idx="/storage/disk1/X/idx/blog/$(echo "$base_url" | cut -d'/' -f3-)/idx"

# Lock file used by lock()/unlock() to keep a single instance running.
LOCK_FILE="/tmp/my_script.lock"

## Setup idx

# Ensure the directory holding the index file exists.
# mkdir -p is a no-op when the directory is already there, so the
# original "[ ! -d ] && mkdir" guard was redundant.
directory_path="$(dirname "$idx")"
mkdir -p "$directory_path"

# Ensure the index file itself exists so later grep/append calls succeed.
if [ ! -e "$idx" ]; then
    touch "$idx"
fi
|
|
|
|
# Acquire an exclusive, non-blocking lock on $LOCK_FILE via fd 200 so that
# only one instance of the script runs at a time. Terminates the script
# when another instance already holds the lock.
lock(){
    echo "Creating lock.."
    exec 200>"$LOCK_FILE"
    # BUGFIX: the original used `|| ( echo ... && exit 1 )` -- the
    # parentheses spawn a subshell, so `exit 1` only left the subshell and
    # a second instance kept running. `{ ...; }` runs in the current shell
    # and really aborts the script; the message now goes to stderr.
    flock -n 200 || { echo "Lock failed - process exists!!" >&2; exit 1; }
}
|
|
|
|
# Release the exclusive flock held on fd 200 (opened by lock()).
# The descriptor itself stays open until the script exits.
unlock(){
    echo "Removing lock.."
    # Release the lock
    flock -u 200
}
|
|
# Release the lock, then terminate the script.
# $1 - exit status (defaults to 0 when omitted; current callers pass 1).
_exit(){
    unlock
    # Quote and default the status: the original unquoted `exit $1` fell
    # back to the previous command's status when called without an argument.
    exit "${1:-0}"
}
|
|
|
|
# Resolve a post path (e.g. /post/653e2c6329e1c.html) by POSTing the full
# site URL to the local resolver service on port 3000 and echoing whatever
# it answers. Returns 1 (with a message on stdout) when the request fails.
#
# Example reply (a JSON-style ["title","video-url"] pair):
# ["MomPov E233 ... on SexyPorn","https://sxyprn.com/cdn8/.../x86v...vid"]
get_link_for_item(){
    local item=$1
    local response

    response=$(curl -s -X POST localhost:3000 -H "Content-Type: text/plain" --data "https://sxyprn.com$item") || {
        echo "Failed to download the URL: $item"
        return 1
    }

    echo "$response"
}
|
|
|
|
# Print "$1" prefixed with a "YYYY-MM-DD HH:MM:SS" timestamp.
# The original `echo $(date ...) "$message"` left the command substitution
# unquoted and word-split; printf with quoted operands is safe for any
# message content (including -n, backslashes, multiple spaces).
log(){
    local message=$1
    printf '%s %s\n' "$(date '+%F %H:%M:%S')" "$message"
}
|
|
|
|
# Fetch a collection page and print the post paths found on it, one per
# line: the first single-quoted attribute value of every tag carrying
# class='js-pop', with any query string stripped.
# Returns 1 (logging the failure) when the download fails.
get_items(){
    local url=$1
    local content
    # (the unused `local links` from the original was removed)

    content=$(curl -s "$url") || {
        log "Failed to download the URL: $url"
        return 1
    }

    # Split the HTML after every '>' so each tag sits on its own line,
    # keep the js-pop anchors, take the text between the first pair of
    # single quotes (the href), and drop everything from '?' onward.
    echo "$content" | sed 's/>/>\n/g' | grep "class='js-pop'" | awk -F"'" '{print $2}' | cut -d'?' -f1
}
|
|
|
|
# Succeed (status 0) when $1 appears as a whole line in the index file
# $idx; fail (status 1) otherwise.
link_exists_in_file() {
    local candidate="$1"

    # -F fixed string, -x whole line, -q no output -- exit status only.
    grep -Fxq "$candidate" "$idx" && return 0

    return 1
}
|
|
|
|
# Download $1 to the current directory, naming the file after the title $2:
# the title is cut to 50 characters, every character outside
# [a-zA-Z0-9_.-] becomes '_', and ".mp4" is appended.
# Returns 1 (logging the failure) when curl cannot fetch the URL.
download_and_save_link() {
    local url="$1"
    local title="$2"
    local short="${title:0:50}"
    local safe_name="${short//[^a-zA-Z0-9_.-]/_}"

    log "Downloading $url with name $safe_name"

    if ! curl -L -o "$safe_name.mp4" "$url"; then
        log "Curl failed to download the URL: $url"
        return 1
    fi
}
|
|
|
|
# Matches the resolver's reply: a JSON-style pair ["title","video-url"].
# After [[ $result =~ $pattern ]], BASH_REMATCH[1] holds the title and
# BASH_REMATCH[2] the direct download URL (used by do_post).
pattern='^\["([^"]*)","([^"]*)"\]$'
|
# Fetch post ie https://sxyprn.com/post/6545eda2cb76e.html
|
|
# Process a single post path (e.g. /post/6545eda2cb76e.html): skip it when
# it is already recorded in the index, otherwise resolve it to a
# title/video-url pair, download the video and append the path to $idx on
# success.
do_post(){
    local line="$1"
    # BUGFIX: result/val1/val2 were assigned without `local` and leaked
    # into the global scope on every call.
    local result val1 val2

    if link_exists_in_file "$line"; then
        log "Link already exists: $line"
    else
        log "Resolving link $line"
        result="$(get_link_for_item "$line")"

        # The resolver must answer with ["title","video-url"]; anything
        # else is treated as a failed resolution.
        if [[ $result =~ $pattern ]]; then
            val1="${BASH_REMATCH[1]}"
            val2="${BASH_REMATCH[2]}"
            if download_and_save_link "$val2" "$val1"; then
                log "Download success - updating index"
                # Only index the post once the download succeeded, so a
                # failed download will be retried on the next run.
                echo "$line" >> "$idx"
            else
                log "Download failed"
            fi
        else
            log "no match $result"
        fi
    fi
}
|
|
|
|
# Iterate over rows of posts
|
|
# Run do_post for every line of the newline-separated list given in $1.
do_list_of_posts(){
    local post_list="$1"
    local post

    # IFS= and -r preserve leading/trailing whitespace and backslashes.
    while IFS= read -r post; do
        do_post "$post"
    done <<< "$post_list"
}
|
|
|
|
# Fetch a page of posts ie https://sxyprn.com/Woodman-Casting-X.html
|
|
# Fetch one collection page (e.g. https://sxyprn.com/Woodman-Casting-X.html),
# extract the post links on it and process each one.
do_collection_page(){
    local current_url="$1"
    # BUGFIX: `links` was implicitly global in the original; declare it
    # local like every other function-scoped variable in this script.
    local links

    log "Current page: $current_url"

    # Download and parse out items
    links="$(get_items "$current_url")"

    do_list_of_posts "$links"
}
|
|
|
|
# Report that -c, -p and -f are mutually exclusive and abort with status 1.
usage() {
    # Diagnostics belong on stderr so they don't pollute captured stdout.
    echo "Options -c, -p and -f cannot be used together." >&2
    _exit 1
}
|
|
|
|
# --- Command-line parsing -------------------------------------------------
# -c URL   scrape a whole collection page (paginated)
# -p PATH  scrape a single post
# -f FILE  scrape every post path listed in FILE (one per line)
# -d ARG   enable shell tracing / debug output
# -n ARG   no-op
collection=""
post=""
file=""
debug=0
# NOTE(review): the optstring declares n and d as *argument-taking* options
# ("n:" / "d:"), yet their handlers ignore OPTARG and act as plain flags --
# as written, callers must pass a dummy value (e.g. "-d 1"). Confirm whether
# plain "nd" flags were intended.
while getopts ":n:d:c:p:f:" opt; do
    case $opt in
        d)
            set -x
            debug=1
            ;;
        n)
            # No op
            no_op="1"
            ;;
        c)
            # Collection
            # The OPTARG comparisons catch the case where the next option
            # letter was swallowed as this option's argument (e.g. "-c -p"),
            # in addition to plain mutual exclusion with -p/-f.
            if [ -n "$post" ] || [ -n "$file" ] || [ "$OPTARG" == "-p" ] || [ "$OPTARG" == "-f" ]; then
                usage
            fi
            collection="$OPTARG"
            ;;
        p)
            # post
            if [ -n "$collection" ] || [ -n "$file" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-f" ]; then
                usage
            fi
            post="$OPTARG"
            ;;
        f)
            # file of posts
            if [ -n "$collection" ] || [ -n "$post" ] || [ "$OPTARG" == "-c" ] || [ "$OPTARG" == "-p" ]; then
                usage
            fi
            file="$OPTARG"
            ;;
        \?)
            echo "Invalid option: -$OPTARG"
            _exit 1
            ;;
        :)
            echo "Option -$OPTARG requires an argument."
            _exit 1
            ;;
    esac
done
|
|
|
|
echo "debug: $debug"
# Take the single-instance lock before doing any work; released at the end.
lock

# Collection
if [ -n "$collection" ]; then
    echo "Scrapping collection.."
    # NOTE(review): $base_url is assigned only here, but $idx (which embeds
    # the host/path of $base_url) was already computed at the top of the
    # script -- so the per-collection index path never takes effect.
    # Verify the intended ordering.
    base_url="$collection"
    ## For pages 1 to x
    # Page 0 is the bare URL; subsequent pages are addressed in steps of
    # 30 posts via the ?page= query parameter.
    for ((i=0; i<10; i++)); do
        if [ $i -eq 0 ]; then
            current_url="$base_url"
        else
            current_url="${base_url}?page=$((i * 30))"
        fi

        #Do collection page
        do_collection_page "$current_url"
        # Pause between page fetches to go easy on the remote server.
        sleep 10
    done
fi

# Single post
if [ -n "$post" ]; then
    echo "Scrapping post.."
    do_post "$post"
fi

# File of posts
if [ -n "$file" ]; then
    echo "Scrapping file"
    # Check if the file exists
    if [ -e "$file" ]; then
        # Open the file for reading
        while IFS= read -r line; do
            # Process each line here, for example, echo it
            do_post "$line"
        done < "$file"
    else
        echo "File not found: $file"
    fi
fi

unlock