Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# This script retrieves ratings and rating counts from a user's Itch.io library.
#
# Stage 1: download every page of the purchases list to
# json/my-purchases<N>.json, stopping at the first page that reports zero items.

# Use your web browser's dev tools to copy this link as curl (POSIX) address:
# https://itch.io/my-purchases?page=1&format=json
# Then insert the options from that command into the array below, one word per
# element, for example: curl_headers=(-H 'Cookie: ...' -H 'User-Agent: ...')
curl_headers=()

if ((${#curl_headers[@]} == 0)); then
  echo "No request headers configured; edit curl_headers in $0 first." >&2
  exit 1
fi

# The output redirect below needs the directory to exist. -f keeps a first run
# (no old pages to delete) from printing an error.
mkdir -p json
rm -fv json/my-purchases*.json

i=1
while true; do
  echo "Reading purchases page $i."
  page_file="json/my-purchases$i.json"

  # -f makes curl return non-zero on HTTP errors, so a failed request stops the
  # script instead of being retried forever with an ever-growing page number.
  if ! curl -sf "https://itch.io/my-purchases?page=$i&format=json" \
    "${curl_headers[@]}" >"$page_file"; then
    echo "Download of purchases page $i failed; aborting." >&2
    rm -fv "$page_file"
    exit 1
  fi

  # Be polite to the server between requests.
  sleep 1

  # A page with zero items marks the end of the library. -q keeps the matched
  # JSON from being dumped to the terminal.
  if grep -q '"num_items":0' "$page_file"; then
    rm -v "$page_file"
    break
  fi

  # NOTE(review): presumably every valid page carries a "num_items" field (the
  # end-of-list check above relies on it) -- confirm. Anything else, such as a
  # login page, would otherwise keep this loop running forever.
  if ! grep -q '"num_items":' "$page_file"; then
    echo "Unexpected response for page $i (see $page_file); aborting." >&2
    exit 1
  fi

  i=$((i + 1))
done
# Notes
# - find does not return names in any particular order, so the page files are
#   passed through `sort -V` (version sort): file1, file2, file10 instead of
#   file1, file10, file2. This keeps the URLs in the order the pages list them.
# - The awk command prints unique lines only. So like sort -u, but without sorting.

# Read the purchases JSON on stdin and print each distinct
# https://<name>.itch.io/<slug> URL once, in order of first appearance.
extract_game_urls() {
  # Backslashes (e.g. JSON "\/" escapes) are dropped before matching. The last
  # path segment stops at a slash, double quote or whitespace so that text
  # following the URL is not swallowed into it.
  tr -d '\\' \
    | grep -o 'https://[^.]*\.itch\.io/[^/"[:space:]]*' \
    | awk '!seen[$0]++'
}

# NUL-delimited so unusual file names survive. xargs -r skips cat entirely when
# no page file exists (a bare `cat` would block waiting on standard input).
find json -name '*.json' -print0 \
  | sort -zV \
  | xargs -0 -r cat -- \
  | extract_game_urls >"url_list.txt"
# Stage 3: download each game page listed in url_list.txt (cached under html/)
# and write one tab-separated line per game to parse_purchases.txt:
#   ratingValue <TAB> ratingCount <TAB> pathname <TAB> url

# Print the first `ratingCount":<digits>` value found in file $1, or 0 if the
# file has none. Only the first match is used so the result is always one line.
rating_count() {
  local count
  count=$(grep -oP 'ratingCount":\K\d+' -- "$1" | head -n 1)
  printf '%s\n' "${count:-0}"
}

# Print the first `ratingValue":"<text>"` value found in file $1, or 0.0 if the
# file has none.
rating_value() {
  local value
  value=$(grep -oP 'ratingValue":"\K[^"]*' -- "$1" | head -n 1)
  printf '%s\n' "${value:-0.0}"
}

mkdir -p "html"

# Start from an empty report. Unlike `rm -v`, this does not print an error when
# the file does not exist yet.
: >parse_purchases.txt

# Read line by line so URLs are neither word-split nor glob-expanded.
while IFS= read -r url || [[ -n "$url" ]]; do
  [[ -n "$url" ]] || continue

  # Split https://<host>/<pathname> on "/": host is field 3, pathname field 4.
  IFS=/ read -r _ _ host pathname _ <<<"$url"

  # The cache name includes the host because two creators can use the same
  # pathname; keyed on pathname alone, they would share one cached page.
  filename="html/${host}_${pathname}.html"

  if [[ ! -e "$filename" ]]; then
    echo "$pathname"
    # Download to a temporary name first: `wget -O` creates the output file
    # even when the request fails, and an empty file left in the cache would be
    # reported as "no ratings" on every later run.
    if wget -O "$filename.part" -q "$url"; then
      mv -- "$filename.part" "$filename"
      sleep 1
    else
      echo "Failed to download $url; skipping." >&2
      rm -f -- "$filename.part"
      sleep 1
      continue
    fi
  fi

  ratingCount=$(rating_count "$filename")
  ratingValue=$(rating_value "$filename")

  # printf keeps any backslashes in the data literal, unlike `echo -e`.
  printf '%s\t%s\t%s\t%s\n' \
    "$ratingValue" "$ratingCount" "$pathname" "$url" >>parse_purchases.txt
done <"url_list.txt"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement