Advertisement
fant0men

vetusware.sh

Aug 20th, 2020 (edited)
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.86 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # This is just a temporary script to parse a range of pages from
  4. # vetusware.com
  5.  
  6. set -eo pipefail
  7.  
  8. clear
  9.  
  10. out_f="${HOME}/vetusware_links-${RANDOM}.txt"
  11. out_f_sort="${HOME}/vetusware_links-sorted-${RANDOM}.txt"
  12.  
  13. touch "$out_f" "$out_f_sort"
  14.  
  15. declare -A regex
  16.  
  17. regex[type]='<p class=\"item_type\"><a href=\".*\">.*<\/a>.*<a href=\".*\">.*<\/a><\/p>.*<p class=\"item_lang\">.*<\/p>'
  18. regex[desc]='^.*<p class=\"item_description\">(.*)<\/p>.*<p><a href=\"(https:\/\/vetusware.com\/download\/.*)\">download<\/a><\/p>.*$'
  19. regex[link]='^.*<p><a href=\"(https:\/\/vetusware.com\/download\/.*)\">download<\/a><\/p>.*$'
  20. regex[name]='^.*<h3>.*<strong>(.*)</strong>.*</h3>.*$'
  21.  
  22. n_line=0
  23.  
  24. for n in {1..30}; do
  25.     tmp_f="/dev/shm/vetusware_p${n}-${RANDOM}.txt"
  26.  
  27.     curl -s -o "$tmp_f" "https://vetusware.com/category/OS/?cat=1&page=${n}"
  28.  
  29.     while read line; do
  30.         (( n_line = n_line + 1 ))
  31.  
  32.         if [[ $line =~ ${regex[type]} ]]; then
  33.             line_name=$(( n_line + 1 ))
  34.             line_desc=$(( n_line + 2 ))
  35.  
  36.             sed -Ei "${line_name}s|${regex[name]}|name: \1|" "$tmp_f"
  37.  
  38.             line_desc_tmp=$(sed -n "${line_desc} p" "$tmp_f")
  39.  
  40.             if [[ $line_desc_tmp =~ ${regex[desc]} ]]; then
  41.                 sed -Ei "${line_desc}s|${regex[desc]}|desc: \1 link: \2|" "$tmp_f"
  42.             else
  43.                 sed -Ei "${line_desc}s|${regex[link]}|link: \1|" "$tmp_f"
  44.             fi
  45.  
  46.             sed -n "${line_name},${line_desc} p" "$tmp_f" >> "$out_f"
  47.         fi
  48.     done <"$tmp_f"
  49.  
  50.     rm "$tmp_f"
  51.     n_line=0
  52. done
  53.  
  54. regex[name]='^name: '
  55. regex[desc]='^desc: '
  56. n_line=0
  57.  
  58. while read line; do
  59.     (( n_line = n_line + 1 ))
  60.  
  61.     if [[ $line =~ ${regex[name]} ]]; then
  62.         line_desc=$(( n_line +1 ))
  63.  
  64.         sed -n "${n_line},${line_desc} p" "$out_f" | grep -iqF -e source -e dos -e 'os/2' -e windows
  65.  
  66.         if [[ $? -eq 0 ]]; then
  67.             sed -n "${n_line},${line_desc} p" "$out_f" >> "$out_f_sort"
  68.             echo >> "$out_f_sort"
  69.         fi
  70.     fi
  71. done <"$out_f"
  72.  
  73. rm "$out_f"
  74.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement