Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Temporary script that parses a range of listing pages from the OS
# category of vetusware.com.
#
# For every listing found on pages FIRST_PAGE..LAST_PAGE it extracts two
# lines:
#   name: <title>
#   desc: <description> link: <download url>     (or just "link: <url>")
# Only entries whose two lines contain one of KEYWORDS (case-insensitive,
# fixed-string match) are kept. Each kept entry is appended to the result
# file followed by one blank line.
#
# Output:   ${HOME}/vetusware_links-sorted-<random>.txt
# Requires: bash 4+ (associative arrays, mapfile), curl, grep, mktemp.

set -eo pipefail

readonly BASE_URL='https://vetusware.com/category/OS/?cat=1'
readonly FIRST_PAGE=1
readonly LAST_PAGE=30
readonly -a KEYWORDS=(source dos 'os/2' windows)

# Patterns for the HTML lines of one listing. A line matching "type" marks a
# listing; the name is expected on the next line and the description and/or
# download link on the line after that.
declare -Ar REGEX=(
  [type]='<p class=\"item_type\"><a href=\".*\">.*<\/a>.*<a href=\".*\">.*<\/a><\/p>.*<p class=\"item_lang\">.*<\/p>'
  [desc]='^.*<p class=\"item_description\">(.*)<\/p>.*<p><a href=\"(https:\/\/vetusware.com\/download\/.*)\">download<\/a><\/p>.*$'
  [link]='^.*<p><a href=\"(https:\/\/vetusware.com\/download\/.*)\">download<\/a><\/p>.*$'
  [name]='^.*<h3>.*<strong>(.*)</strong>.*</h3>.*$'
)

# Scratch directory; global (not local) so the EXIT trap can still see it
# after main has returned.
work_dir=''

# Prints a message to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Removes the scratch directory on any exit path.
cleanup() {
  if [[ -n "$work_dir" ]]; then
    rm -rf -- "$work_dir"
  fi
}

#######################################
# Extracts the name and description/link lines of every listing in a page.
# Lines that do not match the expected pattern are passed through unchanged.
# Globals:   REGEX (read)
# Arguments: $1 - path to a downloaded HTML page
# Outputs:   up to two lines per listing on stdout
#######################################
extract_entries() {
  local page_file=$1
  local -a lines=()
  local i total name_line detail_line

  # Read the raw lines (no backslash or whitespace processing) so that array
  # indexes always correspond to the real line positions in the page.
  mapfile -t lines <"$page_file"
  total=${#lines[@]}

  for (( i = 0; i < total; i++ )); do
    [[ ${lines[i]} =~ ${REGEX[type]} ]] || continue

    if (( i + 1 < total )); then
      name_line=${lines[i + 1]}
      if [[ $name_line =~ ${REGEX[name]} ]]; then
        name_line="name: ${BASH_REMATCH[1]}"
      fi
      printf '%s\n' "$name_line"
    fi

    if (( i + 2 < total )); then
      detail_line=${lines[i + 2]}
      if [[ $detail_line =~ ${REGEX[desc]} ]]; then
        detail_line="desc: ${BASH_REMATCH[1]} link: ${BASH_REMATCH[2]}"
      elif [[ $detail_line =~ ${REGEX[link]} ]]; then
        detail_line="link: ${BASH_REMATCH[1]}"
      fi
      printf '%s\n' "$detail_line"
    fi
  done
}

#######################################
# Keeps the entries that mention one of the keywords.
# An entry is a line starting with "name: " plus the line following it.
# Globals:   KEYWORDS (read)
# Arguments: $1 - file produced by extract_entries
# Outputs:   each kept entry followed by a blank line, on stdout
#######################################
filter_entries() {
  local entries_file=$1
  local -a entries=() grep_args=()
  local i total entry keyword

  for keyword in "${KEYWORDS[@]}"; do
    grep_args+=(-e "$keyword")
  done

  mapfile -t entries <"$entries_file"
  total=${#entries[@]}

  for (( i = 0; i < total; i++ )); do
    [[ ${entries[i]} == 'name: '* ]] || continue

    entry=${entries[i]}
    if (( i + 1 < total )); then
      entry+=$'\n'${entries[i + 1]}
    fi

    # grep is tested directly by `if`: a non-matching entry is the normal
    # case and must not trip `set -e`.
    if grep -iqF "${grep_args[@]}" <<<"$entry"; then
      printf '%s\n\n' "$entry"
    fi
  done
}

main() {
  local -r out_f_sort="${HOME}/vetusware_links-sorted-${RANDOM}.txt"
  local page_file entries_file n

  # Clearing the screen is cosmetic; never let it abort the run.
  if [[ -t 1 ]]; then
    clear || true
  fi

  work_dir=$(mktemp -d) || die 'cannot create a temporary directory'
  trap cleanup EXIT
  page_file="${work_dir}/page.html"
  entries_file="${work_dir}/entries.txt"
  : >"$entries_file"

  for (( n = FIRST_PAGE; n <= LAST_PAGE; n++ )); do
    # Start from an empty file so a previous page is never parsed twice.
    : >"$page_file"
    curl -sS -o "$page_file" "${BASE_URL}&page=${n}" \
      || die "failed to download page ${n}"
    extract_entries "$page_file" >>"$entries_file"
  done

  filter_entries "$entries_file" >>"$out_f_sort"

  # The file name contains a random number, so tell the user where it is.
  printf 'Results written to %s\n' "$out_f_sort" >&2
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement