Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import http.cookiejar
import os
import re
import shlex
import urllib.parse
import urllib.request
from glob import glob
- f = open("downloadProducts.sh",'w', encoding='utf-8')
- for i in range(1,57):
- url = "https://b2b.example.com/page/"+str(i)+"/?s=parameterHere" # this is from a wordpress site
- cj = http.cookiejar.MozillaCookieJar()
- cj.load(os.path.join(os.path.expanduser("~"), ".netscape", "cookies.txt")) # path from local cookie file (downloaded from Chrome)
- opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
- r = opener.open(url)
- for lines in r.readlines():
- if '<h2 class="entry-title">' in str(lines): # in case of encoding error (since site is using Greek language) use decode cp1252
- flag = 1
- match = re.search(r'href=[\'"]?([^\'" >]+)', str(lines))
- if match:
- productURL = match.group(0).replace('href="',"")
- array = productURL.split('/')
- size = len(array)
- sku = array[size-2].split('-')[len(array[size-2].split('-'))-1] # format of product URL looks like this https://b2b.example.com/product/necklace-base-metal-sku/
- f.write('curl --cookie cookies.txt "' + productURL + '" -o ' + sku +'.html\n') # https://stackoverflow.com/questions/55608561/python3-issue-using-cookiejar-and-urllib-request since there is no responce from there, the other method is to use curl
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement