Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
from codecs import encode
from operator import itemgetter

import requests
from lxml import html
# --- Account credentials ----------------------------------------------------
# NOTE: rot13 is trivial obfuscation, not encryption. Supply real credentials
# through the POSHMARK_USERNAME / POSHMARK_PASSWORD environment variables;
# the embedded values are only used as a fallback when those are unset/empty.
USERNAME = os.environ.get('POSHMARK_USERNAME') or encode('grfgnppbhag2016', 'rot13')
PASSWORD = os.environ.get('POSHMARK_PASSWORD') or encode('IlIllIlIl.', 'rot13')

# --- Search constraints -----------------------------------------------------
# These are kept as strings because they are substituted directly into the
# search URL.
brand = 'cole+haan'  # presumably '+' is the URL-encoded space -- verify
category = 'shoes'
minimumPrice = '10'
maximumPrice = '25'
size = '7'

# --- Filters applied to each scraped listing --------------------------------
minimumOriginalPrice = 1
maximumOriginalPrice = 600
minimumMarkdown = 95  # percent off the original price

# Accumulator for every listing that passes the filters.
newBuyers = []

# Flag flipped to True once the search results are exhausted.
# (A `global` statement is meaningless at module scope, so none is used here.)
endOfListing = False
LOGIN_URL = "https://poshmark.com/login"

# A single session is reused for every request so that cookies received
# during login are sent along with the later search requests.
session_requests = requests.session()

# Fetch the login form first: it carries the authenticity token that has to
# be posted back together with the credentials.
result = session_requests.get(LOGIN_URL)
tree = html.fromstring(result.text)
token_values = tree.xpath("//input[@name='authenticity_token']/@value")
if not token_values:
    # Fail with a clear message instead of a bare IndexError.
    raise RuntimeError("No authenticity_token field found on " + LOGIN_URL)
# Take the first match in document order; going through set() would pick an
# arbitrary value whenever the page carried more than one distinct token.
authenticity_token = token_values[0]

payload = {
    "username": USERNAME,
    "password": PASSWORD,
    "authenticity_token": authenticity_token,
}

# Perform login
result = session_requests.post(LOGIN_URL, data=payload, headers=dict(referer=LOGIN_URL))
- # Divide search URL for input constraints
def getURL(page):
    """Build the search URL for one page of results.

    The module-level search settings (brand, category, size and the price
    range) are substituted as-is; ``page`` fills the ``max_id`` query
    parameter.
    """
    template = (
        'https://poshmark.com/search?query={}&category={}&max_id={}'
        '&size%5B%5D={}&price%5B%5D={}-{}'
        '&availability=available&spt=true'
    )
    return template.format(brand, category, page, size, minimumPrice, maximumPrice)
def _format_prices(raw_prices):
    """Strip non-breaking spaces from scraped price strings and drop empty ones."""
    stripped = (price.strip("\xa0") for price in raw_prices)
    return [price for price in stripped if price != '']


def _parse_dollars(price_text):
    """Convert a price string such as '$25' (or '$1,200') to an int."""
    # Thousands separators are removed defensively; whether the site emits
    # them is not known from this script.
    return int(price_text.strip().lstrip('$').replace(',', ''))


def _markdown_percent(current, original):
    """Return the whole-number percentage by which `current` undercuts `original`."""
    # round() rather than int(): (1 - 0.9) * 100 evaluates to
    # 9.999999999999998 in floating point, which int() would truncate to 9.
    return round((1 - round(current / original, 2)) * 100)


def scrapePage(URL):
    """Scrape one search-results page and collect the listings that qualify.

    Listings whose original price lies within
    [minimumOriginalPrice, maximumOriginalPrice] and whose markdown is at
    least minimumMarkdown percent are appended to the module-level
    ``newBuyers`` list.

    Side effects:
        * sets the global ``endOfListing`` when the page signals that there
          are no more results, or when it contains no price entries at all;
        * prints the raw number of price text nodes found on the page.

    Args:
        URL: address of the results page to fetch (also sent as referer).

    Returns:
        A new list with every listing collected so far (not only the ones
        from this page), sorted by markdown percentage, highest first.

    Raises:
        ValueError: if a scraped price cannot be parsed as an integer.
    """
    global endOfListing

    result = session_requests.get(URL, headers=dict(referer=URL))
    tree = html.fromstring(result.content)

    # Checks for End of Listings. A membership test is used instead of exact
    # list equality so that an additional <h2> on the page cannot hide it.
    if "Sorry, we couldn't find anything." in tree.xpath('//h2/text()'):
        endOfListing = True

    currentPrice = tree.xpath('//div[@class="price"]/text()')
    print(len(currentPrice))  # raw count, before blank entries are dropped
    currentPrice = _format_prices(currentPrice)  # Removes garbage formatting

    originalPrice = tree.xpath('//span[@class="original"]/text()')
    title = tree.xpath('//span[@class="item-title"]/text()')
    imgLink = tree.xpath('//img[@class="covershot add_pin_it_btn"]/@src')
    description = tree.xpath('//div[@class="description"]/text()')
    listingIds = tree.xpath('//div[@class="listing-con shopping-tile"]/@id')

    if not currentPrice:
        # A page without any price entries has nothing more to offer; flag it
        # so the paging loop cannot spin forever on an unexpected page.
        endOfListing = True

    # NOTE(review): the six lists are assumed to be parallel (entry i of each
    # belongs to listing i) -- confirm against the page markup. zip() stops
    # at the shortest list instead of raising IndexError on ragged input.
    rows = zip(currentPrice, originalPrice, title, description, listingIds, imgLink)
    for priceText, originalText, itemTitle, itemDescription, listingId, image in rows:
        thisOriginalPrice = _parse_dollars(originalText)
        thisCurrentPrice = _parse_dollars(priceText)

        # Skips garbage listings; the explicit <= 0 test also guards the
        # division below should minimumOriginalPrice ever be set to 0.
        if thisOriginalPrice <= 0:
            continue
        if thisOriginalPrice < minimumOriginalPrice or thisOriginalPrice > maximumOriginalPrice:
            continue

        thisMarkdown = _markdown_percent(thisCurrentPrice, thisOriginalPrice)
        if thisMarkdown < minimumMarkdown:
            continue  # Skips listings below markdown threshold

        newBuyers.append({
            'Title': itemTitle,
            'Price': priceText,
            'Full price': originalText,
            'Markdown': str(thisMarkdown) + "%",
            'Description': itemDescription,
            'Link': 'https://poshmark.com/listing/' + listingId,
            'Image': 'https:' + image,
        })

    # Sort on the numeric value: comparing the '95%'/'100%' strings directly
    # would rank '100%' below '95%'.
    return sorted(
        newBuyers,
        key=lambda listing: int(listing['Markdown'].rstrip('%')),
        reverse=True,
    )
def checkListings(maxPages=None):
    """Walk the search-result pages until the listings run out.

    Pages are requested starting at 1 via ``scrapePage(getURL(n))`` until
    the global ``endOfListing`` flag is set.

    Args:
        maxPages: optional upper bound on the number of pages to request.
            ``None`` (the default) keeps the original unbounded behaviour.

    Returns:
        The list returned by the last ``scrapePage`` call, or an empty list
        if no page was requested.
    """
    global endOfListing
    endOfListing = False

    # Empty the shared accumulator in place so that calling this function a
    # second time does not report every listing twice.
    newBuyers.clear()

    listings = []
    pageNumber = 1
    while not endOfListing:
        if maxPages is not None and pageNumber > maxPages:
            break
        listings = scrapePage(getURL(pageNumber))
        pageNumber += 1
    return listings
# Run the complete scrape and dump the collected listings to stdout.
finalOutput = checkListings()
print(finalOutput)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement