Advertisement
Guest User

Untitled

a guest
Nov 8th, 2016
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.87 KB | None | 0 0
  1. import requests
  2. from lxml import html
  3. from codecs import encode
  4. from operator import itemgetter
  5.  
  6.  
  7. USERNAME = encode('grfgnppbhag2016', 'rot13')
  8. PASSWORD = encode('IlIllIlIl.', 'rot13')
  9.  
  10. brand = 'cole+haan'
  11. category = 'shoes'
  12. minimumPrice = '10'
  13. maximumPrice = '25'
  14. size = '7'
  15. minimumOriginalPrice = 1
  16. maximumOriginalPrice = 600
  17. minimumMarkdown = 95
  18.  
  19. newBuyers = []
  20.  
  21. global endOfListing
  22. endOfListing = False
  23.  
  24. LOGIN_URL = "https://poshmark.com/login"
  25.  
  26. session_requests = requests.session()
  27.  
  28. result = session_requests.get(LOGIN_URL)
  29.  
  30. tree = html.fromstring(result.text)
  31. authenticity_token = list(set(tree.xpath("//input[@name='authenticity_token']/@value")))[0]
  32.  
  33. payload = {
  34.         "username": USERNAME,
  35.         "password": PASSWORD,
  36.         "authenticity_token": authenticity_token
  37.     }
  38.  
  39. # Perform login
  40. result = session_requests.post(LOGIN_URL, data=payload, headers=dict(referer=LOGIN_URL))
  41.  
  42.  
  43. # Divide search URL for input constraints
  44. def getURL(page):
  45.     URL = 'https://poshmark.com/search?query={}&category={}&max_id={}&size%5B%5D={}&price%5B%5D={}-{}' \
  46.           '&availability=available&spt=true'.format(brand, category, page, size, minimumPrice, maximumPrice)
  47.     return URL
  48.  
  49.  
  50. def scrapePage(URL):
  51.     result = session_requests.get(URL, headers=dict(referer=URL))
  52.     tree = html.fromstring(result.content)
  53.     if ["Sorry, we couldn't find anything."] == tree.xpath('//h2/text()'):  # Checks for End of Listings
  54.         global endOfListing
  55.         endOfListing = True
  56.  
  57.     currentPrice = tree.xpath('//div[@class="price"]/text()')
  58.     print(len(currentPrice))
  59.     originalPrice = tree.xpath('//span[@class="original"]/text()')
  60.     title = tree.xpath('//span[@class="item-title"]/text()')
  61.     imgLink = tree.xpath('//img[@class="covershot add_pin_it_btn"]/@src')
  62.     description = tree.xpath('//div[@class="description"]/text()')
  63.     id = tree.xpath('//div[@class="listing-con shopping-tile"]/@id')
  64.  
  65.     def formatPrice(unformattedPrice):  # Removes garbage formatting
  66.         formattedPrice = []
  67.         for item in unformattedPrice:
  68.             item = item.strip("\xa0")
  69.             if item == '':
  70.                 continue
  71.             formattedPrice.append(item)
  72.         return formattedPrice
  73.  
  74.     currentPrice = formatPrice(currentPrice)  # Removes garbage formatting
  75.  
  76.     def placeInDictionary(iterations):  # Adds all lists into dictionary
  77.         for i in iterations:
  78.             thisOriginalPrice = int(originalPrice[i].strip('$'))
  79.             thisCurrentPrice = int(currentPrice[i].strip('$'))
  80.             if (thisOriginalPrice < minimumOriginalPrice) or (thisOriginalPrice > maximumOriginalPrice):
  81.                 continue  # Skips garbage listings
  82.             thisMarkdown = str(int((1 - (round(thisCurrentPrice / thisOriginalPrice, 2))) * 100)) + "%"
  83.             if (int(thisMarkdown.strip('%')) < minimumMarkdown):
  84.                 continue  # Skips listings below markdown threshold
  85.             newBuyers.append({'Title': title[i],
  86.                               'Price': currentPrice[i],
  87.                               'Full price': originalPrice[i],
  88.                               'Markdown': thisMarkdown,
  89.                               'Description': description[i],
  90.                               'Link': str('https://poshmark.com/listing/' + id[i]),
  91.                               'Image': str('https:' + imgLink[i])})
  92.  
  93.     placeInDictionary(range(len(currentPrice)))  # Adds all lists into dictionary
  94.     finalList = sorted(newBuyers, key=itemgetter('Markdown'), reverse=True)  # Rearranges dictionary to sort by Markdown
  95.     return finalList
  96.  
  97.  
  98. def checkListings():
  99.     dictionary = []
  100.     global endOfListing
  101.     endOfListing = False
  102.     pageNumber = 1
  103.     while not endOfListing:
  104.         dictionary = scrapePage(getURL(pageNumber))
  105.         pageNumber += 1
  106.     return dictionary
  107.  
  108. finalOutput = checkListings()  # runs the whole scrape as soon as the script is executed
  109. print(finalOutput)  # prints the list returned by checkListings()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement