Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- reload(sys)
- sys.setdefaultencoding("utf-8")
- sys.path.insert(0, 'libs')
- import webapp2
- from bs4 import BeautifulSoup
- import math
- import urllib2
class MainPage(webapp2.RequestHandler):
    """Serve one page of an Emart category listing as tab-separated text.

    Each product found on the fetched listing page becomes one output line:
    ``title<TAB>price<TAB>image URL<TAB>product code<NEWLINE>``.
    """

    @staticmethod
    def _extract_row(item):
        """Return ``(title, price, image_url, code)`` for one product element.

        Returns ``None`` when any field is missing, so the caller can drop
        the product entirely instead of emitting a partial row.
        """
        title_box = item.find(class_="title")
        title_link = title_box.a if title_box is not None else None
        title = title_link.get("title") if title_link is not None else None

        price_box = item.find(class_="price")
        price_strong = price_box.strong if price_box is not None else None
        # .string is None when <strong> holds anything but a single text node.
        price = price_strong.string if price_strong is not None else None

        thumb_box = item.find(class_="thm")
        thumb_link = thumb_box.a if thumb_box is not None else None
        image = thumb_link.img if thumb_link is not None else None
        img_url = image.get("src") if image is not None else None

        if title is None or price is None or img_url is None:
            return None

        # The code is taken from the eighth "/"-separated piece of the image
        # URL, up to the first underscore.
        # NOTE(review): presumably the image path embeds the product code at
        # this position -- verify against the live markup.
        pieces = img_url.split("/")
        if len(pieces) <= 7:
            return None
        code = pieces[7].split("_")[0]

        # The src is prefixed with a scheme, as the original code did.
        # NOTE(review): assumes src is protocol-relative ("//host/...").
        return (title, price, "http:" + img_url, code)

    def get(self, args1, args2):
        """Fetch the listing page and write its products to the response.

        Args:
            args1: Category id, used as the ``dispCtgId`` query value.
            args2: Page number, used as the ``page`` query value.
        """
        url = ("http://emart.ssg.com/category/list.ssg?dispCtgId="
               + str(args1) + "&page=" + str(args2))
        print("log_print" + url)

        # Close the connection even if reading fails.
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(html, "html.parser")
        rows = []
        tbody = soup.find("tbody")
        # A page without a <tbody> yields an empty body rather than an error.
        if tbody is not None:
            for item in tbody.find_all(class_="item w202"):
                row = self._extract_row(item)
                if row is not None:
                    rows.append("\t".join(row) + "\n")

        self.response.write("".join(rows))
# WSGI entry point. The route matches /emart_product_v2/<digits>_<digits>;
# the two captured groups are passed to MainPage.get as args1 and args2.
# The pattern is a raw string so that \d reaches the regex engine intact.
# NOTE(review): debug=True is kept from the original -- confirm it is
# intended outside development.
app = webapp2.WSGIApplication(
    [(r'/emart_product_v2/(\d+)_(\d+)', MainPage)],
    debug=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement