Advertisement
naren_paste

Flipkart_Scrapper

Feb 2nd, 2024
775
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.98 KB | Source Code | 0 0
  1. from flask import Flask, render_template, request, jsonify
  2. from flask_cors import CORS, cross_origin
  3. import requests
  4. from bs4 import BeautifulSoup as bs
  5. from urllib.request import urlopen
  6. import pymongo
  7. import csv
  8. import os
  9. import time
  10. from selenium import webdriver
  11. from selenium.webdriver.common.by import By  # This needs to be used
  12.  
  13. application = Flask(__name__)  # initializing a flask app
  14. app = application
  15.  
  16.  
  17. @app.route("/", methods=["GET"])
  18. @cross_origin()
  19. def homePage():
  20.     return render_template("index.html")
  21.  
  22.  
  23. @app.route("/review", methods=["POST", "GET"])
  24. @cross_origin()
  25. def index():
  26.     if request.method == "POST":
  27.         try:
  28.             DRIVER_PATH = r"chromedriver.exe"
  29.  
  30.             # Initialize the Chrome WebDriver
  31.             driver = webdriver.Chrome(DRIVER_PATH)
  32.             searchString = request.form["content"].replace(" ", "")
  33.             flipkart_url = "https://www.flipkart.com/search?q=" + searchString
  34.  
  35.             driver.get(flipkart_url)
  36.             flipkartPage = driver.page_source
  37.             flipkart_html = bs(flipkartPage, "html.parser")
  38.             bigboxes = flipkart_html.findAll("div", {"class": "_1AtVbE col-12-12"})
  39.             del bigboxes[0:3]
  40.             box = bigboxes[0]
  41.  
  42.             productLink = "https://www.flipkart.com" + box.div.div.div.a["href"]
  43.             driver.get(productLink)
  44.             prodRes = driver.page_source
  45.             driver.quit()
  46.             prod_html = bs(prodRes, "html.parser")
  47.             commentboxes = prod_html.find_all("div", {"class": "_16PBlm"})
  48.  
  49.             reviews = []
  50.             i = 0
  51.             j = len(commentboxes) - 1
  52.  
  53.             for commentbox in commentboxes:
  54.                 try:
  55.                     price_element = flipkart_html.select("div._25b18c ._30jeq3")[0]
  56.                     price = price_element.text
  57.                 except:
  58.                     price = "There is no price"
  59.  
  60.                 if i < j:
  61.                     try:
  62.                         # name.encode(encoding='utf-8')
  63.                         name = commentbox.div.div.find_all(
  64.                             "p", {"class": "_2sc7ZR _2V5EHH"}
  65.                         )[0].text
  66.                     except:
  67.                         name = "No Name"
  68.  
  69.                     try:
  70.                         # rating.encode(encoding='utf-8')
  71.                         rating = commentbox.div.div.div.div.text
  72.                     except:
  73.                         rating = "No Rating"
  74.  
  75.                     try:
  76.                         # commentHead.encode(encoding='utf-8')
  77.                         commentHead = commentbox.div.div.div.p.text
  78.                     except:
  79.                         commentHead = "No Comment Heading"
  80.  
  81.                     try:
  82.                         comtag = commentbox.div.div.find_all("div", {"class": ""})
  83.                         # custComment.encode(encoding='utf-8')
  84.                         custComment = comtag[0].text
  85.                         comments = custComment[0:-9]
  86.                     except Exception as e:
  87.                         print("Exception while creating dictionary: ", e)
  88.  
  89.                     i += 1
  90.                 else:
  91.                     continue
  92.  
  93.                 mydict = {
  94.                     "Price": price,
  95.                     "Product": searchString,
  96.                     "Customer Name": name,
  97.                     "Rating": rating,
  98.                     "Heading": commentHead,
  99.                     "Comment": comments,
  100.                 }
  101.                 reviews.append(mydict)
  102.  
  103.             # Inserting Datas in CSV file
  104.             os.makedirs("data", exist_ok=True)
  105.             filename = os.path.join("data", searchString + ".csv")
  106.             print(filename)
  107.             with open(filename, "w", newline="", encoding="utf-8") as fw:
  108.                 headers = [
  109.                     "Price",
  110.                     "Product",
  111.                     "Customer Name",
  112.                     "Rating",
  113.                     "Heading",
  114.                     "Comment",
  115.                 ]
  116.                 writer = csv.DictWriter(fw, fieldnames=headers)
  117.                 writer.writeheader()
  118.                 writer.writerows(reviews)
  119.  
  120.             # # Inserting Datas in Mongo Atlas
  121.             # client = pymongo.MongoClient(
  122.             #     "mongodb+srv://naren:naren@clusterflip.bukguae.mongodb.net/?retryWrites=true&w=majority"
  123.             # )
  124.             # db = client["scrap_flipkart"]
  125.             # review_col = db["scrap_review"]
  126.             # review_col.insert_many(reviews)
  127.  
  128.             return render_template("results.html", reviews=reviews)
  129.  
  130.         except Exception as e:
  131.             print("The Exception message is: ", e)
  132.             return "something is wrong"
  133.  
  134.     else:
  135.         return render_template("index.html")
  136.  
  137.  
  138. if __name__ == "__main__":
  139.     app.run(host="127.0.0.1", port=5000, debug=True)
  140.  
  141.  
  142. # REQUIREMENTS #
  143. # pandas
  144. # numpy
  145. # certifi
  146. # Flask
  147. # Flask-Cors
  148. # gunicorn
  149. # pymongo[srv]
  150. # selenium<4.10
  151. # beautifulsoup4
  152. # requests
  153.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement