Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Sat Apr 27 10:20:22 2019
- @author: Gabe
- """
- from __future__ import division, unicode_literals
- from requests import get
- from requests.exceptions import RequestException
- from contextlib import closing
- from bs4 import BeautifulSoup
- import string
- import re
- from itertools import groupby
- import pandas as pd
- import time
- import matplotlib.pyplot as plt
- import numpy as np
- from scipy.stats import gaussian_kde
- from scipy.stats import kde
- import seaborn as sns
- from sklearn.linear_model import LinearRegression
- from sklearn.model_selection import train_test_split
- import matplotlib.pyplot as plt
- import requests
- from bs4 import BeautifulSoup
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- #Saving
- ##
- #dr = webdriver.Chrome(executable_path=r'C:\Users\Gabe\Desktop\Lanca\chromedriver.exe')
- #dr.get("https://www.mobile.de/pl/samochod/opel-corsa/vhc:car,pgn:1,pgs:50,ms1:19000_10_")
- ##dr.get("https://www.google.pl")
- #bs = BeautifulSoup(dr.page_source,"lxml")
- #
- #print(bs)
- ##dr.quit()
- #print("test")
- #
- #bs1 = bs.prettify("utf-8")
- #with open("output1.html", "wb") as file:
- # file.write(bs1)
- #
- ##file = open('html.txt', 'w')
- ##file.write(str(bs.encode("utf-8")))
- #file.close()
- #
- #
- #Loading the HTML
- #from __future__ import division, unicode_literals
- import codecs
- from bs4 import BeautifulSoup
# Load the page snapshot previously saved to output1.html and parse it.
# The context manager closes the file even if reading or parsing raises.
with codecs.open("output1.html", 'r', 'utf-8') as f:
    # Name the parser explicitly: without it BeautifulSoup guesses one,
    # emits a warning, and may build a different tree on another machine.
    # "lxml" matches the parser used in the commented-out scraping step.
    bs = BeautifulSoup(f.read(), "lxml")
# Dump the parsed document for a quick visual check.
print(bs)
- #
- ##Year of production
- #age_from_page = str(bs.find_all(class_="u-text-bold"))
- #age_from_page_nospace = age_from_page.translate({ord(c): None for c in string.whitespace})
- #age_from_page_nospace = re.sub(r"\s+", '', age_from_page_nospace)
- #age_from_page_out = [int(s) for s in re.findall(r'\b\d+\b',age_from_page_nospace)]
- #j2 = [i for i in age_from_page_out if i >= 1700]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement