Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <TD class="c1">111-1111</TD>
- <TD class="c2">AA1111-1111</TD>
- <TD class="c3">NAME1</TD>
- <TD class="c4"><INPUT type="text" id="F1" readonly="readonly" value=" .368"></TD>
- <TD class="c5"><INPUT type="text" id="Q1" readonly="readonly" value=""></TD>
- </TR>
- <TR class="r1">
- <TD class="c1">222-2222</TD>
- <TD class="c2">BB2222-2222</TD>
- <TD class="c3">NAME2</TD>
- <TD class="c4"><INPUT type="text" id="F2" readonly="readonly" value=" 1.28"></TD>
- <TD class="c5"><INPUT type="text" id="Q2" readonly="readonly" value=""></TD>
- </TR>
# Parse the page once, then collect the text of every element of each column
# class (c1 = component, c2 = code, c3 = description).
soup = BeautifulSoup(html, 'lxml')
component = [cell.text for cell in soup.find_all(class_="c1")]
code = [cell.text for cell in soup.find_all(class_="c2")]
description = [cell.text for cell in soup.find_all(class_="c3")]

# The quantities live in value="..." attributes rather than in element text,
# so they are pulled from the raw markup; empty attributes are discarded.
val = re.findall(r'value="(.*?)"', html)
# 'Value' is placed first so the list lines up with the other columns
# (presumably their first entry is the header cell — confirm against the page).
value = ['Value', *(found for found in val if found != '')]

# One [component, description, value] row per table line; zip stops at the
# shortest of the three lists.
data = [list(row) for row in zip(component, description, value)]
df = pd.DataFrame(data, columns=['cod','desc','val'])
- import pandas as pd
- from bs4 import BeautifulSoup
- from pathlib import Path
def get_vals(soup, filt="[class='c4']"):
    """Return the numeric ``value`` of the ``<input>`` inside each matching ``<td>``.

    Parameters
    ----------
    soup : object
        Parsed document exposing ``select(css_selector)`` (e.g. a BeautifulSoup
        object). Each selected cell must expose ``.input`` (the first nested
        ``<input>`` or ``None``), whose ``.attrs`` is a mapping of attributes.
    filt : str
        CSS attribute filter appended to ``td`` to pick the cells.

    Returns
    -------
    The result of ``pd.to_numeric(..., errors="coerce")`` over the stripped
    value strings: one entry per selected cell after the first, with NaN for
    anything that cannot be parsed as a number.
    """
    # The first matching cell is skipped (presumably the header cell — confirm
    # against the page being parsed).
    cells = soup.select(f"td{filt}")[1:]

    raw = []
    for cell in cells:
        field = cell.input
        # A cell without an <input>, or an <input> without a value attribute,
        # contributes an empty string (coerced to NaN below) instead of raising,
        # so the result keeps one entry per cell and stays aligned with the rows.
        text = field.attrs.get("value", "") if field is not None else ""
        raw.append(text.strip())
    return pd.to_numeric(raw, errors="coerce")
# NOTE(review): this Windows path contains no separators — backslashes may have
# been lost when the snippet was pasted; confirm the real file location.
url = r"C:downloadCONCTEXT_NCS_S0907R50B.htm"

# The same file is parsed twice: once with BeautifulSoup, to reach the <input>
# value attributes, and once with pandas, to get the table itself.
page_text = Path(url).read_text(encoding="utf-8")
soup = BeautifulSoup(page_text, 'lxml')

# read_html returns a list of tables; keep the first, using row 0 as the header.
tables = pd.read_html(url, header=0)
df = tables[0]

# Fill the "Recipe Qty" column from the inputs found in the c4 cells.
recipe_qty = get_vals(soup, filt="[class='c4']")
df["Recipe Qty"] = recipe_qty
- In [123]: df
- Out[123]:
- Component S-W Code Description Recipe Qty Required Quantity
- 0 241-2905 TZ4103-3905 BLUE FTALO 0.368 NaN
- 1 241-6909 TZ4103-2909 OXYDE RED 1.280 NaN
- 2 241-7906 TZ4103-3406 RED BORDEAUX 1.120 NaN
- 3 X80LC-G NaN WHITE TEXTURED TOP COAT (*) 997.232 NaN
- In [124]: df.dtypes
- Out[124]:
- Component object
- S-W Code object
- Description object
- Recipe Qty float64
- Required Quantity float64
- dtype: object
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement