Advertisement
Guest User

Untitled

a guest
Jul 17th, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.11 KB | None | 0 0
  1. <TD class="c1">111-1111</TD>
  2. <TD class="c2">AA1111-1111</TD>
  3. <TD class="c3">NAME1</TD>
  4. <TD class="c4"><INPUT type="text" id="F1" readonly="readonly" value=" .368"></TD>
  5. <TD class="c5"><INPUT type="text" id="Q1" readonly="readonly" value=""></TD>
  6. </TR>
  7. <TR class="r1">
  8. <TD class="c1">222-2222</TD>
  9. <TD class="c2">BB2222-2222</TD>
  10. <TD class="c3">NAME2</TD>
  11. <TD class="c4"><INPUT type="text" id="F2" readonly="readonly" value=" 1.28"></TD>
  12. <TD class="c5"><INPUT type="text" id="Q2" readonly="readonly" value=""></TD>
  13. </TR>
  14.  
  15. soup = BeautifulSoup(html,'lxml')
  16. description = [element.text for element in soup.find_all(class_="c3")]
  17. component = [element.text for element in soup.find_all(class_="c1")]
  18. code = [element.text for element in soup.find_all(class_="c2")]
  19. val = re.findall(r'value="(.*?)"', html)
  20. value = [value for value in val if value != '']
  21. value.insert(0, 'Value')
  22.  
  23. data = []
  24. for a, b, c, in zip(component ,description,value):
  25. data.append([a, b, c,])
  26.  
  27. df = pd.DataFrame(data, columns=['cod','desc','val'])
  28.  
  29. import pandas as pd
  30. from bs4 import BeautifulSoup
  31. from pathlib import Path
  32.  
  33. def get_vals(soup, filt="[class='c4']"):
  34. ret = [x.input.attrs["value"].strip()
  35. for x in soup.select(f"td{filt}")[1:]]
  36. return pd.to_numeric(ret, errors="coerce")
  37.  
  38. url = r"C:downloadCONCTEXT_NCS_S0907R50B.htm"
  39.  
  40. soup = BeautifulSoup(Path(url).read_text(encoding="utf-8"), 'lxml')
  41.  
  42. df = pd.read_html(url, header=0)[0]
  43. df["Recipe Qty"] = get_vals(soup, filt="[class='c4']")
  44.  
  45. In [123]: df
  46. Out[123]:
  47. Component S-W Code Description Recipe Qty Required Quantity
  48. 0 241-2905 TZ4103-3905 BLUE FTALO 0.368 NaN
  49. 1 241-6909 TZ4103-2909 OXYDE RED 1.280 NaN
  50. 2 241-7906 TZ4103-3406 RED BORDEAUX 1.120 NaN
  51. 3 X80LC-G NaN WHITE TEXTURED TOP COAT (*) 997.232 NaN
  52.  
  53. In [124]: df.dtypes
  54. Out[124]:
  55. Component object
  56. S-W Code object
  57. Description object
  58. Recipe Qty float64
  59. Required Quantity float64
  60. dtype: object
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement