Advertisement
Guest User

Untitled

a guest
Jan 16th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  1. library(rvest)  # the only package attached before the first read_html() call
  2. URL = 'http://www.oricon.co.jp/rank/js/w/2017-01-16/p/4/'  # the ".../p/4/" page of the ranking URL
  3. read_html(URL)  # no output was recorded for this call - presumably it failed; TODO confirm
  4.  # The same call against the ".../p/2/" page; its printed result is recorded below.
  5. read_html('http://www.oricon.co.jp/rank/js/w/2017-01-16/p/2/')
  6. # {xml_document}
  7. # <html>
  8. # [1] <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb#">\n <meta charset="shi ...
  9. # [2] <body id="container">&#13;\n<script src="//ajax.googleapis.com/ajax/libs/jquery/1.11 ...
  10.  # Next: fetch the ".../p/4/" page as raw bytes and ask for encoding guesses.
  11. library(httr)  # attached ahead of the GET() and content() calls below
  12. guess_encoding(content(GET(URL), 'raw'))  # recorded result: candidate encodings with confidence scores
  13. # encoding language confidence
  14. # 1 ISO-8859-1 pt 0.72
  15. # 2 ISO-8859-2 ro 0.44
  16. # 3 UTF-8 0.10
  17. # 4 Shift_JIS ja 0.10
  18. # 5 GB18030 zh 0.10
  19. # 6 EUC-JP ja 0.10
  20. # 7 EUC-KR ko 0.10
  21. # 8 Big5 zh 0.10
  22. # 9 ISO-8859-9 tr 0.01
  23.  # Retry the parse with two explicit Japanese encodings; no output was recorded for either call.
  24. read_html(URL, encoding = 'Shift_JIS')
  25.  
  26. read_html(URL, encoding = 'EUC-JP')
  27.  # Build a string directly from four raw bytes; the recorded console output follows.
  28. rawToChar(as.raw(c(0xFA, 0xB1, 0x90, 0xE7)))  # NOTE(review): looks like the CP932/Shift_JIS bytes for the two characters shown - confirm
  29. [1] "﨑千"
  30.  
  31. library(httr)
  32. library(rvest)
  33.  
  34. # Download the ranking page and decode the response body as text.
  35. ranking_url <- "http://www.oricon.co.jp/rank/js/w/2017-01-16/p/4/"
  36. response <- GET(ranking_url)
  37. page_text <- content(response, as = "text")
  38.  
  39. # Parse the already-decoded text instead of handing the URL to read_html().
  40. page <- read_html(page_text)
  41.  
  42. # Collect the <p> elements matched by the ranking-entry selector.
  43. entry_nodes <- html_nodes(
  44.   page,
  45.   "section.box-rank-entry > div > a > div > p"
  46. )
  47.  
  48. # The "﨑千" in this node used to cause an error, but this time it is displayed correctly.
  49. html_text(entry_nodes[7])
  50. #> [1] "川島瑞樹(CV:東山奈央),日野茜(CV:赤﨑千夏),堀裕子(CV:鈴木絵理),上田鈴帆(CV:春野ななみ),難波笑美(CV:伊達朱里紗)"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement