Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(rvest)
- URL = 'http://www.oricon.co.jp/rank/js/w/2017-01-16/p/4/'
- read_html(URL)  # NOTE(review): presumably the call that fails on this page (see the "error" remark further down) - confirm
- read_html('http://www.oricon.co.jp/rank/js/w/2017-01-16/p/2/')  # same call on page 2 of the ranking; the xml_document printout below appears to be its result
- # {xml_document}
- # <html>
- # [1] <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb#">\n <meta charset="shi ...
- # [2] <body id="container"> \n<script src="//ajax.googleapis.com/ajax/libs/jquery/1.11 ...
- library(httr)
- guess_encoding(content(GET(URL), 'raw'))  # download the page, take the undecoded response bytes, and list candidate encodings with a confidence score
- # encoding language confidence
- # 1 ISO-8859-1 pt 0.72
- # 2 ISO-8859-2 ro 0.44
- # 3 UTF-8 0.10
- # 4 Shift_JIS ja 0.10
- # 5 GB18030 zh 0.10
- # 6 EUC-JP ja 0.10
- # 7 EUC-KR ko 0.10
- # 8 Big5 zh 0.10
- # 9 ISO-8859-9 tr 0.01
- read_html(URL, encoding = 'Shift_JIS')  # retry the parse with an explicitly named Japanese encoding
- read_html(URL, encoding = 'EUC-JP')  # second candidate from the guess_encoding() table
- rawToChar(as.raw(c(0xFA, 0xB1, 0x90, 0xE7)))  # turn four raw bytes into a string; NOTE(review): presumably the CP932 (Windows-31J) bytes of the two problem characters - confirm
- # [1] "﨑千"
- library(httr)
- library(rvest)
- x <- GET('http://www.oricon.co.jp/rank/js/w/2017-01-16/p/4/')
- x_text <- content(x, as = "text")  # have httr decode the response body into a character string first
- x_xml <- read_html(x_text)  # parse the already-decoded text rather than passing the URL to read_html()
- x_nodes <- x_xml %>%
- html_nodes("section.box-rank-entry > div > a > div > p")
- # The "﨑千" in this node used to cause an error, but this time it is displayed correctly
- x_nodes[7] %>%
- html_text()
- #> [1] "川島瑞樹(CV:東山奈央),日野茜(CV:赤﨑千夏),堀裕子(CV:鈴木絵理),上田鈴帆(CV:春野ななみ),難波笑美(CV:伊達朱里紗)"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement