Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <patents>
- <patent patno="101103062330">
- <office coden="EPO" short="Eur. Pat. Office"> European Office </office>
- <volume>80 </volume>
- <issue printdate="2009-12-00">6 </issue>
- <numpages>13 </numpages>
- <section code="A-2D"> Filtering </section>
- <patno>101103062330 </patno>
- <title> trapping plastic waste </title>
- <authgrp>
- <author>
- <givenname>Endo </givenname>
- <surname>Wake </surname>
- </author>
- <author>
- <givenname>C. </givenname>
- <surname>Morde </surname>
- </author>
- <aff> University of M, USA </aff>
- </authgrp>
- <history>
- <received date="2009-07-01"/>
- <published date="2009-07-30"/>
- </history>
- <tag tagyr="2009">
- <tagcode>B1.C2.B5 </tagcode>
- <tagcode>F4.65.F6 </tagcode>
- </tag>
- <assignment>
- <assigndate date="2009"/>
- <rightholder> university of M </rightholder>
- </assignment>
- </patent>
- <patent patno="101103062514">
- <office coden="EPO" short="Eur. Pat. Office"> European Office </office>
- <issue printdate="2009-12-00">6 </issue>
- <numpages>15 </numpages>
- <section code="A-3D"> structure and dynamics </section>
- <patno>101103062514 </patno>
- <title> separation of cascades and photon emission </title>
- <authgrp>
- <author affref="a1 a2">
- <givenname>L. </givenname>
- <surname>Slabsky </surname>
- </author>
- <author affref="a1">
- <givenname>D. </givenname>
- <surname>Volosvyev </surname>
- </author>
- <author affref="a3">
- <givenname>G. </givenname>
- <surname>Nonpl </surname>
- </author>
- <aff affid="a1"> Institute of Physics,Russia </aff>
- <aff affid="a2"> Physics Institute, St. Petersburg </aff>
- <aff affid="a3">Technische Universiteit, Dresden </aff>
- </authgrp>
- <history>
- <received date="2009-01-11"/>
- <published date="2009-01-31"/>
- </history>
- <tag tagyr="2009">
- <tagcode>A1.B2.C3 </tagcode>
- </tag>
- <assignment>
- <assigndate date="2009"/>
- <rightholder> Physics Inst </rightholder>
- </assignment>
- </patent>
- </patents>
- `Patent Author1 Author2 Author3
- 101103062330 Endo Wake C. Morde
- 101103062514 L. Slabsky D. Volosvyev G. Nonpl`
- `Patent Author
- 101103062330 Endo Wake
- 101103062330 C. Morde
- 101103062514 L. Slabsky
- 101103062514 D. Volosvyev
- 101103062514 G. Nonpl`
- `Patent Tag
- 101103062330 B1.C2.B5
- 101103062330 F4.65.F6
- 101103062514 A1.B2.C3`
- `Author Institution
- Endo Wake University of M
- C. Morde University of M
- L. Slabsky Institute of Physics,Russia
- D. Volosvyev Physics Institute, St. Petersburg
- G. Nonpl Technische Universiteit, Dresden`
# Attach the packages these snippets call into: XML (parsing and XPath),
# plyr (ldply) and data.table (data.table).
library(XML)
library(plyr)
library(data.table)

# Parse the patent file into an internal document so XPath queries can be
# run against it. The argument name is spelled out in full and TRUE is used
# instead of the reassignable shorthand T.
xmlfile <- xmlInternalTreeParse("filename.xml", useInternalNodes = TRUE)

# The snippets further down refer to the parsed document as `patents`.
patents <- xmlfile

# One node per <patent> element.
nodes <- getNodeSet(xmlfile, "//patent")

# For each patent, the text content of every <author> element below it.
authors <- lapply(nodes, xpathSApply, ".//author", xmlValue)

# Patent number of each <patent> node, read from its `patno` attribute.
# Searching for ".//patent" relative to a <patent> node finds nothing in this
# document, because <patent> elements are not nested. NA marks a patent
# without the attribute.
patent <- vapply(
  nodes, xmlGetAttr, character(1),
  name = "patno", default = NA_character_
)

# NOTE(review): converts the whole document to a nested list and binds one
# data.table per top-level entry; the nested author/tag groups probably do
# not flatten cleanly here, so verify the result before relying on it.
dt1 <- ldply(xmlToList(xmlfile), data.table)
# Build one data frame per <patent> element: the patent number (from the
# `patno` attribute) alongside the child values of every element whose name
# contains "author". The list of data frames is returned (and printed when
# run at top level).
lapply(
  getNodeSet(patents, "//patent"),
  function(pat_node) {
    author_nodes <- getNodeSet(
      pat_node,
      ".//*[contains(local-name(), 'author')]"
    )
    author_cols <- xmlToDataFrame(nodes = author_nodes)
    data.frame(
      patent = xmlAttrs(pat_node)[["patno"]],
      author_cols,
      stringsAsFactors = FALSE
    )
  }
)
# Build one data frame per <patent> element pairing the patent number (from
# the `patno` attribute) with the text of each <tagcode> element below it.
lapply(
  getNodeSet(patents, "//patent"),
  function(pat_node) {
    patno <- xmlAttrs(pat_node)[["patno"]]
    tag_codes <- xpathSApply(pat_node, ".//tagcode", xmlValue)
    data.frame(
      patent = patno,
      tag = tag_codes,
      stringsAsFactors = FALSE
    )
  }
)
# For every <authgrp> element, return a two-element list:
#   [[1]] a data frame of affiliations (aff_id, institution), built only from
#         <aff> elements that carry an `affid` attribute; NULL when the group
#         has none;
#   [[2]] a data frame of the group's direct <author> children with an extra
#         list column `aff_id` holding each author's `affref` attribute, or
#         NA when the author has no such attribute.
lapply(
  getNodeSet(patents, "//authgrp"),
  function(autg) {
    aff_df <- do.call(
      rbind,
      xpathApply(
        autg,
        ".//aff[@affid]", # get only those with affid attr
        function(aff) {
          data.frame(
            aff_id = xmlGetAttr(aff, "affid"),
            institution = xmlValue(aff),
            stringsAsFactors = FALSE
          )
        }
      )
    )

    authors <- getNodeSet(autg, "./author")
    aut_df <- xmlToDataFrame(nodes = authors)

    # xmlGetAttr() falls back to `default` when the attribute is missing, so
    # an author carrying other attributes but no `affref` no longer triggers
    # a "subscript out of bounds" error. Iterating over the nodes directly
    # also avoids the 1:length() pitfall on an empty node set.
    aut_df$aff_id <- lapply(
      authors,
      xmlGetAttr,
      name = "affref",
      default = NA
    )

    list(aff_df, aut_df)
  }
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement