Advertisement
Guest User

Untitled

a guest
Sep 2nd, 2015
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.58 KB | None | 0 0
  1. <patents>
  2. <patent patno="101103062330">
  3. <office coden="EPO" short="Eur. Pat. Office"> European Office </office>
  4. <volume>80 </volume>
  5. <issue printdate="2009-12-00">6 </issue>
  6. <numpages>13 </numpages>
  7. <section code="A-2D"> Filtering </section>
  8. <patno>101103062330 </patno>
  9. <title> trapping plastic waste </title>
  10. <authgrp>
  11. <author>
  12. <givenname>Endo </givenname>
  13. <surname>Wake </surname>
  14. </author>
  15. <author>
  16. <givenname>C. </givenname>
  17. <surname>Morde </surname>
  18. </author>
  19. <aff> University of M, USA </aff>
  20. </authgrp>
  21. <history>
  22. <received date="2009-07-01"/>
  23. <published date="2009-07-30"/>
  24. </history>
  25. <tag tagyr="2009">
  26. <tagcode>B1.C2.B5 </tagcode>
  27. <tagcode>F4.65.F6 </tagcode>
  28. </tag>
  29. <assignment>
  30. <assigndate date="2009"/>
  31. <rightholder> university of M </rightholder>
  32. </assignment>
  33. </patent>
  34. <patent patno="101103062514">
  35. <office coden="EPO" short="Eur. Pat. Office"> European Office </office>
  36. <issue printdate="2009-12-00">6 </issue>
  37. <numpages>15 </numpages>
  38. <section code="A-3D"> structure and dynamics </section>
  39. <patno>101103062514 </patno>
  40. <title> separation of cascades and photon emission </title>
  41. <authgrp>
  42. <author affref="a1 a2">
  43. <givenname>L. </givenname>
  44. <surname>Slabsky </surname>
  45. </author>
  46. <author affref="a1">
  47. <givenname>D. </givenname>
  48. <surname>Volosvyev </surname>
  49. </author>
  50. <author affref="a3">
  51. <givenname>G. </givenname>
  52. <surname>Nonpl </surname>
  53. </author>
  54. <aff affid="a1"> Institute of Physics,Russia </aff>
  55. <aff affid="a2"> Physics Institute, St. Petersburg </aff>
  56. <aff affid="a3">Technische Universiteit, Dresden </aff>
  57. </authgrp>
  58. <history>
  59. <received date="2009-01-11"/>
  60. <published date="2009-01-31"/>
  61. </history>
  62. <tag tagyr="2009">
  63. <tagcode>A1.B2.C3 </tagcode>
  64. </tag>
  65. <assignment>
  66. <assigndate date="2009"/>
  67. <rightholder> Physics Inst </rightholder>
  68. </assignment>
  69. </patent>
  70. </patents>
  71.  
  72. `Patent Author1 Author2 Author3
  73. 101103062330 Endo Wake C. Morde
  74. 101103062514 L. Slabsky D. Volosvyev G. Nonpl`
  75.  
  76. `Patent Author
  77. 101103062330 Endo Wake
  78. 101103062330 C. Morde
  79. 101103062514 L. Slabsky
  80. 101103062514 D. Volosvyev
  81. 101103062514 G. Nonpl`
  82.  
  83. `Patent Tag
  84. 101103062330 B1.C2.B5
  85. 101103062330 F4.65.F6
  86. 101103062514 A1.B2.C3`
  87.  
  88. `Author Institution
  89. Endo Wake University of M
  90. C. Morde University of M
  91. L. Slabsky Institute of Physics,Russia
  92. D. Volosvyev Physics Institute, St. Petersburg
  93. G. Nonpl Technische Universiteit, Dresden`
  94.  
  # Packages used by this script: XML (parsing / XPath helpers),
  # plyr (ldply) and data.table (data.table).
  library(XML)
  library(plyr)
  library(data.table)

  # Parse the patent file into an internal document so that XPath queries
  # (getNodeSet / xpathSApply) can be run against it.
  xmlfile <- xmlInternalTreeParse("filename.xml", useInternalNodes = TRUE)

  # One node per <patent> element.
  nodes <- getNodeSet(xmlfile, "//patent")

  # For each patent, the text of every <author> descendant. The text is the
  # concatenation of the author's child elements, so surrounding whitespace
  # is trimmed to give values such as "Endo Wake".
  authors <- lapply(
    nodes,
    xpathSApply,
    ".//author",
    function(author) trimws(xmlValue(author))
  )

  # Patent number of each node, read from the patno attribute. The previous
  # query (".//patent" evaluated from inside a <patent>) matched nothing, so
  # it always produced empty results. NA marks a patent without the attribute.
  patent <- vapply(
    nodes,
    xmlGetAttr,
    character(1),
    name = "patno",
    default = NA_character_
  )

  # Convert the whole document to a nested list and bind the top-level
  # elements into one table.
  # NOTE(review): the patents are nested and ragged (varying numbers of
  # authors / tags), so this flattening may not bind cleanly -- verify.
  dt1 <- ldply(xmlToList(xmlfile), data.table)
  101.  
  # Build one data frame per <patent>: the patent number repeated alongside
  # one row per author (columns come from the author's child elements,
  # e.g. givenname / surname). The result is a list of data frames.
  lapply(
    # The parsed document is `xmlfile`; `patents` was never defined.
    getNodeSet(xmlfile, "//patent"),
    function(patent) {
      # Elements whose local name contains "author".
      author_nodes <- getNodeSet(
        patent,
        ".//*[contains(local-name(), 'author')]"
      )
      author_df <- xmlToDataFrame(
        nodes = author_nodes,
        stringsAsFactors = FALSE
      )
      # Element text carries trailing blanks in the source XML ("Endo ").
      author_df[] <- lapply(author_df, trimws)

      data.frame(
        # xmlGetAttr returns the default instead of failing when the
        # attribute is absent.
        patent = xmlGetAttr(patent, "patno", NA_character_),
        author_df,
        stringsAsFactors = FALSE
      )
    }
  )
  114.  
  # Build one data frame per <patent> with a row for every <tagcode>:
  # column `patent` holds the patent number, column `tag` the tag code.
  # The result is a list of data frames.
  lapply(
    # The parsed document is `xmlfile`; `patents` was never defined.
    getNodeSet(xmlfile, "//patent"),
    function(patent) {
      patno <- xmlGetAttr(patent, "patno", NA_character_)

      # vapply always yields a character vector (possibly empty), whereas
      # xpathSApply returns an empty list when nothing matches. Values are
      # trimmed because the source XML has trailing blanks ("B1.C2.B5 ").
      tags <- vapply(
        getNodeSet(patent, ".//tagcode"),
        function(tagcode) trimws(xmlValue(tagcode)),
        character(1)
      )

      data.frame(
        # Repeat explicitly so a patent without tags gives a zero-row frame
        # instead of a row-count mismatch.
        patent = rep(patno, length(tags)),
        tag = tags,
        stringsAsFactors = FALSE
      )
    }
  )
  129.  
  # For every <authgrp>, return a two-element list:
  #   [[1]] affiliations that carry an affid attribute (aff_id, institution),
  #         or NULL when the group has none;
  #   [[2]] the group's authors (one row each) plus an `aff_id` column
  #         holding the author's raw affref attribute (NA when absent).
  lapply(
    # The parsed document is `xmlfile`; `patents` was never defined.
    getNodeSet(xmlfile, "//authgrp"),
    function(autg) {
      aff_df <- do.call(
        rbind,
        xpathApply(
          autg,
          ".//aff[@affid]", # get only those with affid attr
          function(aff) {
            data.frame(
              aff_id = xmlGetAttr(aff, "affid"),
              # Source text has surrounding blanks.
              institution = trimws(xmlValue(aff)),
              stringsAsFactors = FALSE
            )
          }
        )
      )

      authors <- getNodeSet(autg, "./author")
      aut_df <- xmlToDataFrame(nodes = authors, stringsAsFactors = FALSE)
      aut_df[] <- lapply(aut_df, trimws)

      # affref may list several ids separated by blanks ("a1 a2"); it is
      # kept as a single string here. xmlGetAttr with a default covers both
      # authors without any attributes and authors that have attributes
      # other than affref, and vapply is safe for an empty author set.
      aut_df$aff_id <- vapply(
        authors,
        xmlGetAttr,
        character(1),
        name = "affref",
        default = NA_character_
      )

      list(aff_df, aut_df)
    }
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement