Guest User

Untitled

a guest
Aug 15th, 2018
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. domain count
  2. 1 START OF NEW FILE
  3. 2 94_res.txt
  4. 3 google.ru 2
  5. 4 START OF NEW FILE
  6. 5 95_res.txt
  7. 6 search-results.com 2
  8. 7 hpc.ru 1
  9. 8 theadgateway.com 1
  10. 9 google.by 6
  11.  
  12. transform(test.df,
  13. fnameRaw = ifelse(lag(test.df$domain, 1) == "START OF NEW FILE ",
  14. test.df$domain,
  15. ""))
  16. domain count fnameRaw
  17. 1 START OF NEW FILE START OF NEW FILE
  18. 2 94_res.txt
  19. 3 google.ru 2
  20. 4 START OF NEW FILE START OF NEW FILE
  21. 5 95_res.txt
  22. 6 search-results.com 2
  23. 7 hpc.ru 1
  24. 8 theadgateway.com 1
  25. 9 google.by 6
  26.  
  27. domain count fnameRaw filename
  28. 1 START OF NEW FILE N/A
  29. 2 94_res.txt 94_res.txt 94_res.txt
  30. 3 google.ru 2 94_res.txt
  31. 4 START OF NEW FILE 94_res.txt
  32. 5 95_res.txt 95_res.txt 95_res.txt
  33. 6 search-results.com 2 95_res.txt
  34. 7 hpc.ru 1 95_res.txt
  35. 8 theadgateway.com 1 95_res.txt
  36. 9 google.by 6 95_res.txt
  37.  
  38. library(dplyr)
  39. library(zoo)
  40.  
  41. DF %>%
  42. mutate(filename = ifelse(lag(domain) == "START OF NEW FILE", domain, NA),
  43. filename = na.locf0(filename),
  44. filename = ifelse(domain == "START OF NEW FILE", NA, filename))
  45.  
  46. domain count filename
  47. 1 START OF NEW FILE NA <NA>
  48. 2 94_res.txt NA 94_res.txt
  49. 3 google.ru 2 94_res.txt
  50. 4 START OF NEW FILE NA <NA>
  51. 5 95_res.txt NA 95_res.txt
  52. 6 search-results.com 2 95_res.txt
  53. 7 hpc.ru 1 95_res.txt
  54. 8 theadgateway.com 1 95_res.txt
  55. 9 google.by 6 95_res.txt
  56.  
  57. library(dplyr)
  58.  
  59. DF %>%
  60. group_by(g = cumsum(domain == "START OF NEW FILE")) %>%
  61. mutate(filename = c(NA, rep(domain[2], n()-1))) %>%
  62. ungroup %>%
  63. select(-g)
  64.  
  65. g <- cumsum(DF$domain == "START OF NEW FILE")
  66. make_filename <- function(x) c(NA, rep(x[2], length(x) - 1))
  67. transform(DF, filename = ave(DF$domain, g, FUN = make_filename))
  68.  
  69. DF <- structure(list(domain = c("START OF NEW FILE", "94_res.txt",
  70. "google.ru", "START OF NEW FILE", "95_res.txt", "search-results.com",
  71. "hpc.ru", "theadgateway.com", "google.by"), count = c(NA, NA,
  72. 2L, NA, NA, 2L, 1L, 1L, 6L)), row.names = c(NA, -9L), class = "data.frame")
Add Comment
Please, Sign In to add comment