Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(stringr)
- library(dplyr)
- library(purrr)
- library(rlang)
- library(text2vec) # for movie reviews data
#' Extract a pattern together with the text surrounding it
#'
#' Builds a regular expression that matches `pattern` plus up to `n_before`
#' characters in front of it and up to `n_after` characters behind it, then
#' extracts that match from every element of `string`.
#'
#' stringr is favoured over base R because stringr handles `NA`s, whereas the
#' base R equivalent returns a zero-length result:
#' `regmatches(string, regexpr(x, string, ignore.case = ignore_case))`
#'
#' @param string Character vector to search.
#' @param pattern Regular expression to look for. It is wrapped in a
#'   non-capturing group, so alternations such as `"bad|good"` keep their
#'   context on both sides.
#' @param n_before,n_after Maximum number of context characters to keep before
#'   and after the match. Coerced to integer.
#' @param ignore_case If `TRUE` (the default), match case-insensitively.
#' @param first If `TRUE` (the default), return only the first match of each
#'   element via `stringr::str_extract()`; otherwise return every match via
#'   `stringr::str_extract_all()`.
#' @return The result of `stringr::str_extract()` when `first` is `TRUE`,
#'   otherwise the result of `stringr::str_extract_all()`.
pattern_context <- function(string, pattern, n_before = 10, n_after = 10,
                            ignore_case = TRUE, first = TRUE) {
  # Render the widths as plain integers: paste0(1e5) yields "1e+05", which is
  # not a valid regex quantifier bound.
  width_before <- sprintf("%d", as.integer(n_before))
  width_after <- sprintf("%d", as.integer(n_after))

  # Group the user pattern so that a top-level `|` cannot split the context
  # quantifiers away from one of its alternatives.
  x <- paste0(
    ".{0,", width_before, "}",
    "(?:", pattern, ")",
    ".{0,", width_after, "}"
  )
  context_regex <- stringr::regex(pattern = x, ignore_case = ignore_case)

  if (first) {
    stringr::str_extract(string, context_regex)
  } else {
    stringr::str_extract_all(string, context_regex)
  }
}
#' Add pattern-context columns to a data frame
#'
#' Applies `pattern_context()` to each selected column of `.data` and appends
#' the results as new columns named `<column>_context`. The columns of the
#' result are then sorted by name, which in the usual case puts each context
#' column directly after the column it was derived from.
#'
#' @param .data A data frame or tibble.
#' @param ... Arguments forwarded to `pattern_context()` (for example
#'   `pattern`, `n_before`, `n_after`).
#' @param cols Tidy-select specification of the columns to search. Defaults to
#'   every column.
#' @return `.data` with one extra `_context` column per selected column and
#'   all columns ordered by name. If `cols` selects nothing, `.data` is
#'   returned with its columns ordered by name and no columns added.
pattern_context_df <- function(.data, ..., cols = everything()) {
  selected <- dplyr::select(.data, {{ cols }})
  contexts <- purrr::map(selected, pattern_context, ...)

  # An empty selection has nothing to bind. paste0(character(0), "_context")
  # would also produce a stray length-1 name for a zero-column result, so
  # return early with the same column ordering as the normal path.
  if (length(contexts) == 0) {
    return(.data[, order(colnames(.data)), drop = FALSE])
  }

  df_context <- dplyr::bind_cols(contexts)
  names(df_context) <- paste0(names(df_context), "_context")

  df <- dplyr::bind_cols(.data, df_context)
  # drop = FALSE keeps a data frame even if only one column remains.
  df[, order(colnames(df)), drop = FALSE]
}
# Example: build one context column per search term inside mutate()
as_tibble(movie_review) %>%
  mutate(
    bad_context = pattern_context(string = review, pattern = "bad"),
    good_context = pattern_context(string = review, pattern = "good")
  ) %>%
  select(review:good_context)
- #> # A tibble: 5,000 x 3
- #> review bad_context good_context
- #> <chr> <chr> <chr>
- #> 1 With all this stuff going down at ~ drugs are bad m'~ <NA>
- #> 2 "\\\"The Classic War of the Worlds~ <NA> <NA>
- #> 3 The film starts with a manager (Ni~ oments is badly ~ ivers the goods w~
- #> 4 "It must be assumed that those who~ <NA> ng of the Good Fr~
- #> 5 "Superbly trashy and wondrously un~ <NA> <NA>
- #> 6 I dont know why people think this ~ is such a bad mo~ " a pretty good p~
- #> 7 This movie could have been very go~ "me really bad m~ been very good, b~
- #> 8 I watched this video at a friend's~ cience is bad, a~ <NA>
- #> 9 A friend of mine bought this film ~ <NA> <NA>
- #> 10 "<br /><br />This movie is full of~ <NA> <NA>
- #> # ... with 4,990 more rows
# Example: add a context column for every column of the data frame
as_tibble(movie_review) %>%
  select(id, review) %>%
  pattern_context_df(pattern = "good")
- #> # A tibble: 5,000 x 4
- #> id id_context review review_context
- #> <chr> <chr> <chr> <chr>
- #> 1 5814_8 <NA> With all this stuff going down at~ <NA>
- #> 2 2381_9 <NA> "\\\"The Classic War of the World~ <NA>
- #> 3 7759_3 <NA> The film starts with a manager (N~ ivers the goods wi~
- #> 4 3630_4 <NA> "It must be assumed that those wh~ ng of the Good Fri~
- #> 5 9495_8 <NA> "Superbly trashy and wondrously u~ <NA>
- #> 6 8196_8 <NA> I dont know why people think this~ " a pretty good pl~
- #> 7 7166_2 <NA> This movie could have been very g~ been very good, bu~
- #> 8 10633~ <NA> I watched this video at a friend'~ <NA>
- #> 9 319_1 <NA> A friend of mine bought this film~ <NA>
- #> 10 8713_~ <NA> "<br /><br />This movie is full o~ <NA>
- #> # ... with 4,990 more rows
# Example: add a context column only for the columns named in `cols`
as_tibble(movie_review) %>%
  select(id, review) %>%
  pattern_context_df(pattern = "good", cols = review)
- #> # A tibble: 5,000 x 3
- #> id review review_context
- #> <chr> <chr> <chr>
- #> 1 5814_8 With all this stuff going down at the mome~ <NA>
- #> 2 2381_9 "\\\"The Classic War of the Worlds\\\" by ~ <NA>
- #> 3 7759_3 The film starts with a manager (Nicholas B~ ivers the goods wit~
- #> 4 3630_4 "It must be assumed that those who praised~ ng of the Good Frid~
- #> 5 9495_8 "Superbly trashy and wondrously unpretenti~ <NA>
- #> 6 8196_8 I dont know why people think this is such ~ " a pretty good plo~
- #> 7 7166_2 This movie could have been very good, but ~ been very good, but~
- #> 8 10633_1 I watched this video at a friend's house. ~ <NA>
- #> 9 319_1 A friend of mine bought this film for 1, a~ <NA>
- #> 10 8713_10 "<br /><br />This movie is full of referen~ <NA>
- #> # ... with 4,990 more rows
# Example: add a context column for every column except the ones excluded
as_tibble(movie_review) %>%
  select(id, review) %>%
  pattern_context_df(pattern = "good", cols = -id)
- #> # A tibble: 5,000 x 3
- #> id review review_context
- #> <chr> <chr> <chr>
- #> 1 5814_8 With all this stuff going down at the mome~ <NA>
- #> 2 2381_9 "\\\"The Classic War of the Worlds\\\" by ~ <NA>
- #> 3 7759_3 The film starts with a manager (Nicholas B~ ivers the goods wit~
- #> 4 3630_4 "It must be assumed that those who praised~ ng of the Good Frid~
- #> 5 9495_8 "Superbly trashy and wondrously unpretenti~ <NA>
- #> 6 8196_8 I dont know why people think this is such ~ " a pretty good plo~
- #> 7 7166_2 This movie could have been very good, but ~ been very good, but~
- #> 8 10633_1 I watched this video at a friend's house. ~ <NA>
- #> 9 319_1 A friend of mine bought this film for 1, a~ <NA>
- #> 10 8713_10 "<br /><br />This movie is full of referen~ <NA>
- #> # ... with 4,990 more rows
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement