Guest User

Untitled

a guest
Nov 9th, 2017
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.89 KB | None | 0 0
  1. #
  2. # This is the server logic of a Shiny web application. You can run the
  3. # application by clicking 'Run App' above.
  4.  
  5. library(shiny)
  6. library(stringr)
  7. library(tidytext)
  8. library(tidyverse)
  9.  
  10.  
  # Raise Shiny's per-request upload cap (default file input size) to 50 MB.
  # The option is expressed in bytes: 50 * 1024 * 1024.
  options(shiny.maxRequestSize = 50 * 1024 * 1024)
  13.  
  14. # Define server logic required
  # Server logic: parse an uploaded delimited text file, flag the lines of its
  # first column that contain personal information (PAN-style identifiers,
  # e-mail addresses, folio numbers) and expose the data and a summary plot.
  #
  # Inputs read: input$text_file (uploaded file), input$sep (field delimiter).
  # Outputs set: output$textData (table), output$ggplot (bar chart),
  #              output$tb (dynamic UI switching between splash image and tabs).
  shinyServer(function(input, output) {

    # Uploaded file parsed into a tibble; NULL until a file has been chosen.
    # Columns are read without a header row (col_names = FALSE); the first
    # column is referred to as X1 further down.
    text_data <- reactive({
      file_with_text <- input$text_file
      if (is.null(file_with_text)) {
        return(NULL)
      }
      read_delim(
        file = file_with_text$datapath,
        delim = input$sep,
        col_names = FALSE,
        locale = locale(encoding = "WINDOWS-1252")
      )
    })

    # Regex lexicon, one pattern per type of personal information.
    # PAN-style identifier: five capitals, four digits, one capital. The
    # former "PAN <id>" / "PAN <word> <id>" alternatives always contained this
    # core pattern, so for detection purposes they were redundant.
    pan_core <- "[A-Z]{5}[[:digit:]]{4}[A-Z]"
    info_patterns <- c(
      pan_no = pan_core,
      # Literal dot before the TLD (was ".{1}", i.e. any character), explicit
      # [A-Za-z] (the range A-z also spans "[", "\", "]", "^", "_", "`"), and
      # two-letter TLDs such as ".in" are accepted.
      email_address = ".{1,}@.{1,}\\.[A-Za-z]{2,}",
      folio_no = "[Ff]olio.{1,}[0-9]{10}"
    )

    # Text following "For example" up to and including a PAN-style identifier
    # is an illustration, not real data; it is blanked out before detection.
    example_pan <- paste0("(?<=For example).{1,}", pan_core)

    # Long-format detection results: one row per (line, information type),
    # ordered by line and then by pattern, with columns
    #   line_number      - character index of the line in the first column
    #   string_presence  - TRUE/FALSE/NA result of the pattern match
    #   information_type - name of the pattern that was tested
    df <- reactive({
      parsed <- req(text_data()) # nothing to compute until a file is uploaded
      lines <- as.character(parsed[["X1"]]) # character(0) if the file is empty

      cleaned <- str_replace_all(lines, example_pan, "")
      # Each element is a logical vector with one entry per pattern.
      hits <- map(cleaned, str_detect, pattern = unname(info_patterns))

      tibble(
        line_number = rep(
          as.character(seq_along(cleaned)),
          each = length(info_patterns)
        ),
        # as.logical() keeps the column typed when there are no lines at all.
        string_presence = as.logical(unlist(hits)),
        information_type = rep(names(info_patterns), times = length(cleaned))
      )
    })

    # Output 1: the parsed data as a table (renders nothing while NULL).
    output$textData <- renderTable({
      text_data()
    })

    # Output 2: number of matching lines per information type, with the bars
    # filled by line number. Rows whose match result is FALSE or NA are dropped.
    output$ggplot <- renderPlot({
      df() %>%
        filter(string_presence) %>%
        ggplot(aes(information_type)) +
        geom_bar(aes(fill = line_number))
    })

    # Dynamic UI: splash image until a file has been uploaded, afterwards a
    # tab set holding the data table and the plot.
    output$tb <- renderUI({
      if (is.null(text_data())) {
        h5(
          "Powered by",
          # "height" was misspelled "heigth", so the attribute was ignored.
          tags$img(src = "rstudio-stringr.png", height = 600, width = 600)
        )
      } else {
        tabsetPanel(
          tabPanel("Data", tableOutput("textData")),
          tabPanel("Plot", plotOutput("ggplot"))
        )
      }
    })

  })
Add Comment
Please, Sign In to add comment