jorandradefig

tm.R

Jun 20th, 2021
792
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #########
  2. # Date: 20/6/21
  3. # Author: JA
  4. # #######
  5.  
  6. pacman::p_load(
  7.   tidyverse,
  8.   shiny,
  9.   tm,
  10.   plotly,
  11.   wordcloud,
  12.   treemap,
  13.   viridis
  14. )
  15.  
  # Data preparation: load the songs, keep a sample of Spanish tracks, and
  # build the term-frequency vector `freq` used by the charts further down.

  # Print the working directory; the CSV path below is relative to it.
  getwd()

  songs <- read_csv("jorandradefig/193/input/spotify_songs.csv")
  songs <- songs %>%
    filter(language == "es")
  #songs[1,]
  #songs[,1]

  # Work on at most 500 songs. slice_sample() silently truncates `n` to the
  # number of available rows, whereas sample(nrow(songs), 500) raises an error
  # when fewer than 500 Spanish songs exist. The seed (same value the script
  # uses for the wordcloud) makes the sample reproducible between runs.
  set.seed(150)
  songs <- songs %>%
    slice_sample(n = 500)

  head(songs$lyrics)

  tail(songs$lyrics)

  # Songs without lyrics carry no usable text, so they are left out of the
  # corpus (they stay in `songs`).
  lyrics <- songs$lyrics
  lyrics <- lyrics[!is.na(lyrics)]

  lyrics <- Corpus(VectorSource(lyrics))

  # Interactive reference only: the available stop-word lists.
  stopwords("english")
  stopwords("spanish")

  # Normalise BEFORE removing stop words. removeWords() is case-sensitive and
  # the stop-word list is lower-case, while DocumentTermMatrix() lower-cases
  # terms by default; without this step capitalised stop words ("Que", "La")
  # survive the removal and re-enter the counts in lower case.
  lyrics <- tm_map(lyrics, content_transformer(tolower))
  # Strip punctuation so "amor," and "amor" are counted as the same term;
  # ucp = TRUE also covers Unicode punctuation such as the inverted marks.
  lyrics <- tm_map(lyrics, removePunctuation, ucp = TRUE)
  lyrics <- tm_map(lyrics, removeWords, stopwords("spanish"))
  # stemming (not applied)

  #writeLines(as.character(lyrics[1]))

  # Documents in rows, terms in columns.
  dtm <- DocumentTermMatrix(lyrics)

  #tdm <- TermDocumentMatrix(lyrics)

  #as.matrix(dtm)

  #as.data.frame(as.matrix(dtm))

  # Total occurrences of each term across all documents, most frequent first.
  freq <- colSums(as.matrix(dtm))
  #ocurrences <- colSums(as.matrix(dtm))

  freq <- sort(freq, decreasing = TRUE)

  # Terms that appear at least 200 times in the corpus.
  findFreqTerms(dtm, lowfreq = 200)
  55.  
  56. #names(freq)
  57.  
  # Term-frequency table: one row per term together with its total count.
  ocurrences <- data.frame(
    term = names(freq),
    ocurrences = freq
  )

  # Bar chart of the terms that occur more than 300 times, ordered from the
  # most to the least frequent, with slanted x labels so they stay readable.
  plot <- ocurrences %>%
    subset(ocurrences > 300) %>%
    ggplot(aes(x = reorder(term, -ocurrences), y = ocurrences)) +
    geom_col() +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  # Interactive (plotly) version of the chart.
  ggplotly(plot)

  # Word cloud of the terms seen at least 200 times; the seed fixes the
  # random layout so the picture is the same on every run.
  set.seed(150)
  wordcloud(
    words = names(freq),
    freq = freq,
    scale = c(4, 0.1),
    min.freq = 200,
    colors = brewer.pal(6, "Dark2")
  )
  83.  
  84.  
  85. #homicidios %>%
  86. #  group_by(anio) %>%
  87. #  summarise(
  88. #    hom_total=sum(hom_total)
  89. #  )
  90.  
  91. # homicidios %>%
  92. # filter(hom_total > 100)
  93.  
  94. ###############
  95.  
  # Page layout: the sidebar holds a playlist selector ("lista") whose options
  # are the distinct playlist names found in `songs`; the main panel declares
  # a table output ("tabla") and a plotly output ("plot").
  ui <- fluidPage(
    sidebarLayout(
      sidebarPanel(
        selectInput(
          "lista",
          "Lista de reproducción:",
          choices = unique(songs$playlist_name),
          selected = "Flow Selecto"
        )
      ),
      mainPanel(
        tableOutput("tabla"),
        plotlyOutput("plot")
      )
    )
  )
  116.  
  117.  
  118.  
  # Shiny server logic.
  #
  # Reads `input$lista` (the playlist picked in the sidebar) and fills the two
  # outputs declared in the UI:
  #   * output$tabla - the songs of the selected playlist.
  #   * output$plot  - an interactive bar chart of the most frequent terms in
  #                    the lyrics of that playlist.
  #
  # Arguments:
  #   input, output - the standard objects Shiny passes to a server function.
  server <- function(input, output) {
    # Songs of the currently selected playlist; shared by both outputs so the
    # filter is evaluated once per selection.
    lista <- reactive({
      req(input$lista)
      songs %>%
        filter(playlist_name == input$lista)
    })

    output$tabla <- renderTable({
      lista()
    })

    # Term counts for the selected playlist, most frequent first.
    frecuencias <- reactive({
      letras <- lista()$lyrics
      letras <- letras[!is.na(letras)]
      # Nothing to analyse: stop quietly instead of raising an error.
      req(length(letras) > 0)

      corpus <- Corpus(VectorSource(letras))
      # Lower-case and strip punctuation first: removeWords() is
      # case-sensitive and the stop-word list is lower-case.
      corpus <- tm_map(corpus, content_transformer(tolower))
      corpus <- tm_map(corpus, removePunctuation, ucp = TRUE)
      corpus <- tm_map(corpus, removeWords, stopwords("spanish"))

      conteo <- colSums(as.matrix(DocumentTermMatrix(corpus)))
      sort(conteo, decreasing = TRUE)
    })

    # The UI declares plotlyOutput("plot"); without this assignment the chart
    # area stays empty.
    output$plot <- renderPlotly({
      conteo <- frecuencias()
      req(length(conteo) > 0)

      repeticiones <- data.frame(
        term = names(conteo),
        ocurrences = unname(conteo)
      )

      # Show the 20 most frequent terms. A single playlist is only a slice of
      # the sampled songs, so a fixed cutoff such as "> 300 occurrences" would
      # likely leave nothing to draw.
      grafica <- ggplot(
        head(repeticiones, 20),
        aes(x = reorder(term, -ocurrences), y = ocurrences)
      ) +
        geom_col() +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))

      ggplotly(grafica)
    })
  }

  # Launch the application.
  shinyApp(ui, server)
  175.  
RAW Paste Data