# tm.R

#########
# Date: 20/6/21
# Author: JA
# #######

pacman::p_load(
  tidyverse,
  shiny,
  tm,
  plotly,
  wordcloud,
  treemap,
  viridis
)

# Show the working directory: the CSV path below is relative to it.
getwd()

# Load the Spotify songs data set and keep only the Spanish-language songs.
songs <- read_csv("jorandradefig/193/input/spotify_songs.csv")
songs <- songs %>%
  filter(language == "es")
# Indexing reminders: songs[1, ] is the first row, songs[, 1] the first column.

# Work on a random subsample of at most 500 songs.
# - The seed is fixed so that the sample (and therefore the frequency
#   thresholds used further down) is the same on every run.
# - The sample size is capped at nrow(songs): asking for more rows than exist
#   makes sample() fail when sampling without replacement.
set.seed(150)
songs <- songs[sample.int(nrow(songs), min(500L, nrow(songs))), ]

# Peek at the first and last lyrics of the sample.
head(songs$lyrics)

tail(songs$lyrics)

# Build a tm corpus with one document per song lyric.
lyrics <- songs$lyrics

lyrics <- Corpus(VectorSource(lyrics))

# Print the built-in stop-word lists for inspection.
stopwords("english")
stopwords("spanish")

# Lower-case the text BEFORE removing stop words: removeWords() is
# case-sensitive and the stop-word list is lower-case, while
# DocumentTermMatrix() lower-cases the text by default only afterwards.
# Without this step capitalised stop words ("Que", "Y", ...) would survive the
# filter and still be counted as terms.
lyrics <- tm_map(lyrics, content_transformer(tolower))
lyrics <- tm_map(lyrics, removeWords, stopwords("spanish"))
# TODO: stemming is not applied yet.

# Debug helper: writeLines(as.character(lyrics[1]))

# Document-term matrix: one row per song, one column per term.
dtm <- DocumentTermMatrix(lyrics)

# Alternative orientation: tdm <- TermDocumentMatrix(lyrics)
# Debug helpers: as.matrix(dtm); as.data.frame(as.matrix(dtm))

# Total number of occurrences of each term across all songs, most frequent
# first.
freq <- colSums(as.matrix(dtm))
freq <- sort(freq, decreasing = TRUE)

# Print the terms that appear at least 200 times.
findFreqTerms(dtm, lowfreq = 200)

# Debug helper: names(freq)

# Term/count table used by the bar chart below.
ocurrences <- data.frame(term = names(freq), ocurrences = freq)

# Bar chart of the terms that occur more than 300 times, ordered from the most
# to the least frequent, with slanted x labels so the terms stay readable.
plot <- ggplot(
  data = ocurrences[ocurrences$ocurrences > 300, ],
  mapping = aes(x = reorder(term, -ocurrences), y = ocurrences)
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display the chart as an interactive plotly widget.
ggplotly(plot)

# Word cloud of the terms that occur at least 200 times. The seed is fixed
# because wordcloud() places the words at random.
set.seed(150)
wordcloud(
  words = names(freq),
  freq = freq,
  scale = c(4, 0.1),
  min.freq = 200,
  colors = brewer.pal(6, "Dark2")
)


#homicidios %>%
#  group_by(anio) %>%
#  summarise(
#    hom_total=sum(hom_total)
#  )

# homicidios %>%
# filter(hom_total > 100)

###############

# Shiny UI: a sidebar with a playlist selector and a main panel holding the
# table of songs ("tabla") and the plotly chart ("plot").
ui <- fluidPage(
  sidebarLayout(
    # Sidebar: choose one of the playlists present in the sampled songs.
    sidebarPanel(
      selectInput(
        inputId = "lista",
        label = "Lista de reproducción:",
        choices = unique(songs$playlist_name),
        selected = "Flow Selecto"
      )
    ),
    # Main panel: outputs filled in by the server function.
    mainPanel(
      tableOutput(outputId = "tabla"),
      plotlyOutput(outputId = "plot")
    )
  )
)


# Shiny server: shows the songs of the selected playlist and a bar chart of
# the most frequent terms in their lyrics.
#
# input  - reads input$lista, the playlist name chosen in the sidebar.
# output - fills output$tabla (table of songs) and output$plot (plotly chart).
server <- function(input, output) {
  # Number of terms shown in the chart. A fixed occurrence cutoff (such as the
  # 300 used for the whole sample) would leave the chart empty for a single
  # playlist, so the most frequent terms are shown instead.
  top_terms <- 20L

  # Songs of the currently selected playlist; shared by both outputs.
  lista <- reactive({
    req(input$lista)
    songs %>%
      filter(playlist_name == input$lista)
  })

  output$tabla <- renderTable({
    lista()
  })

  # Term frequencies for the selected playlist, most frequent first.
  frecuencias <- reactive({
    letras <- lista()$lyrics
    letras <- letras[!is.na(letras)]
    validate(need(length(letras) > 0, "No hay letras para esta lista."))

    corpus <- Corpus(VectorSource(letras))
    # Lower-case first: removeWords() is case-sensitive and the stop-word
    # list is lower-case.
    corpus <- tm_map(corpus, content_transformer(tolower))
    corpus <- tm_map(corpus, removeWords, stopwords("spanish"))

    sort(colSums(as.matrix(DocumentTermMatrix(corpus))), decreasing = TRUE)
  })

  output$plot <- renderPlotly({
    mas_frecuentes <- head(frecuencias(), top_terms)
    validate(need(length(mas_frecuentes) > 0, "No hay términos para graficar."))

    repeticiones <- data.frame(
      term = names(mas_frecuentes),
      ocurrences = as.numeric(mas_frecuentes)
    )

    grafica <- ggplot(
      repeticiones,
      aes(
        x = reorder(term, -ocurrences),
        y = ocurrences
      )
    ) +
      geom_bar(stat = "identity") +
      theme_minimal() +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

    ggplotly(grafica)
  })

  # TODO: a per-playlist word cloud (wordcloud() on frecuencias()) is not
  # implemented; the UI has no output slot for it yet.
}

# Launch the Shiny application with the UI and server defined above.
shinyApp(ui = ui, server = server)