Advertisement
theitd

Word Cloud MySQL

Nov 4th, 2018
276
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MySQL 2.08 KB | None | 0 0
  1. -- Given table NEWS with textual column FULL_TEXT
  2. DROP PROCEDURE IF EXISTS populate_counter;
  3. DELIMITER //
  4. CREATE PROCEDURE populate_counter ()
  5. BEGIN
  6.     DROP TABLE IF EXISTS counter;
  7.     CREATE TABLE counter (
  8.         id int unsigned NOT NULL AUTO_INCREMENT,
  9.         word VARCHAR(8000),
  10.         usageCount int(20),
  11.         primary key (id)
  12.     );
  13.    
  14.     SET @wordCt  = 0;
  15.     SET @tokenCt = 1;
  16.  
  17.     contentLoop: LOOP
  18.         SET @stmt = 'INSERT INTO counter (word) SELECT REPLACE(SUBSTRING(SUBSTRING_INDEX(`full_text`, " ", ?),
  19.                                LENGTH(SUBSTRING_INDEX(`full_text`, " ", ? -1)) + 1),
  20.                                " ", "") word
  21.                     FROM news
  22.                     WHERE LENGTH(SUBSTRING_INDEX(`full_text`, " ", ?)) != LENGTH(`full_text`)';
  23.         PREPARE cmd FROM @stmt;
  24.         EXECUTE cmd USING @tokenCt, @tokenCt, @tokenCt;
  25.         SELECT ROW_COUNT() INTO @wordCt;
  26.         DEALLOCATE PREPARE cmd;
  27.         IF (@wordCt = 0) THEN
  28.             LEAVE contentLoop;
  29.         ELSE
  30.             SET @tokenCt = @tokenCt + 1;
  31.         END IF;
  32.     END LOOP;
  33.  
  34. END //
  35. DELIMITER ;
  36.  
  37. call populate_counter();
  38.  
  39. -- Now clean up COUNTER by removing common words
  40. DROP PROCEDURE IF EXISTS common_exclude;
  41. DELIMITER //
  42. CREATE PROCEDURE common_exclude ()
  43. BEGIN
  44.  
  45. DECLARE cursor_ID INT;
  46. DECLARE cursor_VAL VARCHAR(100);
  47. DECLARE done INT DEFAULT 0;
  48.  
  49. DECLARE cursor_i CURSOR FOR
  50. SELECT id,word FROM common_words;
  51.  
  52. DECLARE CONTINUE HANDLER FOR NOT FOUND SET done=1;
  53.  
  54. OPEN cursor_i;
  55.     contentLoop: LOOP
  56.  
  57.         FETCH cursor_i INTO cursor_ID, cursor_VAL;
  58.         IF done THEN
  59.           LEAVE contentLoop;
  60.         END IF;
  61.    
  62.         DELETE FROM counter WHERE word = cursor_VAL;
  63.     END LOOP;
  64.  
  65. CLOSE cursor_i;
  66. END //
  67. DELIMITER ;
  68.  
  69. call common_exclude();
  70.  
  71. DELETE FROM counter WHERE CHAR_LENGTH(word) <= 2;
  72. DELETE FROM counter WHERE word LIKE '%>%' OR word LIKE '%/%' OR word LIKE '%<%';
  73.  
  74. DROP TABLE IF EXISTS top_words;
  75. CREATE TABLE top_words SELECT word, count(*) usageCount FROM counter GROUP BY word ORDER BY usageCount DESC;
  76.  
  77. SELECT * FROM top_words;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement