Guest User

Untitled

a guest
Dec 17th, 2018
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.98 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <cctype>
  3. #include <vector>
  4. #include <string.h>
  5. #include <regex.h>
  6. #include <boost/algorithm/string.hpp>
  7. #include <boost/regex.hpp>
  8. #include <algorithm>
  9. #include <map>
  10. #include <mysql.h>
  11.  
  12.  
  13. using namespace std;
  14. using namespace boost;
  15.  
  16. string StripTags(string HTML);
  17.  
  18. // -----------------------------------------------------------------------------
  19.  
  20. string StripTags(string HTML)
  21. {
  22. bool InTag=false;
  23. int Counter=0;
  24. while(Counter<HTML.length())
  25. {
  26.  
  27. if("<"==HTML.substr(Counter,1))
  28. {
  29. InTag=true;
  30. }
  31. else if(">"==HTML.substr(Counter,1))
  32. {
  33. HTML.replace(Counter,1,"");
  34. Counter--;
  35. InTag=false;
  36. }
  37. if(true==InTag)
  38. {
  39. HTML.replace(Counter,1,"");
  40. Counter--;
  41. }
  42.  
  43. Counter++;
  44. }
  45. return HTML;
  46. }
  47.  
  48.  
  49. string tableName = "keywords";
  50. // -----------------------------------------------------------------------------
  51. int iMaxWords = 3;
  52.  
  53. class oPhraseParser {
  54. private:
  55. vector<string> aList;
  56. vector<string> aPhraseList;
  57. map<string, int> PhraseData;
  58.  
  59. bool is_number(const std::string& s)
  60. {
  61. for (int i = 0; i < s.length(); i++) {
  62. if (!std::isdigit(s[i]) )
  63. if (s[i] != '.') {
  64. return false;
  65. }
  66. }
  67.  
  68. return true;
  69. }
  70. bool allowParse(string Word) {
  71. if (is_number(Word)) {
  72. return false;
  73. }
  74. return true;
  75. }
  76. void addPhrase(vector<string>strs, int iNr, int iPlus) {
  77. int i = 0;
  78. string phrase = "";
  79. int iCurrent2, iWordsCount;
  80. iWordsCount = 0;
  81. for (i=0; i<=iPlus; i++) {
  82. iCurrent2 = iNr + i;
  83. if (strs.size() > iCurrent2) {
  84. string myWord = strs[iCurrent2];
  85. trim(myWord);
  86. if (!is_number(myWord)) {
  87. phrase = phrase + " " + myWord;
  88. iWordsCount++;
  89. }
  90. }
  91. }
  92. trim(phrase);
  93. if (iWordsCount > 1) {
  94. if (PhraseData[phrase]) {
  95. PhraseData[phrase]++;
  96. }
  97. else {
  98. PhraseData[phrase] = 1;
  99. }
  100. }
  101. }
  102.  
  103. public:
  104. void getResult() {
  105. printf("DROP TABLE IF EXISTS %s;\n", tableName.c_str());
  106. printf("CREATE TABLE `%s` (`keyword` varchar(255) NOT NULL,`freq` int(11) NOT NULL) ENGINE=MyISAM DEFAULT CHARSET=utf8;\n", tableName.c_str());
  107. int i = 0;
  108. for( map<string, int>::iterator ii=PhraseData.begin(); ii!=PhraseData.end(); ++ii)
  109. {
  110. if ((*ii).second > 1 && (*ii).first!="") {
  111. if (i!=0) {
  112. printf (",\n");
  113. }
  114. else {
  115. printf("INSERT INTO `%s` VALUES ", tableName.c_str());
  116. }
  117. i++;
  118.  
  119. // char *s1 = (*ii).first;
  120. // char *s2 = (*ii).second;
  121.  
  122. int skaicius = (*ii).second;
  123.  
  124. printf("('%s'",(*ii).first.c_str());
  125. printf(",'%d')",skaicius);
  126. if (i >= 1000) {
  127. printf (";\n");
  128. i = 0;
  129. }
  130.  
  131. }
  132. }
  133. if (i != 0) {
  134. printf (";");
  135. }
  136. }
  137. void add(string text) {
  138.  
  139. vector<string> strs;
  140. int iNr;
  141.  
  142.  
  143. trim(text); //triming whitespaces
  144. text.erase(std::remove(text.begin(), text.end(), '\n'), text.end()); //remove new line
  145.  
  146. text = StripTags(text);
  147. boost::split(strs, text, boost::is_any_of(" .:;,"));
  148. for (iNr = 0; iNr < strs.size(); iNr++) {
  149.  
  150. if (allowParse(strs[iNr]) == true) {
  151. int iCurrent = 0;
  152. while (iCurrent < iMaxWords && (iCurrent+iNr) < strs.size()) {
  153. addPhrase(strs, iNr, iCurrent);
  154. iCurrent++;
  155. }
  156. }
  157. }
  158.  
  159. }
  160. };
  161.  
  162.  
  163. int main (int argc, const char * argv[])
  164. {
  165.  
  166. oPhraseParser oParser;
  167.  
  168.  
  169. MYSQL *conn;
  170. MYSQL_RES *res;
  171. MYSQL_ROW row;
  172.  
  173. char *server = "localhost";
  174. char *user = "root";
  175. char *password = "vaidaszilionis";
  176. char *database = "sphinx";
  177.  
  178. conn = mysql_init(NULL);
  179.  
  180. if (!mysql_real_connect(conn, server,
  181. user, password, database, 0, NULL, 0)) {
  182. fprintf(stderr, "%s\n", mysql_error(conn));
  183. exit(1);
  184. }
  185.  
  186. if (mysql_query(conn, "Select concat(title, ' ',description,' ',body) as mytext FROM temp limit 100")) {
  187. fprintf(stderr, "%s\n", mysql_error(conn));
  188. exit(1);
  189. }
  190.  
  191. res = mysql_use_result(conn);
  192.  
  193. //output table name
  194. while ((row = mysql_fetch_row(res)) != NULL) {
  195. oParser.add(row[0]);
  196. }
  197.  
  198. mysql_free_result(res);
  199. mysql_close(conn);
  200.  
  201. oParser.getResult();
  202. return 0;
  203. }
Add Comment
Please, Sign In to add comment