Advertisement
Guest User

Untitled

a guest
Mar 9th, 2017
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.35 KB | None | 0 0
  1. package edu.wiki.demo;
  2.  
  3. import edu.wiki.api.concept.IConceptIterator;
  4. import edu.wiki.api.concept.IConceptVector;
  5. import edu.wiki.search.ESASearcher;
  6.  
  7. import java.io.*;
  8. import java.sql.Connection;
  9. import java.sql.DriverManager;
  10. import java.sql.SQLException;
  11. import java.sql.Statement;
  12. import java.util.*;
  13. import java.util.logging.Logger;
  14.  
  15. public class TestGeneralESAVectors {
  16.  
  17. static Connection connection;
  18. static Statement stmtQuery;
  19. private ESASearcher searcher;
  20. private static final Logger LOGGER = Logger.getLogger(TestGeneralESAVectors.class.getName());
  21. private static boolean isDone = false;
  22.  
  23. public static void initDB() throws ClassNotFoundException, SQLException, IOException {
  24. // Load the JDBC driver
  25. String driverName = "com.mysql.jdbc.Driver"; // MySQL Connector
  26. Class.forName(driverName);
  27.  
  28. // read DB config
  29. InputStream is = ESASearcher.class.getResourceAsStream("/config/db.conf");
  30. BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
  31. String serverName = br.readLine();
  32. String mydatabase = br.readLine();
  33. String username = br.readLine();
  34. String password = "123456";
  35. br.close();
  36.  
  37. // Create a connection to the database
  38. String url = "jdbc:mysql://" + serverName + "/" + mydatabase; // a JDBC url
  39. connection = DriverManager.getConnection(url, username, password);
  40. stmtQuery = connection.createStatement();
  41. stmtQuery.setFetchSize(100);
  42. }
  43.  
  44. /**
  45. * @throws IOException
  46. * @throws SQLException
  47. * @throws ClassNotFoundException
  48. */
  49. public String getVector(String text) throws ClassNotFoundException, SQLException, IOException {
  50. if (!isDone) {
  51. searcher = new ESASearcher();
  52. initDB();
  53. isDone = true;
  54. }
  55. int limit = 500000; //set limit to big value to get all concepts
  56. IConceptVector cvBase = searcher.getConceptVector(text);
  57. IConceptVector cvNormal = searcher.getNormalVector(cvBase, limit);
  58. if (cvNormal == null) {
  59. LOGGER.info("empty concept vector => " + text);
  60. return "";
  61. }
  62.  
  63. IConceptIterator it = cvNormal.orderedIterator();
  64.  
  65. int count = 0;
  66. TreeMap<Integer, Double> tree = new TreeMap<>();
  67. while (it.next() && count < limit) {
  68. tree.put(it.getId(), it.getValue());
  69. count++;
  70. }
  71.  
  72. String ret = "";
  73. for (Map.Entry<Integer, Double> entry : tree.entrySet()) {
  74. ret += " " + entry.getKey() + ":" + entry.getValue();
  75. }
  76.  
  77. return ret;
  78. }
  79.  
  80. private String getLine(String line) throws SQLException, IOException, ClassNotFoundException {
  81. //1,2,3, text tweet_id
  82. //arr[0] arr[1] arr[arr.length-1]
  83. String arr[] = line.split("\\s");
  84. String label = arr[0];
  85. String tweetId = arr[arr.length - 1];
  86. String text = "";
  87. for (int i = 1; i < arr.length - 1; i++) {
  88. text += " " + arr[i];
  89. }
  90. String vector = getVector(text);
  91. //removing the last comma from label
  92. return (label.substring(0, label.length() - 1) + vector + " #" + tweetId);
  93. }
  94.  
  95. private void start(String inputFile, String outputFile) throws IOException, SQLException, ClassNotFoundException {
  96. PrintWriter writer = new PrintWriter(outputFile);
  97. BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), "UTF-8"));
  98. int cnt = 0;
  99. String line;
  100. while ((line = br.readLine()) != null) {
  101. String res = getLine(line);
  102. writer.println(res);
  103. cnt++;
  104. if (cnt % 1000 == 0)
  105. LOGGER.info(cnt + " lines are done");
  106. }
  107. writer.close();
  108. }
  109.  
  110. public static void main(String args[]) throws IOException, SQLException, ClassNotFoundException {
  111. if (args != null && args.length == 2) {
  112. TestGeneralESAVectors test = new TestGeneralESAVectors();
  113. LOGGER.info("source file " + args[0] + " " + "destination file " + args[1]);
  114. test.start(args[0], args[1]);
  115. } else {
  116. LOGGER.info("wrong number of args -> system exit with -1");
  117. System.exit(-1);
  118. }
  119. }
  120. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement