Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package edu.wiki.demo;
- import edu.wiki.api.concept.IConceptIterator;
- import edu.wiki.api.concept.IConceptVector;
- import edu.wiki.search.ESASearcher;
- import java.io.*;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.*;
- import java.util.logging.Logger;
- public class TestGeneralESAVectors {
- static Connection connection;
- static Statement stmtQuery;
- private ESASearcher searcher;
- private static final Logger LOGGER = Logger.getLogger(TestGeneralESAVectors.class.getName());
- private static boolean isDone = false;
- public static void initDB() throws ClassNotFoundException, SQLException, IOException {
- // Load the JDBC driver
- String driverName = "com.mysql.jdbc.Driver"; // MySQL Connector
- Class.forName(driverName);
- // read DB config
- InputStream is = ESASearcher.class.getResourceAsStream("/config/db.conf");
- BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
- String serverName = br.readLine();
- String mydatabase = br.readLine();
- String username = br.readLine();
- String password = "123456";
- br.close();
- // Create a connection to the database
- String url = "jdbc:mysql://" + serverName + "/" + mydatabase; // a JDBC url
- connection = DriverManager.getConnection(url, username, password);
- stmtQuery = connection.createStatement();
- stmtQuery.setFetchSize(100);
- }
- /**
- * @throws IOException
- * @throws SQLException
- * @throws ClassNotFoundException
- */
- public String getVector(String text) throws ClassNotFoundException, SQLException, IOException {
- if (!isDone) {
- searcher = new ESASearcher();
- initDB();
- isDone = true;
- }
- int limit = 500000; //set limit to big value to get all concepts
- IConceptVector cvBase = searcher.getConceptVector(text);
- IConceptVector cvNormal = searcher.getNormalVector(cvBase, limit);
- if (cvNormal == null) {
- LOGGER.info("empty concept vector => " + text);
- return "";
- }
- IConceptIterator it = cvNormal.orderedIterator();
- int count = 0;
- TreeMap<Integer, Double> tree = new TreeMap<>();
- while (it.next() && count < limit) {
- tree.put(it.getId(), it.getValue());
- count++;
- }
- String ret = "";
- for (Map.Entry<Integer, Double> entry : tree.entrySet()) {
- ret += " " + entry.getKey() + ":" + entry.getValue();
- }
- return ret;
- }
- private String getLine(String line) throws SQLException, IOException, ClassNotFoundException {
- String arr[] = line.split("\\s");
- String s = arr[1];
- for (int i = 2; i < arr.length; i++) {
- s += " " + arr[i];
- }
- String vector = getVector(s);
- return (arr[0].substring(0, arr[0].length() - 1) + vector);
- }
- private void start(String inputFile, String outputFile) throws IOException, SQLException, ClassNotFoundException {
- PrintWriter writer = new PrintWriter(outputFile);
- Scanner input = new Scanner(new File(inputFile));
- int cnt = 0;
- while (input.hasNextLine()) {
- String line = input.nextLine();
- String res = getLine(line);
- writer.println(res);
- cnt++;
- if (cnt % 1000 == 0)
- LOGGER.info(cnt + " lines are done");
- }
- writer.close();
- input.close();
- }
- public static void main(String args[]) throws IOException, SQLException, ClassNotFoundException {
- TestGeneralESAVectors test = new TestGeneralESAVectors();
- test.start("/home/moustah/PycharmProjects/twitter/twitter API/training/partitions/part_0.txt", "part_0_output.txt");
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement