Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package Parsing;
- import static java.util.stream.Collectors.toMap;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintWriter;
- import java.io.UnsupportedEncodingException;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.LinkedHashMap;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import com.github.javaparser.ParseException;
- import Structures.Attribute;
- import Structures.Class;
- import Structures.Comment;
- import Structures.Method;
- import Structures.Package;
- import Structures.Variable;
- public class ApplicationParse {
- public static void main(String[] args) throws ParseException, IOException {
- String path = "C:\\Users\\THINKPAD W541\\eclipse-workspace\\org.eclipse.acceleo-3.7.8";
- File projectDir = new File(path);
- DB.connect();
- String projectName = new File(path).getName();
- System.out.println("Projet ------- " + projectName);
- ListClassParsed.listClasses(projectDir, projectName);
- String vocabulaire = "";
- StanfordLemmatizer lematizer = new StanfordLemmatizer();
- vocabulaire = Method.extractName(DB.connection);
- /*try {
- vocabulaire = Attribute.extractName(DB.connection);
- PrintWriter writer = new PrintWriter("Vocabulaire_Attributes.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- vocabulaire = Class.extractName(DB.connection);
- writer = new PrintWriter("Vocabulaire_Classes.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- vocabulaire = Comment.extractContent(DB.connection);
- wordCount(vocabulaire);
- writer = new PrintWriter("Vocabulaire_Comments.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- vocabulaire = Method.extractName(DB.connection);
- writer = new PrintWriter("Vocabulaire_Methods.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- vocabulaire = Package.extractName(DB.connection);
- writer = new PrintWriter("Vocabulaire_Packages.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- vocabulaire = Variable.extractName(DB.connection);
- writer = new PrintWriter("Vocabulaire_Variables.txt", "UTF-8");
- writer.print(remDoubleMot(lematizer.lemmatize(vocabulaire)));
- writer.close();
- } catch (FileNotFoundException | UnsupportedEncodingException e1) {
- System.err.println(e1.getMessage());
- }*/
- try {
- DB.connection.close();
- } catch (SQLException e) {
- System.out.println(e.getMessage());
- }
- }
- public static List remDoubleMot(List<String> list) {
- List<String> newlist = new ArrayList();
- Set<String> uniqueWords = new HashSet<String>(list);
- for (String word : uniqueWords) {
- if (Collections.frequency(list, word) > 0) {
- newlist.add(word);
- }
- }
- return newlist;
- }
- static void wordCount(String text) {
- String[] b = text.split(" ");
- HashMap<String, Integer> freqMap = new HashMap<String, Integer>();
- for (int i = 0; i < b.length; i++) {
- String key = b[i];
- int freq = freqMap.getOrDefault(key, 0);
- freqMap.put(key, ++freq);
- }
- freqMap = freqMap.entrySet().stream().sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
- .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2, LinkedHashMap::new));
- System.out.println(freqMap);
- //sortByValue(freqMap);
- /*
- * for (Entry<String, Integer> result : freqMap.entrySet()) {
- * System.out.println(result.getKey() + " " + result.getValue()); }
- */
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement