Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- private void jButton2ActionPerformed(java.awt.event.ActionEvent evt) {
- //File file = new File(name);
- //ff=file.listFiles();
- int[] mass;
- String regex = "";
- String regexSyfix = "";
- String regexSyfixEnd = "";
- String regexEnd = "";
- String regexPretext = "";
- String s;
- StringBuffer sb;
- Pattern p;
- Matcher m;
- Pattern p1;
- Matcher m1;
- int i;
- String end;
- File file = new File(name);
- ff=file.listFiles();
- Database.getInstance().openConnection();
- ArrayList<String> pretext = Database.getInstance().getAllPretext();
- ArrayList<String> array = Database.getInstance().getAllEndOf();
- ArrayList<String> array1 = Database.getInstance().getAllSufix();
- ArrayList<String> array2 = new ArrayList<>();
- array2.addAll(array1);
- array2.addAll(array);
- for (String el : array2) {
- regex += "\\b[^А-Яа-яіїє][а-яіїє’]*" + el + "\\b(?!’)|";
- }
- regex = regex.substring(0, regex.length() - 1);
- for (String el : array1) {
- regexSyfix += el + "|";
- regexSyfixEnd += el + "\\b(?!’)|";
- }
- for (String el : array) {
- regexEnd += el + "\\b(?!’)|";
- }
- for (String el : pretext) {
- regexPretext += el + "\\s*$|";
- }
- regexSyfixEnd = regexSyfixEnd.substring(0, regexSyfixEnd.length() - 1);
- regexSyfix = regexSyfix.substring(0, regexSyfix.length() - 1);
- regexEnd = regexEnd.substring(0, regexEnd.length() - 1);
- regexPretext = regexPretext.substring(0, regexPretext.length() - 1);
- //Map<Integer, HashMap <Integer,HashSet<Integer>>> charct = new HashMap<Integer, HashMap<Integer,HashSet<Integer>>>();
- // Map<Integer, HashMap <String, HashSet> charct = new HashMap<>();
- Map <Integer, HashMap<String, HashSet<Integer>>> charct = new HashMap<>();
- //Map <String, Integer> charct = new HashMap <String, Integer>();
- for (int ii = 1; ii < 5; ii++) {
- charct.put(ii, new HashMap<String, HashSet<Integer>>());
- charct.get(ii).put("V", new HashSet<Integer>());
- charct.get(ii).put("N", new HashSet<Integer>());
- charct.get(ii).put("R", new HashSet<Integer>());
- }
- for (File entry : file.listFiles()) {
- try {
- s = WorkWithText.read(entry);
- sb = new StringBuffer(s);
- p = Pattern.compile(regex);
- m = p.matcher(s);
- i = 0;
- while (m.find()) {
- if (m.group().replaceAll("ти\\b", "").length() == m.group().length()) {
- p1 = Pattern.compile(regexSyfix);
- m1 = p1.matcher(m.group());
- if (m1.find()) {
- end = getEnd(regexEnd, m.group());
- if (end.length() == 0) {
- sb.insert(m.start() + i + m.group().length(), " <pos=\"N\">");
- i += 10;
- sb.insert(m.start() + m.group().length() + i, "</>");
- i += 3;
- } else {
- String concat = m.group().substring(0, m.group().length() - end.length());
- if (getEnd(regexSyfixEnd, concat).length() != 0) {
- Pattern p2 = Pattern.compile(regexPretext);
- Matcher m2 = p2.matcher(sb.substring(m.start() - 16, m.start() - 1));
- HashMap<Integer, int[]> map = Database.getInstance().getAllIdOfEnding(end);
- if (m2.find()) {
- String str = "";
- for (Map.Entry<Integer, int[]> entr : map.entrySet()) {
- if (entr.getValue()[1] != 1) {
- getProperties(entr.getValue(), charct);
- }
- }
- str = getStringProp(charct);
- sb.insert(m.start() + i + m.group().length(), " <pos=\"N\" gram=" + str + ">");
- i += str.length() + 17;
- sb.insert(m.start() + m.group().length() + i, "</>");
- i += 3;
- } else {
- // <pos="N" gram="I, i, s, c | f | m" />
- String str1 = "";
- for (Map.Entry<Integer, int[]> entr : map.entrySet()) {
- getProperties(entr.getValue(), charct);
- }
- str1 = getStringProp(charct);
- sb.insert(m.start() + i + m.group().length(), " <pos=\"N\" gram=" + str1 + ">");
- i += str1.length() + 17;
- sb.insert(m.start() + m.group().length() + i, "</>");
- i += 3;
- }
- }
- }
- // mass = Database.getInstance().getCharacteristicsOfId(Database.getInstance().getIdEndOf(end));
- }
- }
- }
- writeUsingFiles(sb.toString().replaceAll("\n", "\r\n"), entry.getName());
- } catch (FileNotFoundException ex) {
- Logger.getLogger(Analyzer.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement