Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.File;
- import java.io.PrintStream;
- import java.util.logging.Level;
- import java.util.logging.Logger;
- import javax.xml.parsers.DocumentBuilder;
- import javax.xml.parsers.DocumentBuilderFactory;
- import javax.xml.parsers.ParserConfigurationException;
- import org.w3c.dom.*;
- import org.w3c.dom.NodeList;
/**
 * Flattens a dictionary-style XML file into one {@code <body>} record per line.
 *
 * <p>Every {@code <p>} element of the input is visited in document order. For each one a line
 * of the form {@code <body><word>LETTERS</word> ...</body>} is written, where {@code LETTERS} is
 * the text of the {@code <hw>} child with every non-alphabetic character removed. If the
 * {@code <p>} has an {@code <sn>} child, the record contains numbered
 * {@code <sn>k</sn><mdef>...</mdef>} pairs gathered from this {@code <p>} and from the directly
 * following {@code <p>} elements that also contain {@code <sn>}; otherwise it contains a single
 * {@code <def>...</def>}.
 *
 * <p>NOTE: text is written exactly as returned by {@code getTextContent()}, i.e. characters such
 * as {@code &} or {@code <} are not re-escaped in the output.
 */
public class Main {

    private static final Logger LOGGER = Logger.getLogger(Main.class.getName());

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "D:\\parse.xml";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "D:\\parseXXX.xml";

    /**
     * Parses the input XML and writes the flattened records to the output file.
     *
     * <p>Any failure (unreadable input, malformed XML, unwritable output) is logged at
     * {@code SEVERE} level and not rethrown.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; missing values fall back to {@code D:\parse.xml} and
     *             {@code D:\parseXXX.xml}
     */
    public static void main(String[] args) {
        File file = new File(args != null && args.length > 0 ? args[0] : DEFAULT_INPUT);
        File out = new File(args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT);

        // Write to the stream directly (instead of redirecting System.out for the whole JVM)
        // and let try-with-resources flush and close the file.
        try (PrintStream output = new PrintStream(out)) {
            // NOTE(review): the parser runs with factory defaults; harden it (disable DTDs /
            // external entities) if the input can ever come from an untrusted source.
            DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = dBuilder.parse(file);
            writeEntries(doc.getElementsByTagName("p"), output);

            // PrintStream never throws on I/O failure; it only records it.
            if (output.checkError()) {
                LOGGER.log(Level.SEVERE, "Error while writing to {0}", out);
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /** Writes one {@code <body>} line to {@code output} for every entry found in {@code paragraphs}. */
    private static void writeEntries(NodeList paragraphs, PrintStream output) {
        for (int i = 0; i < paragraphs.getLength(); i++) {
            String word = "";
            String def = "";
            // Reset for every entry so a <p> without <hw>/<sn> cannot inherit the
            // multi-sense state of the entry before it.
            boolean hasMultiple = false;
            StringBuilder senses = new StringBuilder();

            NodeList children = paragraphs.item(i).getChildNodes();
            for (int c = 0; c < children.getLength(); c++) {
                Node child = children.item(c);
                String name = child.getNodeName();
                if (name.equals("sn")) {
                    hasMultiple = true;
                    // Jump the outer index to the last <p> consumed by this group of senses.
                    i = appendSenses(paragraphs, i, senses);
                } else if (name.equals("hw")) {
                    word = child.getTextContent();
                    hasMultiple = false;
                } else if (name.equals("def")) {
                    def = child.getTextContent();
                }
            }

            output.print("<body>");
            output.print("<word>");
            output.print(discardSym(word));
            output.print("</word> ");
            if (hasMultiple) {
                output.print(senses.toString());
            } else {
                output.print("<def>");
                output.print(def);
                output.print("</def>");
            }
            output.println("</body>");
        }
    }

    /**
     * Appends numbered {@code <sn>}/{@code <mdef>} pairs for the {@code <p>} at {@code start} and
     * for each directly following {@code <p>} that contains an {@code <sn>} child.
     *
     * @return the index of the last {@code <p>} that was consumed
     */
    private static int appendSenses(NodeList paragraphs, int start, StringBuilder sb) {
        int current = start;
        int count = 1;
        while (true) {
            sb.append("<sn>").append(count).append("</sn>");
            sb.append("<mdef>")
                    .append(extractDefofSN(paragraphs.item(current).getChildNodes()))
                    .append("</mdef>");

            // NodeList.item() returns null past the end, so check the bound before peeking at
            // the next <p>; the last entry of the document ends the group as well.
            int next = current + 1;
            if (next >= paragraphs.getLength()
                    || !containsNode(paragraphs.item(next).getChildNodes(), "sn")) {
                return current;
            }
            current = next;
            count++;
        }
    }

    /**
     * Returns {@code str} with every character that is not alphabetic removed.
     *
     * @param str the text to filter; must not be {@code null}
     * @return the alphabetic characters of {@code str}, in their original order
     */
    static String discardSym(String str) {
        StringBuilder letters = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (Character.isAlphabetic(ch)) {
                letters.append(ch);
            }
        }
        return letters.toString();
    }

    /**
     * Tells whether any node in {@code n} has the node name {@code tag}.
     *
     * @param n   the nodes to inspect
     * @param tag the node name to look for
     * @return {@code true} if at least one node is named {@code tag}
     */
    static boolean containsNode(NodeList n, String tag) {
        for (int v = 0; v < n.getLength(); v++) {
            if (n.item(v).getNodeName().equals(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the text content of the first node named {@code def} in {@code n}.
     *
     * @param n the nodes to search
     * @return the text of the first {@code def} node, or the literal string {@code "NULL"} when
     *         there is none
     */
    static String extractDefofSN(NodeList n) {
        for (int i = 0; i < n.getLength(); i++) {
            if (n.item(i).getNodeName().equals("def")) {
                return n.item(i).getTextContent();
            }
        }
        return "NULL";
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement