Guest User

Untitled

a guest
Mar 23rd, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.06 KB | None | 0 0
  1. public static void main(String[] argv) throws Exception {
  2. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  3. dbf.setValidating(false);
  4. dbf.setNamespaceAware(true);
  5. dbf.setIgnoringComments(false);
  6. dbf.setIgnoringElementContentWhitespace(false);
  7. dbf.setExpandEntityReferences(false);
  8. DocumentBuilder db = dbf.newDocumentBuilder();
  9. Document doc = db.parse(new InputSource(new StringReader("<table border="1"> <tbody><tr><td colspan="1" rowspan="1"><br>OrganizationY</br></td><td colspan="1" rowspan="1">Company</td></tr><tr><td>(//no content here.so want delete this td)</td><td></td></tr><tr><td>Synerzip</td><td>Rezoomex</td></tr></tbody></table>)));
  10. Element element = doc.getDocumentElement();
  11. printElement(element, "");
  12. TransformerFactory transformerFactory = TransformerFactory.newInstance();
  13. Transformer transformer = transformerFactory.newTransformer();
  14. DOMSource source = new DOMSource(doc);
  15. StreamResult result = new StreamResult(new File("clean.html"));
  16. transformer.transform(source, result);
  17. }
  18.  
  19. static void printElement(Element element, String indent) {
  20. List<Element> toRemove = new LinkedList<Element>();
  21. if (element == null) return;
  22. System.out.println("Element '" + element.getNodeName() + "'");
  23. NodeList children = element.getChildNodes();
  24. System.out.println("child :::::::" + children);
  25. for (int i = 0; i < children.getLength(); i++) {
  26. Node child = children.item(i);
  27. switch (child.getNodeType()) {
  28. case Node.ELEMENT_NODE:
  29. printElement((Element) child, indent + "t");
  30. break;
  31. case Node.ATTRIBUTE_NODE:
  32. Attr attr = (Attr) child;
  33. System.out.println("tAttribute: '" + attr.getName() + "' = '" + attr.getValue() + "'");
  34. break;
  35. case Node.TEXT_NODE:
  36. Text text = (Text) child;
  37. System.out.println("tText: '" + text.getData() + "'");
  38. if(text.getLength() <= 1) {
  39. Element childElement = child; // not able to cast it . if I did (element)child. Then it is not giving me expected output.
  40. System.out.println(":Child element ::::::"+elementIsRedundant(element));
  41. printElement(childElement,"");
  42. if (elementIsRedundant(childElement)) {
  43. toRemove.add(childElement);
  44. }
  45. }
  46. break;
  47. default:
  48. System.out.println("tUnknown node type: '" + child.getNodeType() + "'");
  49. break;
  50. }
  51. }
  52. }
  53.  
  54. private static boolean elementIsRedundant(Element element) {
  55. if (element.hasAttributes())
  56. return false;
  57. if (!element.hasChildNodes())
  58. return true;
  59. NodeList children = element.getChildNodes();
  60. int childrenCount = children.getLength();
  61. for (int i = 0; i < childrenCount; ++i) {
  62. Node child = children.item(i);
  63. String value = child.getNodeValue();
  64. if (value != null && !value.matches("\s*")) {
  65. return false; // Found non-whitespace text
  66. }
  67. }
  68. return true;
Add Comment
Please, Sign In to add comment