Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static void main(String[] argv) throws Exception {
- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
- dbf.setValidating(false);
- dbf.setNamespaceAware(true);
- dbf.setIgnoringComments(false);
- dbf.setIgnoringElementContentWhitespace(false);
- dbf.setExpandEntityReferences(false);
- DocumentBuilder db = dbf.newDocumentBuilder();
- Document doc = db.parse(new InputSource(new StringReader("<table border="1"> <tbody><tr><td colspan="1" rowspan="1"><br>OrganizationY</br></td><td colspan="1" rowspan="1">Company</td></tr><tr><td>(//no content here.so want delete this td)</td><td></td></tr><tr><td>Synerzip</td><td>Rezoomex</td></tr></tbody></table>)));
- Element element = doc.getDocumentElement();
- printElement(element, "");
- TransformerFactory transformerFactory = TransformerFactory.newInstance();
- Transformer transformer = transformerFactory.newTransformer();
- DOMSource source = new DOMSource(doc);
- StreamResult result = new StreamResult(new File("clean.html"));
- transformer.transform(source, result);
- }
- static void printElement(Element element, String indent) {
- List<Element> toRemove = new LinkedList<Element>();
- if (element == null) return;
- System.out.println("Element '" + element.getNodeName() + "'");
- NodeList children = element.getChildNodes();
- System.out.println("child :::::::" + children);
- for (int i = 0; i < children.getLength(); i++) {
- Node child = children.item(i);
- switch (child.getNodeType()) {
- case Node.ELEMENT_NODE:
- printElement((Element) child, indent + "t");
- break;
- case Node.ATTRIBUTE_NODE:
- Attr attr = (Attr) child;
- System.out.println("tAttribute: '" + attr.getName() + "' = '" + attr.getValue() + "'");
- break;
- case Node.TEXT_NODE:
- Text text = (Text) child;
- System.out.println("tText: '" + text.getData() + "'");
- if(text.getLength() <= 1) {
- Element childElement = child; // not able to cast it . if I did (element)child. Then it is not giving me expected output.
- System.out.println(":Child element ::::::"+elementIsRedundant(element));
- printElement(childElement,"");
- if (elementIsRedundant(childElement)) {
- toRemove.add(childElement);
- }
- }
- break;
- default:
- System.out.println("tUnknown node type: '" + child.getNodeType() + "'");
- break;
- }
- }
- }
- private static boolean elementIsRedundant(Element element) {
- if (element.hasAttributes())
- return false;
- if (!element.hasChildNodes())
- return true;
- NodeList children = element.getChildNodes();
- int childrenCount = children.getLength();
- for (int i = 0; i < childrenCount; ++i) {
- Node child = children.item(i);
- String value = child.getNodeValue();
- if (value != null && !value.matches("\s*")) {
- return false; // Found non-whitespace text
- }
- }
- return true;
Add Comment
Please, Sign In to add comment