How to read comment text with SAX Java parser

2.6k Views Asked by At

I want to read only the comment that precedes the object tag in my XML file, using the SAX parser in Java.

This is an excerpt from my file:

<!-- Object Seed term: day, WikiTitle: day-->
<object id="15155220" name="solar day, twenty-four hour period, 24-hour interval, mean solar day, twenty-four hours, si day, día, days, si days, day duration, day, civil day">
    <!-- class: "calendar day" -->
    <class id="15157041" name="calendar day, civil day"></class>
    <!-- class: "unit of time" -->
    <class id="15154774" name="time units, unit of time, time unit, units of time"></class>
    <!-- class: "" -->
    <class id="15113229" name="period of time, time period, period"></class>
    <!-- class: "" -->
    <class id="00000000" name="time"></class>
    <genericPhysicalDescription>
        <!-- hasPart: "" -->
        <hasPart id="15228378" name="hour, time of day"></hasPart>
        <!-- hasPart: "" -->
        <hasPart id="15157225" name="day"></hasPart>
        <!-- partOf: "calendar" -->
        <partOf id="15173479" name="calendrics, calendar, dating style, calendarist, calendars, birthday calendar, calendar strip, secular calendar, calandar, agriculture calendar, calendar system, criminal calendar"></partOf>
        <!-- partOf: "" -->
        <partOf id="15206296" name="month"></partOf>
        <!-- partOf: "" -->
        <partOf id="15157225" name="day"></partOf>
    </genericPhysicalDescription>
</object>
1

There is 1 best solution below

2
Ferdinand Prantl On BEST ANSWER

The javax.xml.parsers.SAXParser does not support reading comments. It ignores them.

The org.xml.sax.ext.LexicalHandler allows you to catch comments when parsing with org.xml.sax.XMLReader. See an example in another Stack Overflow post or a tutorial at Oracle.

If you want to associate a comment with the element that comes right after it, you can additionally pass an org.xml.sax.ContentHandler to the parser and use it to track the other XML content. I adapted the code referred to above so that it prints only those object elements that are immediately preceded by a comment:

import org.xml.sax.*;
import org.xml.sax.ext.*;
import org.xml.sax.helpers.*;

import java.io.IOException;

/**
 * Prints the comment that directly precedes each {@code object} element of an XML document.
 *
 * <p>The class is registered with the same {@link XMLReader} twice: as a
 * {@link LexicalHandler}, to be told about comments, and as a {@link ContentHandler}, to be
 * told about elements. The most recent comment is remembered and is reported only if the
 * very next element start is an {@code object} element; any element start discards it.
 */
public class Test implements LexicalHandler, ContentHandler {

  /** Name of the element whose preceding comment is reported. */
  private static final String TARGET_ELEMENT = "object";

  /** SAX property under which a {@link LexicalHandler} is registered. */
  private static final String LEXICAL_HANDLER_PROPERTY =
      "http://xml.org/sax/properties/lexical-handler";

  /** Trimmed text of the last comment seen since the previous element start, or null. */
  private String lastComment;

  // ---- LexicalHandler: only comments are of interest ----

  @Override
  public void startDTD(String name, String publicId, String systemId) throws SAXException {
  }

  @Override
  public void endDTD() throws SAXException {
  }

  @Override
  public void startEntity(String name) throws SAXException {
  }

  @Override
  public void endEntity(String name) throws SAXException {
  }

  @Override
  public void startCDATA() throws SAXException {
  }

  @Override
  public void endCDATA() throws SAXException {
  }

  /**
   * Remembers the text of a comment so that the next element start can report it.
   *
   * @param text buffer holding the comment characters
   * @param start index of the first comment character in {@code text}
   * @param length number of comment characters
   */
  @Override
  public void comment(char[] text, int start, int length) throws SAXException {
    this.lastComment = new String(text, start, length).trim();
  }

  // ---- ContentHandler: only element starts are of interest ----

  @Override
  public void characters(char[] ch, int start, int length) {
  }

  @Override
  public void endDocument() {
  }

  @Override
  public void endElement(String uri, String localName, String qName) {
  }

  @Override
  public void endPrefixMapping(String prefix) {
  }

  @Override
  public void ignorableWhitespace(char[] ch, int start, int length) {
  }

  @Override
  public void processingInstruction(String target, String data) {
  }

  @Override
  public void setDocumentLocator(Locator locator) {
  }

  @Override
  public void skippedEntity(String name) {
  }

  @Override
  public void startDocument() {
  }

  /**
   * Reports the pending comment if this element is an {@code object} element, then forgets it.
   *
   * @param uri namespace URI of the element (unused)
   * @param localName local name of the element; may be empty when the parser does not
   *     perform namespace processing
   * @param qName qualified name of the element, used when {@code localName} is empty
   * @param atts attributes of the element (unused)
   */
  @Override
  public void startElement(String uri, String localName, String qName, Attributes atts) {
    // Without namespace processing the parser passes an empty local name; use qName then.
    String elementName = (localName == null || localName.isEmpty()) ? qName : localName;

    // Compare by value: the parser is not required to hand over interned strings.
    if (TARGET_ELEMENT.equals(elementName) && this.lastComment != null) {
      System.out.println("Element object with comment found: \"" + this.lastComment + "\"");
    }

    // A comment belongs only to the element that follows it directly.
    this.lastComment = null;
  }

  @Override
  public void startPrefixMapping(String prefix, String uri) {
  }

  /**
   * Parses {@code test.xml} from the working directory and prints every {@code object}
   * element that is immediately preceded by a comment.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    Test test = new Test();
    XMLReader parser;

    try {
      parser = createReader();
    } catch (SAXException e) {
      // Report the failure instead of exiting without any output.
      System.out.println(e.getMessage());
      return;
    }

    try {
      parser.setProperty(LEXICAL_HANDLER_PROPERTY, test);
    } catch (SAXException e) {
      // Covers SAXNotRecognizedException and SAXNotSupportedException.
      System.out.println(e.getMessage());
      return;
    }

    parser.setContentHandler(test);

    try {
      parser.parse("test.xml");
    } catch (SAXException e) {
      // Covers SAXParseException as well.
      System.out.println(e.getMessage());
    } catch (IOException e) {
      System.out.println(e.getMessage());
    }
  }

  /**
   * Creates an XML reader, preferring the platform default and falling back to Xerces.
   *
   * @return a new reader
   * @throws SAXException if neither the default reader nor the Xerces reader can be created
   */
  private static XMLReader createReader() throws SAXException {
    // NOTE(review): XMLReaderFactory is deprecated in recent JDKs; kept to avoid new imports.
    try {
      return XMLReaderFactory.createXMLReader();
    } catch (SAXException ignored) {
      // The default reader is unavailable; try the Xerces implementation by class name.
      return XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
    }
  }
}

Save this code to "Test.java" and your XML content to "test.xml". Once compiled and executed, it should give you the following output:

$ javac Test.java 
$ java Test 
Element object with comment found: "Object Seed term: day, WikiTitle: day"