Home > coding, java > Simple XML processing in Java using XPath

Simple XML processing in Java using XPath

Often I just want to get or set a few values from/on a given XML document. XPath is the standard for specifying locations in an XML document, with a Java XPath API since Java 5.

But the API is a little clunky, resulting in code like this:

DocumentBuilder builder = 
    DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document document = builder.parse(new File("/widgets.xml"));

XPath xpath = XPathFactory.newInstance().newXPath();
String expression = "/widgets/widget";
Node widgetNode = (Node) xpath.evaluate(expression, 
    document, XPathConstants.NODE); 

Wouldn’t it be nice if it was as simple as this:

Xml xml = new Xml(Paths.get("/widgets.xml"));
Xml node = xml.node("/widgets/widget");

This Xml class and some supporting code is in my x-xml github repo. It implements a Tree interface like this:

Interface

package net.doepner.xml;

/**
 * Conveniently extract and change text values
 * and navigate nodes on an XML document
 */
public interface Tree {

    /**
     * @param xpath An XPath expression
     * @return The nodes matching the XPath location
     * @throws XmlException Wrapping any underlying XML API exception
     */
    Iterable<? extends Tree> nodes(String xpath);

    /**
     * @param xpath An XPath expression
     * @return The single node matching the XPath location
     * @throws XmlException Wrapping any underlying XML API exception
     */
    Tree node(String xpath);

    /**
     * @param xpath An XPath expression
     * @return The text value from the unique XPath location
     * @throws XmlException Wrapping any underlying XML API exception
     */
    String get(String xpath);

    /**
     * @param xpath An XPath expression
     * @param value The text value to be set on the unique XPath location
     * @throws XmlException Wrapping any underlying XML API exception
     */
    void set(String xpath, String value);
}

Implementation

Here is the Xml class that implements the Tree interface:

package net.doepner.xml;

import com.doepner.libs.messaging.XmlException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.file.Path;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

/**
 * Allows node navigation and getting and setting of xpath values
 */
public class Xml implements Tree {

    private final TransformerFactory tf = TransformerFactory.newInstance();
    private final XPath xp = XPathFactory.newInstance().newXPath();
    private final QNames qNames = new QNames();

    private final Node root;

    public Xml(Path path) {
        this(new InputSource(path.toFile().toURI().toASCIIString()));
    }

    public Xml(String payload) {
        this(new InputSource(new StringReader(payload)));
    }

    public Xml(InputSource inputSource) {
        this(parse(inputSource));
    }

    private Xml(Node root) {
        this.root = root;
    }

    private static Document parse(InputSource inputSource) {
        try {
            return DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder().parse(inputSource);
        } catch (ParserConfigurationException 
                | SAXException | IOException e) {
            throw new XmlException(e);
        }
    }

    @Override
    public Iterable<Xml> nodes(String xpath) {
        final Collection<Xml> result = new LinkedList<>();
        for (Node node : new IterableNodes(eval(xpath, NodeList.class))) {
            result.add(new Xml(node));
        }
        return result;
    }

    @Override
    public Xml node(String xpath) {
        return new Xml(eval(xpath, Node.class));
    }

    @Override
    public String get(String xpath) {
        return eval(xpath, String.class);
    }

    @Override
    public void set(String xpath, String value) {
        eval(xpath, Node.class).setTextContent(value);
    }

    private final Map<String, XPathExpression> cache = new HashMap<>();

    private <T> T eval(String xpath, Class<T> resultType) {
        return resultType.cast(evaluate(resultType, getCompiled(xpath)));
    }

    private XPathExpression getCompiled(String xpath) {
        final XPathExpression cachedExpression = cache.get(xpath);
        if (cachedExpression != null) {
            return cachedExpression;
        } else {
            final XPathExpression expr = compile(xpath);
            cache.put(xpath, expr);
            return expr;
        }
    }

    private Object evaluate(Class<?> resultType, XPathExpression expr) {
        try {
            return expr.evaluate(root, qNames.get(resultType));
        } catch (XPathExpressionException e) {
            throw new XmlException(e);
        }
    }

    private XPathExpression compile(String xpath) {
        try {
            return xp.compile(xpath);
        } catch (XPathExpressionException e) {
            throw new XmlException(e);
        }
    }

    @Override
    public String toString() {
        try (final StringWriter writer = new StringWriter()) {
            tf.newTransformer().transform(new DOMSource(root),
                    new StreamResult(writer));
            return writer.getBuffer().toString();
        } catch (TransformerException | IOException e) {
            throw new XmlException(e);
        }
    }
}

Supporting classes

Custom exception wrapper class
package net.doepner.xml;
 
/**
 * An XML exception
 */
public class XmlException extends RuntimeException {
 
    public XmlException(Throwable cause) {
        super(cause);
    }
}
The mapping from Java types to QNames
package net.doepner.xml;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.namespace.QName;
import java.util.HashMap;
import java.util.Map;

import static javax.xml.xpath.XPathConstants.NODE;
import static javax.xml.xpath.XPathConstants.NODESET;
import static javax.xml.xpath.XPathConstants.STRING;

/**
 * Mapping from Java type to QName
 */
public class QNames {

    private final Map<Class<?>, QName> qNames = new HashMap<>();

    {
        qNames.put(String.class, STRING);
        qNames.put(Node.class, NODE);
        qNames.put(NodeList.class, NODESET);
    }

    public QName get(Class<?> resultType) {
        return qNames.get(resultType);
    }
}
Convenient iteration of XML node lists
package net.doepner.xml;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Convenient iteration of XML node lists
 */
public class IterableNodes implements Iterable<Node> {

    private final NodeList nodeList;

    public IterableNodes(NodeList nodeList) {
        this.nodeList = nodeList;
    }

    @Override
    public Iterator<Node> iterator() {

        return new Iterator<Node>() {

            int index = 0;

            @Override
            public boolean hasNext() {
                return index < nodeList.getLength();
            }

            @Override
            public Node next() {
                if (hasNext()) {
                    return nodeList.item(index++);
                } else {
                    throw new NoSuchElementException();
                }
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }
}
Advertisements
Categories: coding, java Tags: , ,
  1. Marco Schramp
    September 3, 2014 at 12:22

    Now I feel so spoiled in the .Net framework. There these two lines of code actually exist.

    • September 4, 2014 at 20:39

      Hi Marco,

      Thanks for the feedback. It’s great that the DotNet framework has convenient XML APIs.

      Personally, I use Java because it is cross-platform and based on Open Source projects like OpenJDK. I know about Mono, the DotNet implementation for Linux and BSD, but it does not seem compelling to me. Also, I really like IntelliJ Community Edition, probably the best free (Java) IDE available.

      I am curious: How did you come across my blog?

      Cheers
      Oliver

    • September 8, 2014 at 09:27

      Here some documentation on that DotNet API, I guess:

  1. No trackbacks yet.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: