1 package org.dom4j.io;
2
3 import org.dom4j.*;
4 import org.xmlpull.v1.XmlPullParser;
5 import org.xmlpull.v1.XmlPullParserException;
6 import org.xmlpull.v1.XmlPullParserFactory;
7
8 import java.io.*;
9 import java.net.URL;
10
11 /***
12 * <p><code>XPP3Reader</code> is a Reader of DOM4J documents that
13 * uses the fast
14 * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x</a>.
15 * It is very fast for use in SOAP style environments.</p>
16 *
17 * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard</a>
18 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
19 * @version $Revision: 1.3 $
20 */
21 public class XPP3Reader {
22
23 /***
24 * <code>DocumentFactory</code> used to create new document objects
25 */
26 private DocumentFactory factory;
27
28 /***
29 * <code>XmlPullParser</code> used to parse XML
30 */
31 private XmlPullParser xppParser;
32
33 /***
34 * <code>XmlPullParser</code> used to parse XML
35 */
36 private XmlPullParserFactory xppFactory;
37
38 /***
39 * DispatchHandler to call when each <code>Element</code> is encountered
40 */
41 private DispatchHandler dispatchHandler;
42
43
44 public XPP3Reader() {
45 }
46
47 public XPP3Reader(DocumentFactory factory) {
48 this.factory = factory;
49 }
50
51
52 /***
53 * <p>Reads a Document from the given <code>File</code></p>
54 *
55 * @param file is the <code>File</code> to read from.
56 * @return the newly created Document instance
57 * @throws DocumentException if an error occurs during parsing.
58 * @throws java.net.MalformedURLException if a URL could not be made for the given File
59 */
60 public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
61 String systemID = file.getAbsolutePath();
62 return read(new BufferedReader(new FileReader(file)), systemID);
63 }
64
65 /***
66 * <p>Reads a Document from the given <code>URL</code></p>
67 *
68 * @param url <code>URL</code> to read from.
69 * @return the newly created Document instance
70 * @throws DocumentException if an error occurs during parsing.
71 */
72 public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
73 String systemID = url.toExternalForm();
74 return read(createReader(url.openStream()), systemID);
75 }
76
77 /***
78 * <p>Reads a Document from the given URL or filename.</p>
79 * <p/>
80 * <p/>
81 * If the systemID contains a <code>':'</code> character then it is
82 * assumed to be a URL otherwise its assumed to be a file name.
83 * If you want finer grained control over this mechansim then please
84 * explicitly pass in either a {@link URL} or a {@link File} instance
85 * instead of a {@link String} to denote the source of the document.
86 * </p>
87 *
88 * @param systemID is a URL for a document or a file name.
89 * @return the newly created Document instance
90 * @throws DocumentException if an error occurs during parsing.
91 * @throws java.net.MalformedURLException if a URL could not be made for the given File
92 */
93 public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException {
94 if (systemID.indexOf(':') >= 0) {
95 // lets assume its a URL
96 return read(new URL(systemID));
97 } else {
98 // lets assume that we are given a file name
99 return read(new File(systemID));
100 }
101 }
102
103 /***
104 * <p>Reads a Document from the given stream</p>
105 *
106 * @param in <code>InputStream</code> to read from.
107 * @return the newly created Document instance
108 * @throws DocumentException if an error occurs during parsing.
109 */
110 public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
111 return read(createReader(in));
112 }
113
114 /***
115 * <p>Reads a Document from the given <code>Reader</code></p>
116 *
117 * @param reader is the reader for the input
118 * @return the newly created Document instance
119 * @throws DocumentException if an error occurs during parsing.
120 */
121 public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException {
122 getXPPParser().setInput(reader);
123 return parseDocument();
124 }
125
126 /***
127 * <p>Reads a Document from the given array of characters</p>
128 *
129 * @param text is the text to parse
130 * @return the newly created Document instance
131 * @throws DocumentException if an error occurs during parsing.
132 */
133 public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
134 getXPPParser().setInput(new CharArrayReader(text));
135 return parseDocument();
136 }
137
138 /***
139 * <p>Reads a Document from the given stream</p>
140 *
141 * @param in <code>InputStream</code> to read from.
142 * @param systemID is the URI for the input
143 * @return the newly created Document instance
144 * @throws DocumentException if an error occurs during parsing.
145 */
146 public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
147 return read(createReader(in), systemID);
148 }
149
150 /***
151 * <p>Reads a Document from the given <code>Reader</code></p>
152 *
153 * @param reader is the reader for the input
154 * @param systemID is the URI for the input
155 * @return the newly created Document instance
156 * @throws DocumentException if an error occurs during parsing.
157 */
158 public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException {
159 Document document = read(reader);
160 document.setName(systemID);
161 return document;
162 }
163
164
165 // Properties
166 //-------------------------------------------------------------------------
167
168 public XmlPullParser getXPPParser() throws XmlPullParserException {
169 if (xppParser == null) {
170 xppParser = getXPPFactory().newPullParser();
171 }
172 return xppParser;
173 }
174
175 public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
176 if (xppFactory == null) {
177 xppFactory = XmlPullParserFactory.newInstance();
178 }
179 xppFactory.setNamespaceAware(true);
180 return xppFactory;
181 }
182
183 public void setXPPFactory(XmlPullParserFactory xppFactory) {
184 this.xppFactory = xppFactory;
185 }
186
187 /***
188 * @return the <code>DocumentFactory</code> used to create document objects
189 */
190 public DocumentFactory getDocumentFactory() {
191 if (factory == null) {
192 factory = DocumentFactory.getInstance();
193 }
194 return factory;
195 }
196
197 /***
198 * <p>This sets the <code>DocumentFactory</code> used to create new documents.
199 * This method allows the building of custom DOM4J tree objects to be implemented
200 * easily using a custom derivation of {@link DocumentFactory}</p>
201 *
202 * @param factory <code>DocumentFactory</code> used to create DOM4J objects
203 */
204 public void setDocumentFactory(DocumentFactory factory) {
205 this.factory = factory;
206 }
207
208
209 /***
210 * Adds the <code>ElementHandler</code> to be called when the
211 * specified path is encounted.
212 *
213 * @param path is the path to be handled
214 * @param handler is the <code>ElementHandler</code> to be called
215 * by the event based processor.
216 */
217 public void addHandler(String path, ElementHandler handler) {
218 getDispatchHandler().addHandler(path, handler);
219 }
220
221 /***
222 * Removes the <code>ElementHandler</code> from the event based
223 * processor, for the specified path.
224 *
225 * @param path is the path to remove the <code>ElementHandler</code> for.
226 */
227 public void removeHandler(String path) {
228 getDispatchHandler().removeHandler(path);
229 }
230
231 /***
232 * When multiple <code>ElementHandler</code> instances have been
233 * registered, this will set a default <code>ElementHandler</code>
234 * to be called for any path which does <b>NOT</b> have a handler
235 * registered.
236 *
237 * @param handler is the <code>ElementHandler</code> to be called
238 * by the event based processor.
239 */
240 public void setDefaultHandler(ElementHandler handler) {
241 getDispatchHandler().setDefaultHandler(handler);
242 }
243
244 // Implementation methods
245 //-------------------------------------------------------------------------
246 protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
247 DocumentFactory df = getDocumentFactory();
248 Document document = df.createDocument();
249 Element parent = null;
250 XmlPullParser pp = getXPPParser();
251 pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
252 // pp.setFeature(XmlPullParser.FEATURE_PROCESS_DOCDECL, true);
253 // pp.setFeature(XmlPullParser.FEATURE_VALIDATION, true);
254 // pp.setFeature("http://xmlpull.org/v1/doc/features.html#xml-roundtrip", true);
255 while (true) {
256 int type = pp.next();
257 // int type = pp.nextToken();
258 switch (type) {
259 /*
260 case XmlPullParser.PROCESSING_INSTRUCTION:
261 {
262 String text = pp.getText();
263 int loc = text.indexOf(" ");
264 if (loc >= 0) {
265 document.addProcessingInstruction(text.substring(0, loc), text.substring(loc + 1));
266 } else
267 document.addProcessingInstruction(text, "");
268 break;
269 }
270 case XmlPullParser.COMMENT:
271 {
272 if (parent != null)
273 parent.addComment(pp.getText());
274 else
275 document.addComment(pp.getText());
276 break;
277 }
278 case XmlPullParser.CDSECT:
279 {
280 if (parent != null) {
281 parent.addCDATA(pp.getText());
282 } else {
283 throw new DocumentException("Cannot have text content outside of the root document");
284 }
285 break;
286
287 }
288 case XmlPullParser.ENTITY_REF:
289 {
290 break;
291
292 }
293
294 */
295 case XmlPullParser.END_DOCUMENT:
296 {
297 return document;
298 }
299 case XmlPullParser.START_TAG:
300 {
301 QName qname = (pp.getPrefix() == null) ? df.createQName(pp.getName(), pp.getNamespace()) : df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace());
302 Element newElement = df.createElement(qname);
303 int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
304 int nsEnd = pp.getNamespaceCount(pp.getDepth());
305 for (int i = nsStart; i < nsEnd; i++)
306 if (pp.getNamespacePrefix(i) != null)
307 newElement.addNamespace(pp.getNamespacePrefix(i), pp.getNamespaceUri(i));
308 for (int i = 0; i < pp.getAttributeCount(); i++) {
309 QName qa = (pp.getAttributePrefix(i) == null) ? df.createQName(pp.getAttributeName(i)) : df.createQName(pp.getAttributeName(i), pp.getAttributePrefix(i), pp.getAttributeNamespace(i));
310 newElement.addAttribute(qa, pp.getAttributeValue(i));
311 }
312 if (parent != null) {
313 parent.add(newElement);
314 } else {
315 document.add(newElement);
316 }
317 parent = newElement;
318 break;
319 }
320 case XmlPullParser.END_TAG:
321 {
322 if (parent != null) {
323 parent = parent.getParent();
324 }
325 break;
326 }
327 case XmlPullParser.TEXT:
328 {
329 String text = pp.getText();
330 if (parent != null) {
331 parent.addText(text);
332 } else {
333 throw new DocumentException("Cannot have text content outside of the root document");
334 }
335 break;
336 }
337 default:
338 {
339 ;
340 }
341 }
342 }
343 }
344
345 protected DispatchHandler getDispatchHandler() {
346 if (dispatchHandler == null) {
347 dispatchHandler = new DispatchHandler();
348 }
349 return dispatchHandler;
350 }
351
352 protected void setDispatchHandler(DispatchHandler dispatchHandler) {
353 this.dispatchHandler = dispatchHandler;
354 }
355
356 /***
357 * Factory method to create a Reader from the given InputStream.
358 */
359 protected Reader createReader(InputStream in) throws IOException {
360 return new BufferedReader(new InputStreamReader(in));
361 }
362 }
363
364
365 /*
366 * Redistribution and use of this software and associated documentation
367 * ("Software"), with or without modification, are permitted provided
368 * that the following conditions are met:
369 *
370 * 1. Redistributions of source code must retain copyright
371 * statements and notices. Redistributions must also contain a
372 * copy of this document.
373 *
374 * 2. Redistributions in binary form must reproduce the
375 * above copyright notice, this list of conditions and the
376 * following disclaimer in the documentation and/or other
377 * materials provided with the distribution.
378 *
379 * 3. The name "DOM4J" must not be used to endorse or promote
380 * products derived from this Software without prior written
381 * permission of MetaStuff, Ltd. For written permission,
382 * please contact dom4j-info@metastuff.com.
383 *
384 * 4. Products derived from this Software may not be called "DOM4J"
385 * nor may "DOM4J" appear in their names without prior written
386 * permission of MetaStuff, Ltd. DOM4J is a registered
387 * trademark of MetaStuff, Ltd.
388 *
389 * 5. Due credit should be given to the DOM4J Project -
390 * http://www.dom4j.org
391 *
392 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
393 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
394 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
395 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
396 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
397 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
398 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
399 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
400 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
401 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
402 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
403 * OF THE POSSIBILITY OF SUCH DAMAGE.
404 *
405 * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
406 *
407 * $Id: XPP3Reader.java,v 1.3 2004/09/07 19:34:03 pelle Exp $
408 */
This page was automatically generated by Maven