This project has retired. For details please refer to its Attic page.
AtomEntryParser xref

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.chemistry.opencmis.server.impl.atompub;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.File;
24  import java.io.InputStream;
25  import java.math.BigInteger;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Map;
29  
30  import javax.xml.bind.JAXBElement;
31  import javax.xml.bind.Unmarshaller;
32  import javax.xml.namespace.QName;
33  import javax.xml.stream.XMLInputFactory;
34  import javax.xml.stream.XMLOutputFactory;
35  import javax.xml.stream.XMLStreamException;
36  import javax.xml.stream.XMLStreamReader;
37  import javax.xml.stream.XMLStreamWriter;
38  
39  import org.apache.chemistry.opencmis.commons.PropertyIds;
40  import org.apache.chemistry.opencmis.commons.data.Acl;
41  import org.apache.chemistry.opencmis.commons.data.ContentStream;
42  import org.apache.chemistry.opencmis.commons.data.ObjectData;
43  import org.apache.chemistry.opencmis.commons.data.Properties;
44  import org.apache.chemistry.opencmis.commons.data.PropertyData;
45  import org.apache.chemistry.opencmis.commons.data.PropertyId;
46  import org.apache.chemistry.opencmis.commons.data.PropertyString;
47  import org.apache.chemistry.opencmis.commons.exceptions.CmisInvalidArgumentException;
48  import org.apache.chemistry.opencmis.commons.exceptions.CmisNotSupportedException;
49  import org.apache.chemistry.opencmis.commons.impl.Base64;
50  import org.apache.chemistry.opencmis.commons.impl.Constants;
51  import org.apache.chemistry.opencmis.commons.impl.Converter;
52  import org.apache.chemistry.opencmis.commons.impl.JaxBHelper;
53  import org.apache.chemistry.opencmis.commons.impl.dataobjects.ContentStreamImpl;
54  import org.apache.chemistry.opencmis.commons.impl.dataobjects.PropertiesImpl;
55  import org.apache.chemistry.opencmis.commons.impl.dataobjects.PropertyStringImpl;
56  import org.apache.chemistry.opencmis.commons.impl.jaxb.CmisObjectType;
57  import org.apache.chemistry.opencmis.server.shared.ThresholdOutputStream;
58  
59  /**
60   * Parser for Atom Entries.
61   */
62  public class AtomEntryParser {
63  
64      private static final String TAG_ENTRY = "entry";
65      private static final String TAG_TITLE = "title";
66      private static final String TAG_OBJECT = "object";
67      private static final String TAG_CONTENT = "content";
68      private static final String TAG_BASE64 = "base64";
69      private static final String TAG_MEDIATYPE = "mediatype";
70  
71      private static final String ATTR_SRC = "src";
72      private static final String ATTR_TYPE = "type";
73  
74      protected boolean ignoreAtomContentSrc;
75  
76      private File tempDir;
77      private int memoryThreshold;
78  
79      private ObjectData object;
80      private ContentStreamImpl atomContentStream;
81      private ContentStreamImpl cmisContentStream;
82  
83      /**
84       * Constructor.
85       */
86      public AtomEntryParser(File tempDir, int memoryThreshold) {
87          this.tempDir = tempDir;
88          this.memoryThreshold = memoryThreshold;
89      }
90  
91      /**
92       * Constructor that immediately parses the given stream.
93       */
94      public AtomEntryParser(InputStream stream, File tempDir, int memoryThreshold) throws Exception {
95          this(tempDir, memoryThreshold);
96          parse(stream);
97      }
98  
99      /**
100      * Sets the flag controlling whether atom content src (external content) is
101      * ignored. This flag is false by default (not ignored).
102      */
103     public void setIgnoreAtomContentSrc(boolean ignoreAtomContentSrc) {
104         this.ignoreAtomContentSrc = ignoreAtomContentSrc;
105     }
106 
107     /**
108      * Returns the object.
109      */
110     public ObjectData getObject() {
111         return object;
112     }
113 
114     /**
115      * Returns the properties of the object.
116      */
117     public Properties getProperties() {
118         return (object == null ? null : object.getProperties());
119     }
120 
121     /**
122      * Returns the Id of the object.
123      */
124     public String getId() {
125         Properties properties = getProperties();
126         if (properties == null) {
127             return null;
128         }
129 
130         Map<String, PropertyData<?>> propertiesMap = properties.getProperties();
131         if (propertiesMap == null) {
132             return null;
133         }
134 
135         PropertyData<?> property = propertiesMap.get(PropertyIds.OBJECT_ID);
136         if (property instanceof PropertyId) {
137             return ((PropertyId) property).getFirstValue();
138         }
139 
140         return null;
141     }
142 
143     /**
144      * Returns the ACL of the object.
145      */
146     public Acl getAcl() {
147         return (object == null ? null : object.getAcl());
148     }
149 
150     /**
151      * Returns the policy id list of the object.
152      */
153     public List<String> getPolicyIds() {
154         if ((object == null) || (object.getPolicyIds() == null)) {
155             return null;
156         }
157 
158         return object.getPolicyIds().getPolicyIds();
159     }
160 
161     /**
162      * Returns the content stream.
163      */
164     public ContentStream getContentStream() {
165         return (cmisContentStream == null ? atomContentStream : cmisContentStream);
166     }
167 
168     /**
169      * Parses the stream.
170      */
171     public void parse(InputStream stream) throws Exception {
172         object = null;
173         atomContentStream = null;
174         cmisContentStream = null;
175 
176         if (stream == null) {
177             return;
178         }
179 
180         XMLInputFactory factory = XMLInputFactory.newInstance();
181         factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);
182         XMLStreamReader parser = factory.createXMLStreamReader(stream);
183 
184         while (true) {
185             int event = parser.getEventType();
186             if (event == XMLStreamReader.START_ELEMENT) {
187                 QName name = parser.getName();
188 
189                 if (Constants.NAMESPACE_ATOM.equals(name.getNamespaceURI()) && (TAG_ENTRY.equals(name.getLocalPart()))) {
190                     parseEntry(parser);
191                     break;
192                 } else {
193                     throw new CmisInvalidArgumentException("XML is not an Atom entry!");
194                 }
195             }
196 
197             if (!next(parser)) {
198                 break;
199             }
200         }
201 
202         parser.close();
203     }
204 
205     /**
206      * Parses an Atom entry.
207      */
208     private void parseEntry(XMLStreamReader parser) throws Exception {
209         String atomTitle = null;
210 
211         next(parser);
212 
213         // walk through all tags in entry
214         while (true) {
215             int event = parser.getEventType();
216             if (event == XMLStreamReader.START_ELEMENT) {
217                 QName name = parser.getName();
218 
219                 if (Constants.NAMESPACE_RESTATOM.equals(name.getNamespaceURI())) {
220                     if (TAG_OBJECT.equals(name.getLocalPart())) {
221                         parseObject(parser);
222                     } else if (TAG_CONTENT.equals(name.getLocalPart())) {
223                         parseCmisContent(parser);
224                     } else {
225                         skip(parser);
226                     }
227                 } else if (Constants.NAMESPACE_ATOM.equals(name.getNamespaceURI())) {
228                     if (TAG_CONTENT.equals(name.getLocalPart())) {
229                         parseAtomContent(parser);
230                     } else if (TAG_TITLE.equals(name.getLocalPart())) {
231                         atomTitle = readText(parser);
232                     } else {
233                         skip(parser);
234                     }
235                 } else {
236                     skip(parser);
237                 }
238             } else if (event == XMLStreamReader.END_ELEMENT) {
239                 break;
240             } else {
241                 if (!next(parser)) {
242                     break;
243                 }
244             }
245         }
246 
247         // overwrite cmis:name with Atom title
248         if ((object != null) && (object.getProperties() != null) && (atomTitle != null) && (atomTitle.length() > 0)) {
249             PropertyString nameProperty = new PropertyStringImpl(PropertyIds.NAME, atomTitle);
250             ((PropertiesImpl) object.getProperties()).replaceProperty(nameProperty);
251         }
252     }
253 
254     /**
255      * Parses a CMIS object.
256      */
257     private void parseObject(XMLStreamReader parser) throws Exception {
258         Unmarshaller u = JaxBHelper.createUnmarshaller();
259         JAXBElement<CmisObjectType> jaxbObject = u.unmarshal(parser, CmisObjectType.class);
260 
261         if (jaxbObject != null) {
262             object = Converter.convert(jaxbObject.getValue());
263         }
264     }
265 
266     /**
267      * Extract the content stream.
268      */
269     private void parseAtomContent(XMLStreamReader parser) throws Exception {
270         atomContentStream = new ContentStreamImpl();
271 
272         // read attributes
273         String type = "text";
274         for (int i = 0; i < parser.getAttributeCount(); i++) {
275             QName attrName = parser.getAttributeName(i);
276             if (ATTR_TYPE.equals(attrName.getLocalPart())) {
277                 atomContentStream.setMimeType(parser.getAttributeValue(i));
278                 if (parser.getAttributeValue(i) != null) {
279                     type = parser.getAttributeValue(i).trim().toLowerCase();
280                 }
281             } else if (ATTR_SRC.equals(attrName.getLocalPart())) {
282                 if (ignoreAtomContentSrc) {
283                     atomContentStream = null;
284                     skip(parser);
285                     return;
286                 }
287                 throw new CmisNotSupportedException("External content not supported!");
288             }
289         }
290 
291         byte[] bytes = null;
292         if (type.equals("text") || type.equals("html")) {
293             bytes = readText(parser).getBytes("UTF-8");
294         } else if (type.equals("xhtml")) {
295             bytes = copy(parser);
296         } else if (type.endsWith("/xml") || type.endsWith("+xml")) {
297             bytes = copy(parser);
298         } else if (type.startsWith("text/")) {
299             bytes = readText(parser).getBytes("UTF-8");
300         } else {
301             ThresholdOutputStream ths = readBase64(parser);
302             atomContentStream.setStream(ths.getInputStream());
303             atomContentStream.setLength(BigInteger.valueOf(ths.getSize()));
304         }
305 
306         if (bytes != null) {
307             atomContentStream.setStream(new ByteArrayInputStream(bytes));
308             atomContentStream.setLength(BigInteger.valueOf(bytes.length));
309         }
310     }
311 
312     /**
313      * Extract the content stream.
314      */
315     private void parseCmisContent(XMLStreamReader parser) throws Exception {
316         cmisContentStream = new ContentStreamImpl();
317 
318         next(parser);
319 
320         // walk through all tags in content
321         while (true) {
322             int event = parser.getEventType();
323             if (event == XMLStreamReader.START_ELEMENT) {
324                 QName name = parser.getName();
325 
326                 if (Constants.NAMESPACE_RESTATOM.equals(name.getNamespaceURI())) {
327                     if (TAG_MEDIATYPE.equals(name.getLocalPart())) {
328                         cmisContentStream.setMimeType(readText(parser));
329                     } else if (TAG_BASE64.equals(name.getLocalPart())) {
330                         ThresholdOutputStream ths = readBase64(parser);
331                         cmisContentStream.setStream(ths.getInputStream());
332                         cmisContentStream.setLength(BigInteger.valueOf(ths.getSize()));
333                     } else {
334                         skip(parser);
335                     }
336                 } else {
337                     skip(parser);
338                 }
339             } else if (event == XMLStreamReader.END_ELEMENT) {
340                 break;
341             } else {
342                 if (!next(parser)) {
343                     break;
344                 }
345             }
346         }
347 
348         next(parser);
349     }
350 
351     /**
352      * Parses a tag that contains text.
353      */
354     private static String readText(XMLStreamReader parser) throws Exception {
355         StringBuilder sb = new StringBuilder();
356 
357         next(parser);
358 
359         while (true) {
360             int event = parser.getEventType();
361             if (event == XMLStreamReader.END_ELEMENT) {
362                 break;
363             } else if (event == XMLStreamReader.CHARACTERS) {
364                 String s = parser.getText();
365                 if (s != null) {
366                     sb.append(s);
367                 }
368             } else if (event == XMLStreamReader.START_ELEMENT) {
369                 throw new RuntimeException("Unexpected tag: " + parser.getName());
370             }
371 
372             if (!next(parser)) {
373                 break;
374             }
375         }
376 
377         next(parser);
378 
379         return sb.toString();
380     }
381 
382     /**
383      * Parses a tag that contains base64 encoded content.
384      */
385     private ThresholdOutputStream readBase64(XMLStreamReader parser) throws Exception {
386         ThresholdOutputStream bufferStream = new ThresholdOutputStream(tempDir, memoryThreshold);
387         Base64.OutputStream b64stream = new Base64.OutputStream(bufferStream, Base64.DECODE);
388 
389         next(parser);
390 
391         try {
392             while (true) {
393                 int event = parser.getEventType();
394                 if (event == XMLStreamReader.END_ELEMENT) {
395                     break;
396                 } else if (event == XMLStreamReader.CHARACTERS) {
397                     String s = parser.getText();
398                     if (s != null) {
399                         b64stream.write(s.getBytes("US-ASCII"));
400                     }
401                 } else if (event == XMLStreamReader.START_ELEMENT) {
402                     throw new RuntimeException("Unexpected tag: " + parser.getName());
403                 }
404 
405                 if (!next(parser)) {
406                     break;
407                 }
408             }
409 
410             b64stream.close();
411         } catch (Exception e) {
412             bufferStream.destroy(); // remove temp file
413             throw e;
414         }
415 
416         next(parser);
417 
418         return bufferStream;
419     }
420 
421     /**
422      * Copies a subtree into a stream.
423      */
424     private static byte[] copy(XMLStreamReader parser) throws Exception {
425         // create a writer
426         ByteArrayOutputStream out = new ByteArrayOutputStream();
427         XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
428 
429         writer.writeStartDocument();
430 
431         // copy subtree
432         int level = 1;
433         while (next(parser)) {
434             int event = parser.getEventType();
435             if (event == XMLStreamReader.START_ELEMENT) {
436                 copyStartElement(parser, writer);
437                 level++;
438             } else if (event == XMLStreamReader.CHARACTERS) {
439                 writer.writeCharacters(parser.getText());
440             } else if (event == XMLStreamReader.COMMENT) {
441                 writer.writeComment(parser.getText());
442             } else if (event == XMLStreamReader.CDATA) {
443                 writer.writeCData(parser.getText());
444             } else if (event == XMLStreamReader.END_ELEMENT) {
445                 level--;
446                 if (level == 0) {
447                     break;
448                 }
449                 writer.writeEndElement();
450             } else {
451                 break;
452             }
453         }
454 
455         writer.writeEndDocument();
456 
457         next(parser);
458 
459         return out.toByteArray();
460     }
461 
462     /**
463      * Copies a XML start element.
464      */
465     private static void copyStartElement(XMLStreamReader parser, XMLStreamWriter writer) throws Exception {
466         String namespaceUri = parser.getNamespaceURI();
467         String prefix = parser.getPrefix();
468         String localName = parser.getLocalName();
469 
470         // write start element
471         if (namespaceUri != null) {
472             if ((prefix == null) || (prefix.length() == 0)) {
473                 writer.writeStartElement(localName);
474             } else {
475                 writer.writeStartElement(prefix, localName, namespaceUri);
476             }
477         } else {
478             writer.writeStartElement(localName);
479         }
480 
481         // set namespaces
482         for (int i = 0; i < parser.getNamespaceCount(); i++) {
483             addNamespace(writer, parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
484         }
485         addNamespaceIfMissing(writer, prefix, namespaceUri);
486 
487         // write attributes
488         for (int i = 0; i < parser.getAttributeCount(); i++) {
489             String attrNamespaceUri = parser.getAttributeNamespace(i);
490             String attrPrefix = parser.getAttributePrefix(i);
491             String attrName = parser.getAttributeLocalName(i);
492             String attrValue = parser.getAttributeValue(i);
493 
494             if ((attrNamespaceUri == null) || (attrNamespaceUri.trim().length() == 0)) {
495                 writer.writeAttribute(attrName, attrValue);
496             } else if ((attrPrefix == null) || (attrPrefix.trim().length() == 0)) {
497                 writer.writeAttribute(attrNamespaceUri, attrName, attrValue);
498             } else {
499                 addNamespaceIfMissing(writer, attrPrefix, attrNamespaceUri);
500                 writer.writeAttribute(attrPrefix, attrNamespaceUri, attrName, attrValue);
501             }
502         }
503     }
504 
505     /**
506      * Checks if the given prefix is assigned to the given namespace.
507      */
508     @SuppressWarnings("unchecked")
509     private static void addNamespaceIfMissing(XMLStreamWriter writer, String prefix, String namespaceUri)
510             throws Exception {
511         if ((namespaceUri == null) || (namespaceUri.trim().length() == 0)) {
512             return;
513         }
514 
515         if (prefix == null) {
516             prefix = "";
517         }
518 
519         Iterator<String> iter = (Iterator<String>) writer.getNamespaceContext().getPrefixes(namespaceUri);
520         if (iter == null) {
521             return;
522         }
523 
524         while (iter.hasNext()) {
525             String p = iter.next();
526             if ((p != null) && (p.equals(prefix))) {
527                 return;
528             }
529         }
530 
531         addNamespace(writer, prefix, namespaceUri);
532     }
533 
534     /**
535      * Adds a namespace to a XML element.
536      */
537     private static void addNamespace(XMLStreamWriter writer, String prefix, String namespaceUri) throws Exception {
538         if ((prefix == null) || (prefix.trim().length() == 0)) {
539             writer.setDefaultNamespace(namespaceUri);
540             writer.writeDefaultNamespace(namespaceUri);
541         } else {
542             writer.setPrefix(prefix, namespaceUri);
543             writer.writeNamespace(prefix, namespaceUri);
544         }
545     }
546 
547     /**
548      * Skips a tag or subtree.
549      */
550     private static void skip(XMLStreamReader parser) throws Exception {
551         int level = 1;
552         while (next(parser)) {
553             int event = parser.getEventType();
554             if (event == XMLStreamReader.START_ELEMENT) {
555                 level++;
556             } else if (event == XMLStreamReader.END_ELEMENT) {
557                 level--;
558                 if (level == 0) {
559                     break;
560                 }
561             }
562         }
563 
564         next(parser);
565     }
566 
567     private static boolean next(XMLStreamReader parser) throws Exception {
568         if (parser.hasNext()) {
569             try {
570                 parser.next();
571             } catch (XMLStreamException e) {
572                 return false;
573             }
574             return true;
575         }
576 
577         return false;
578     }
579 }