This project has retired. For details please refer to its Attic page.
MetadataParserTika xref

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.chemistry.opencmis.client.parser;
20  
21  import java.io.File;
22  import java.io.FileInputStream;
23  import java.io.InputStream;
24  
25  import org.apache.chemistry.opencmis.client.mapper.MapperException;
26  import org.apache.chemistry.opencmis.commons.definitions.PropertyDefinition;
27  import org.apache.chemistry.opencmis.commons.definitions.TypeDefinition;
28  import org.apache.chemistry.opencmis.commons.enums.PropertyType;
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.tika.metadata.Metadata;
32  import org.apache.tika.parser.AutoDetectParser;
33  import org.apache.tika.parser.ParseContext;
34  import org.apache.tika.parser.Parser;
35  import org.xml.sax.ContentHandler;
36  import org.xml.sax.helpers.DefaultHandler;
37  
38  /**
39   * A metadata parser using the Apache Tika library
40   * @author Jens
41   *
42   */
43  public class MetadataParserTika extends AbstractMetadataParser {
44  
45      private static final Log LOG = LogFactory.getLog(MetadataParserTika.class.getName());
46  
47      public MetadataParserTika() {        
48      }
49      
50      public void extractMetadata(File f, TypeDefinition td) throws MapperException {
51          try {
52              InputStream stream = new FileInputStream(f);
53              Metadata metadata = new Metadata();
54              ContentHandler handler = new DefaultHandler();
55              Parser parser = new AutoDetectParser(); 
56              ParseContext context = new ParseContext();
57              parser.parse(stream, handler, metadata, context);
58  
59              reset();
60              
61              for (String key : metadata.names()) {
62                  String val = metadata.get(key);
63                  LOG.debug("Found metadata \'" + key + "\': " + val);      
64                  if (null != cmisProperties) {
65                      String propertyId = mapper.getMappedPropertyId(key);
66                      if (null != propertyId && null != val) {
67                          if (td != null) {
68                              PropertyDefinition<?> propDef = td.getPropertyDefinitions().get(propertyId);
69                              if (null == propDef)
70                                  throw new MapperException("Mapping error: unknown property "+ propertyId + " in type definition " + td.getId());
71                              PropertyType propertyType = propDef.getPropertyType();
72                              Object convVal = mapper.convertValue(propertyId, propDef, val);
73                              if (null != convVal)
74                                  cmisProperties.put(propertyId, convVal);
75                          } else
76                              cmisProperties.put(propertyId, val); // omit conversion if no type definition is available
77                      }
78                  }
79              }
80  
81          } catch (Exception e) {
82              LOG.error(e);
83              throw new MapperException("Extracting metadata failed for file " + f.getAbsolutePath(), e);
84          }
85      }    
86      
87      public void listMetadata(File f) throws MapperException {
88          try {
89              InputStream stream = new FileInputStream(f);
90              Metadata metadata = new Metadata();
91              ContentHandler handler = new DefaultHandler();
92              Parser parser = new AutoDetectParser(); 
93              ParseContext context = new ParseContext();
94              parser.parse(stream, handler, metadata, context);
95  
96              for (String key : metadata.names()) {
97                  String val = metadata.get(key);
98                  LOG.info("Found metadata \'" + key + "\': " + val);      
99              }
100 
101         } catch (Exception e) {
102             LOG.error(e);
103             throw new MapperException("Extracting metadata failed, file not found: " + f.getAbsolutePath(), e);
104         }
105     } 
106 }