This project has retired. For details please refer to its Attic page.
MimeHelper xref

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   *
19   * Contributors:
20   *     Original contributors from geronimo-javamail_1.4_spec-1.7.1
21   *     Florent Guillaume
22   */
23  package org.apache.chemistry.opencmis.commons.impl;
24  
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
29  
30  /**
31   * MIME helper class.
32   */
public class MimeHelper {

    public static final String CONTENT_DISPOSITION = "Content-Disposition";

    public static final String DISPOSITION_ATTACHMENT = "attachment";

    public static final String DISPOSITION_INLINE = "inline";

    public static final String DISPOSITION_FORM_DATA_CONTENT = "form-data; name=\"content\"";

    public static final String DISPOSITION_FILENAME = "filename";

    // RFC 2045 "tspecials", plus tab and space which also terminate a token.
    private static final String MIME_SPECIALS = "()<>@,;:\\\"/[]?=" + "\t ";

    // Characters that must additionally be percent-encoded in an RFC 2231
    // extended value.
    private static final String RFC2231_SPECIALS = "*'%" + MIME_SPECIALS;

    private static final String WHITE = " \t\n\r";

    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    private MimeHelper() {
    }

    /**
     * Encodes a value per RFC 2231.
     * <p>
     * This is used to pass non-ASCII parameters to MIME parameter lists.
     * <p>
     * This implementation always uses UTF-8 and no language. The prefix
     * {@code UTF-8''} is always appended to {@code buf}, even when no byte of
     * the value needed percent-encoding; callers decide from the return value
     * whether to use the buffer content.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the value to encode
     * @param buf the buffer to fill
     * @return {@code true} if an encoding was needed, or {@code false} if no
     *         encoding was actually needed
     */
    protected static boolean encodeRFC2231value(String value, StringBuilder buf) {
        String charset = "UTF-8";
        buf.append(charset);
        buf.append("''"); // no language
        byte[] bytes;
        try {
            bytes = value.getBytes(charset);
        } catch (UnsupportedEncodingException e) {
            // every Java platform is required to support UTF-8, so this is
            // not expected to happen; the historical result is kept.
            return true;
        }
        boolean encoded = false;
        for (byte b : bytes) {
            int ch = b & 0xff;
            // controls, space, DEL, non-ASCII and the RFC 2231 specials are
            // percent-encoded; everything else is copied through.
            if (ch <= 32 || ch >= 127 || RFC2231_SPECIALS.indexOf(ch) != -1) {
                buf.append('%');
                buf.append(HEX_DIGITS[ch >> 4]);
                buf.append(HEX_DIGITS[ch & 0xf]);
                encoded = true;
            } else {
                buf.append((char) ch);
            }
        }
        return encoded;
    }

    /**
     * Encodes a MIME parameter per RFC 2231.
     * <p>
     * This implementation always uses UTF-8 and no language. The result starts
     * with {@code "; "} so that it can be appended directly to a header value.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param key the parameter name
     * @param value the string to encode
     * @return {@code "; key*=UTF-8''..."} if the value needed encoding,
     *         {@code "; key=value"} otherwise
     */
    protected static String encodeRFC2231(String key, String value) {
        StringBuilder buf = new StringBuilder();
        if (encodeRFC2231value(value, buf)) {
            return "; " + key + "*=" + buf;
        }
        return "; " + key + "=" + value;
    }

    /**
     * Encodes the Content-Disposition header value according to RFC 2183 and
     * RFC 2231.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param disposition the disposition, or {@code null} for
     *            {@link #DISPOSITION_ATTACHMENT}
     * @param filename the file name; if {@code null}, no filename parameter is
     *            emitted
     * @return the encoded header value
     */
    public static String encodeContentDisposition(String disposition,
            String filename) {
        if (disposition == null) {
            disposition = DISPOSITION_ATTACHMENT;
        }
        if (filename == null) {
            // nothing to encode; avoid a NullPointerException
            return disposition;
        }
        return disposition + encodeRFC2231(DISPOSITION_FILENAME, filename);
    }

    /**
     * Decodes a filename from the Content-Disposition header value according to
     * RFC 2183 and RFC 2231.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the header value to decode
     * @return the filename, or {@code null} if the header has no filename
     *         parameter
     */
    public static String decodeContentDispositionFilename(String value) {
        Map<String, String> params = new HashMap<String, String>();
        decodeContentDisposition(value, params);
        return params.get(DISPOSITION_FILENAME);
    }

    /**
     * Decodes the Content-Disposition header value according to RFC 2183 and
     * RFC 2231.
     * <p>
     * Does not deal with continuation lines.
     * <p>
     * If parsing fails part way through the parameter list, parameters that
     * were parsed before the failure remain in {@code params}.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the header value to decode
     * @param params the map of parameters to fill; keys are lower-cased
     *            parameter names
     * @return the disposition, or {@code null} if {@code value} is
     *         {@code null} or cannot be parsed
     */
    public static String decodeContentDisposition(String value,
            Map<String, String> params) {
        if (value == null) {
            return null;
        }
        try {
            HeaderTokenizer tokenizer = new HeaderTokenizer(value);
            // the first token must be an ATOM: the disposition type
            Token first = tokenizer.next();
            if (first.getType() != Token.ATOM) {
                return null;
            }
            // the remainder is the parameters
            getParameters(tokenizer.getRemainder(), params);
            return first.getValue();
        } catch (ParseException e) {
            return null;
        }
    }

    /**
     * Signals a header value that does not follow the expected grammar.
     */
    protected static class ParseException extends Exception {
        private static final long serialVersionUID = 1L;

        public ParseException() {
            super();
        }

        public ParseException(String message) {
            super(message);
        }
    }

    /*
     * From geronimo-javamail_1.4_spec-1.7.1. Token
     */
    /**
     * A lexical token. The type is one of the negative constants below, or,
     * for a single delimiter/control character, the character code itself.
     */
    protected static class Token {
        // Constant values from J2SE 1.4 API Docs (Constant values)
        public static final int ATOM = -1;

        public static final int COMMENT = -3;

        public static final int EOF = -4;

        public static final int QUOTEDSTRING = -2;

        private final int type;

        private final String value;

        public Token(int type, String value) {
            this.type = type;
            this.value = value;
        }

        public int getType() {
            return type;
        }

        public String getValue() {
            return value;
        }
    }

    /*
     * Tweaked from geronimo-javamail_1.4_spec-1.7.1. HeaderTokenizer
     */
    /**
     * Splits a header value into atoms, quoted strings, comments and
     * single-character delimiter tokens.
     */
    protected static class HeaderTokenizer {

        private static final Token EOF = new Token(Token.EOF, null);

        private final String header;

        private final String delimiters;

        private final boolean skipComments;

        // index of the next unread character in header
        private int pos;

        public HeaderTokenizer(String header) {
            this(header, MIME_SPECIALS, true);
        }

        protected HeaderTokenizer(String header, String delimiters,
                boolean skipComments) {
            this.header = header;
            this.delimiters = delimiters;
            this.skipComments = skipComments;
        }

        /**
         * @return the part of the header that has not been tokenized yet
         */
        public String getRemainder() {
            return header.substring(pos);
        }

        /**
         * @return the next token, or the EOF token at the end of the header
         * @throws ParseException on an unterminated comment or quoted string,
         *             or a dangling escape character
         */
        public Token next() throws ParseException {
            return readToken();
        }

        /**
         * Read an ATOM token from the parsed header.
         *
         * @return A token containing the value of the atom token.
         */
        private Token readAtomicToken() {
            int start = pos;
            // the character at start is already known to belong to the atom;
            // advance to the first delimiter, control or non-ASCII character.
            while (++pos < header.length()) {
                char ch = header.charAt(pos);
                if (ch < 32 || ch >= 127 || delimiters.indexOf(ch) != -1) {
                    break;
                }
            }
            return new Token(Token.ATOM, header.substring(start, pos));
        }

        /**
         * Read the next token from the header.
         * <p>
         * Implemented as a loop rather than by recursion so that a header
         * made of many comments or whitespace runs cannot exhaust the stack.
         *
         * @return The next token from the header. White space is skipped, and
         *         comment tokens are also skipped if indicated.
         */
        private Token readToken() throws ParseException {
            while (pos < header.length()) {
                char c = header.charAt(pos);
                if (c == '(') {
                    // comment token: return it, or skip it and keep going
                    Token comment = readComment();
                    if (!skipComments) {
                        return comment;
                    }
                } else if (c == '"') {
                    // quoted literal
                    return readQuotedString();
                } else if (WHITE.indexOf(c) != -1) {
                    // white space, eat this and find a real token.
                    eatWhiteSpace();
                } else if (c < 32 || c >= 127 || delimiters.indexOf(c) != -1) {
                    // either a CTL or special. These characters have a
                    // self-defining token type.
                    pos++;
                    return new Token(c, String.valueOf(c));
                } else {
                    // start of an atom, parse it off.
                    return readAtomicToken();
                }
            }
            return EOF;
        }

        /**
         * Extract a substring from the header string and apply any
         * escaping/folding rules to the string.
         *
         * @param start The starting offset in the header.
         * @param end The header end offset + 1.
         * @return The processed string value.
         * @throws ParseException if a backslash is the last character of the
         *             range
         */
        private String getEscapedValue(int start, int end)
                throws ParseException {
            StringBuilder value = new StringBuilder(end - start);
            for (int i = start; i < end; i++) {
                char ch = header.charAt(i);
                if (ch == '\\') {
                    // escape character: take the following character as-is
                    i++;
                    if (i == end) {
                        throw new ParseException("Invalid escape character");
                    }
                    value.append(header.charAt(i));
                } else if (ch == '\r') {
                    // line breaks are ignored, except for naked '\n'
                    // characters, which are considered part of linear
                    // whitespace. For a CRLF sequence skip the '\n' too.
                    if (i < end - 1 && header.charAt(i + 1) == '\n') {
                        i++;
                    }
                } else {
                    value.append(ch);
                }
            }
            return value.toString();
        }

        /**
         * Read a comment from the header, applying nesting and escape rules to
         * the content. On return the position is just after the closing
         * parenthesis.
         *
         * @return A comment token with the token value.
         * @throws ParseException if the parentheses are not balanced
         */
        private Token readComment() throws ParseException {
            int start = pos + 1;
            int nesting = 1;
            boolean requiresEscaping = false;
            // skip to end of comment
            while (++pos < header.length()) {
                char ch = header.charAt(pos);
                if (ch == ')') {
                    nesting--;
                    if (nesting == 0) {
                        break;
                    }
                } else if (ch == '(') {
                    nesting++;
                } else if (ch == '\\') {
                    // skip the escaped character
                    pos++;
                    requiresEscaping = true;
                } else if (ch == '\r') {
                    // we need to process line breaks also
                    requiresEscaping = true;
                }
            }
            if (nesting != 0) {
                throw new ParseException("Unbalanced comments");
            }
            String value = requiresEscaping ? getEscapedValue(start, pos)
                    : header.substring(start, pos);
            // step past the closing ')' in BOTH cases; previously the escaped
            // branch left the position on ')' which was then returned as a
            // bogus delimiter token.
            pos++;
            return new Token(Token.COMMENT, value);
        }

        /**
         * Parse out a quoted string from the header, applying escaping rules to
         * the value. On return the position is just after the closing quote.
         *
         * @return The QUOTEDSTRING token with the value.
         * @throws ParseException if the closing quote is missing
         */
        private Token readQuotedString() throws ParseException {
            int start = pos + 1;
            boolean requiresEscaping = false;
            // skip to end of string
            while (++pos < header.length()) {
                char ch = header.charAt(pos);
                if (ch == '"') {
                    int end = pos;
                    pos++; // step past the closing quote
                    String value = requiresEscaping ? getEscapedValue(start, end)
                            : header.substring(start, end);
                    return new Token(Token.QUOTEDSTRING, value);
                } else if (ch == '\\') {
                    // skip the escaped character
                    pos++;
                    requiresEscaping = true;
                } else if (ch == '\r') {
                    // we need to process line breaks also
                    requiresEscaping = true;
                }
            }
            throw new ParseException("Missing '\"'");
        }

        /**
         * Skip white space in the token string. Must only be called when the
         * current character is white space.
         */
        private void eatWhiteSpace() {
            do {
                pos++;
            } while (pos < header.length()
                    && WHITE.indexOf(header.charAt(pos)) != -1);
        }
    }

    /*
     * Tweaked from geronimo-javamail_1.4_spec-1.7.1. ParameterList
     */
    /**
     * Parses a {@code ;name=value;name=value} parameter list.
     * <p>
     * Parameter names are lower-cased (locale-independently). A name ending
     * with {@code *} marks an RFC 2231 extended value: the {@code *} is
     * removed from the name and the value is decoded.
     *
     * @param list the parameter list, starting at the first {@code ;}
     * @param params the map to fill
     * @return {@code params}
     * @throws ParseException if the list is malformed; parameters parsed
     *             before the error remain in {@code params}
     */
    protected static Map<String, String> getParameters(String list,
            Map<String, String> params) throws ParseException {
        HeaderTokenizer tokenizer = new HeaderTokenizer(list);
        while (true) {
            Token token = tokenizer.next();
            // the EOF token terminates parsing.
            if (token.getType() == Token.EOF) {
                return params;
            }
            // each new parameter is introduced by a semicolon, including the
            // first, which separates the parameters from the main part of the
            // header.
            if (token.getType() != ';') {
                throw new ParseException("Missing ';'");
            }

            // the next token needs to be a parameter name
            token = tokenizer.next();
            // allow a trailing semicolon on the parameters.
            if (token.getType() == Token.EOF) {
                return params;
            }
            if (token.getType() != Token.ATOM) {
                throw new ParseException("Invalid parameter name: "
                        + token.getValue());
            }

            // lower-case the name for easier lookup; a fixed locale is used
            // because the default one may map 'I' to a non-ASCII letter
            // (e.g. Turkish dotless i).
            String name = token.getValue().toLowerCase(Locale.ENGLISH);

            // parameters are name=value, so we must have the "=" here.
            token = tokenizer.next();
            if (token.getType() != '=') {
                throw new ParseException("Missing '='");
            }

            // now the value, which may be an atom or a literal
            token = tokenizer.next();
            if (token.getType() != Token.ATOM
                    && token.getType() != Token.QUOTEDSTRING) {
                throw new ParseException("Invalid parameter value: "
                        + token.getValue());
            }
            String value = token.getValue();

            // a name that ends with "*" is marked as being encoded
            if (name.endsWith("*")) {
                name = name.substring(0, name.length() - 1);
                value = decodeRFC2231value(value);
            }
            params.put(name, value);
        }
    }

    /**
     * Decodes an RFC 2231 extended value of the form
     * {@code charset'language'percent-encoded-text}.
     *
     * @param value the extended value
     * @return the decoded text, or {@code value} unchanged if it lacks one of
     *         the two quote separators or names an unsupported charset
     */
    protected static String decodeRFC2231value(String value) {
        int q1 = value.indexOf('\'');
        if (q1 == -1) {
            // missing charset
            return value;
        }
        String mimeCharset = value.substring(0, q1);
        int q2 = value.indexOf('\'', q1 + 1);
        if (q2 == -1) {
            // missing language
            return value;
        }
        byte[] bytes = fromHex(value.substring(q2 + 1));
        try {
            return new String(bytes, getJavaCharset(mimeCharset));
        } catch (UnsupportedEncodingException e) {
            // incorrect encoding
            return value;
        }
    }

    /**
     * Decodes {@code %XX} escapes into bytes; other characters are written as
     * their low-order byte.
     * <p>
     * Hex digits are accepted in either case. An escape whose two following
     * characters are not both hex digits is not decoded: the {@code %} is
     * copied through literally. An escape truncated by the end of the input
     * stops decoding and is dropped.
     *
     * @param data the percent-encoded text
     * @return the decoded bytes
     */
    protected static byte[] fromHex(String data) {
        int len = data.length();
        ByteArrayOutputStream out = new ByteArrayOutputStream(len);
        int i = 0;
        while (i < len) {
            char c = data.charAt(i++);
            if (c != '%') {
                out.write((byte) c);
                continue;
            }
            if (i > len - 2) {
                break; // unterminated sequence
            }
            int hi = hexValue(data.charAt(i));
            int lo = hexValue(data.charAt(i + 1));
            if (hi < 0 || lo < 0) {
                // not an escape after all; keep the '%' and let the
                // following characters be processed normally
                out.write('%');
                continue;
            }
            out.write((hi << 4) | lo);
            i += 2;
        }
        return out.toByteArray();
    }

    /**
     * @return the value (0-15) of an ASCII hex digit of either case, or -1 if
     *         {@code c} is not a hex digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Maps a MIME charset name to a Java charset name. Currently returns the
     * name unchanged.
     *
     * @param mimeCharset the MIME charset name
     * @return the Java charset name
     */
    protected static String getJavaCharset(String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

}