1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 *
19 * Contributors:
20 * Original contributors from geronimo-javamail_1.4_spec-1.7.1
21 * Florent Guillaume
22 */
23 package org.apache.chemistry.opencmis.commons.impl;
24
25 import java.io.ByteArrayOutputStream;
26 import java.io.UnsupportedEncodingException;
27 import java.util.HashMap;
28 import java.util.Map;
29
30 /**
31 * MIME helper class.
32 */
public class MimeHelper {

    /** Name of the Content-Disposition header. */
    public static final String CONTENT_DISPOSITION = "Content-Disposition";

    /** Disposition type "attachment". */
    public static final String DISPOSITION_ATTACHMENT = "attachment";

    /** Disposition type "inline". */
    public static final String DISPOSITION_INLINE = "inline";

    /** Disposition type "form-data" with the parameter name="content". */
    public static final String DISPOSITION_FORM_DATA_CONTENT = "form-data; name=\"content\"";

    /** Name of the file name parameter. */
    public static final String DISPOSITION_FILENAME = "filename";

    // RFC 2045 tspecials, plus tab and space
    private static final String MIME_SPECIALS = "()<>@,;:\\\"/[]?=" + "\t ";

    // characters that must additionally be percent-encoded in an RFC 2231
    // extended value
    private static final String RFC2231_SPECIALS = "*'%" + MIME_SPECIALS;

    private static final String WHITE = " \t\n\r";

    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    private MimeHelper() {
    }

    /**
     * Encodes a value per RFC 2231.
     * <p>
     * This is used to pass non-ASCII parameters to MIME parameter lists.
     * <p>
     * This implementation always uses UTF-8 and no language. The prefix
     * {@code UTF-8''} is always appended to the buffer, followed by the
     * (possibly percent-encoded) bytes of the value.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the value to encode
     * @param buf the buffer to fill
     * @return {@code true} if an encoding was needed, or {@code false} if no
     *         encoding was actually needed
     */
    protected static boolean encodeRFC2231value(String value, StringBuilder buf) {
        String charset = "UTF-8";
        buf.append(charset);
        buf.append("''"); // no language
        byte[] bytes;
        try {
            bytes = value.getBytes(charset);
        } catch (UnsupportedEncodingException e) {
            // cannot happen in practice: every Java platform must support
            // UTF-8
            return true;
        }
        boolean encoded = false;
        for (int i = 0; i < bytes.length; i++) {
            int ch = bytes[i] & 0xff;
            // controls, space, non-ASCII bytes and specials are escaped
            if (ch <= 32 || ch >= 127 || RFC2231_SPECIALS.indexOf(ch) != -1) {
                buf.append('%');
                buf.append(HEX_DIGITS[ch >> 4]);
                buf.append(HEX_DIGITS[ch & 0xf]);
                encoded = true;
            } else {
                buf.append((char) ch);
            }
        }
        return encoded;
    }

    /**
     * Encodes a MIME parameter per RFC 2231.
     * <p>
     * This implementation always uses UTF-8 and no language. If the value
     * needs no encoding it is emitted verbatim as {@code ; key=value}; an
     * empty value is emitted as an empty quoted string because an empty token
     * is not a valid parameter value. Otherwise the extended form
     * {@code ; key*=UTF-8''...} is used.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param key the parameter name
     * @param value the string to encode
     * @return the encoded parameter, including the leading {@code "; "}
     */
    protected static String encodeRFC2231(String key, String value) {
        StringBuilder buf = new StringBuilder();
        boolean encoded = encodeRFC2231value(value, buf);
        if (encoded) {
            return "; " + key + "*=" + buf.toString();
        }
        if (value.length() == 0) {
            // "key=" alone cannot be parsed back; use a quoted empty string
            return "; " + key + "=\"\"";
        }
        return "; " + key + "=" + value;
    }

    /**
     * Encodes the Content-Disposition header value according to RFC 2183 and
     * RFC 2231.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param disposition the disposition, {@code null} means
     *        {@link #DISPOSITION_ATTACHMENT}
     * @param filename the file name, if {@code null} no filename parameter is
     *        added
     * @return the encoded header value
     */
    public static String encodeContentDisposition(String disposition,
            String filename) {
        if (disposition == null) {
            disposition = DISPOSITION_ATTACHMENT;
        }
        if (filename == null) {
            return disposition;
        }
        return disposition + encodeRFC2231(DISPOSITION_FILENAME, filename);
    }

    /**
     * Decodes a filename from the Content-Disposition header value according to
     * RFC 2183 and RFC 2231.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the header value to decode
     * @return the filename, or {@code null} if the header has no filename
     *         parameter or could not be parsed
     */
    public static String decodeContentDispositionFilename(String value) {
        Map<String, String> params = new HashMap<String, String>();
        decodeContentDisposition(value, params);
        return params.get(DISPOSITION_FILENAME);
    }

    /**
     * Decodes the Content-Disposition header value according to RFC 2183 and
     * RFC 2231.
     * <p>
     * Does not deal with continuation lines.
     * <p>
     * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for
     * details.
     *
     * @param value the header value to decode
     * @param params the map of parameters to fill; parameters parsed before a
     *        syntax error is hit remain in the map
     * @return the disposition, or {@code null} if the value is {@code null}
     *         or cannot be parsed
     */
    public static String decodeContentDisposition(String value,
            Map<String, String> params) {
        if (value == null) {
            return null;
        }
        try {
            HeaderTokenizer tokenizer = new HeaderTokenizer(value);
            // get the first token, which must be an ATOM
            Token token = tokenizer.next();
            if (token.getType() != Token.ATOM) {
                return null;
            }
            String disposition = token.getValue();

            // the remainder is the parameters
            getParameters(tokenizer.getRemainder(), params);
            return disposition;
        } catch (ParseException e) {
            return null;
        }
    }

    /**
     * Signals a syntax error in a header value.
     */
    protected static class ParseException extends Exception {
        private static final long serialVersionUID = 1L;

        public ParseException() {
            super();
        }

        public ParseException(String message) {
            super(message);
        }
    }

    /*
     * From geronimo-javamail_1.4_spec-1.7.1. Token
     */
    /**
     * A token produced by {@link HeaderTokenizer}. The type is either one of
     * the negative constants below or, for a special or control character,
     * the character code itself.
     */
    protected static class Token {
        // Constant values from J2SE 1.4 API Docs (Constant values)
        public static final int ATOM = -1;

        public static final int COMMENT = -3;

        public static final int EOF = -4;

        public static final int QUOTEDSTRING = -2;

        private final int type;

        private final String value;

        public Token(int type, String value) {
            this.type = type;
            this.value = value;
        }

        public int getType() {
            return type;
        }

        public String getValue() {
            return value;
        }
    }

    /*
     * Tweaked from geronimo-javamail_1.4_spec-1.7.1. HeaderTokenizer
     */
    /**
     * Splits a header value into atoms, quoted strings, comments and
     * single-character special tokens.
     */
    protected static class HeaderTokenizer {

        private static final Token EOF = new Token(Token.EOF, null);

        private final String header;

        private final String delimiters;

        private final boolean skipComments;

        // index of the next unread character in the header
        private int pos;

        public HeaderTokenizer(String header) {
            this(header, MIME_SPECIALS, true);
        }

        protected HeaderTokenizer(String header, String delimiters,
                boolean skipComments) {
            this.header = header;
            this.delimiters = delimiters;
            this.skipComments = skipComments;
        }

        /**
         * Returns the part of the header that has not been tokenized yet.
         *
         * @return the unread rest of the header, possibly empty
         */
        public String getRemainder() {
            return header.substring(pos);
        }

        /**
         * Returns the next token of the header.
         *
         * @return the next token, or the EOF token at the end of the header
         * @throws ParseException if a comment or quoted string is malformed
         */
        public Token next() throws ParseException {
            return readToken();
        }

        /**
         * Read an ATOM token from the parsed header.
         *
         * @return A token containing the value of the atom token.
         */
        private Token readAtomicToken() {
            // skip to next delimiter
            int start = pos;
            while (++pos < header.length()) {
                // break on the first non-atom character.
                char ch = header.charAt(pos);
                if (delimiters.indexOf(ch) != -1 || ch < 32 || ch >= 127) {
                    break;
                }
            }
            return new Token(Token.ATOM, header.substring(start, pos));
        }

        /**
         * Read the next token from the header.
         *
         * @return The next token from the header. White space is skipped, and
         *         comment tokens are also skipped if indicated.
         */
        private Token readToken() throws ParseException {
            if (pos >= header.length()) {
                return EOF;
            }
            char c = header.charAt(pos);
            if (c == '(') {
                // comment token...read and skip over this
                Token comment = readComment();
                return skipComments ? readToken() : comment;
            }
            if (c == '\"') {
                // quoted literal
                return readQuotedString();
            }
            if (WHITE.indexOf(c) != -1) {
                // white space, eat this and find a real token.
                eatWhiteSpace();
                return readToken();
            }
            if (c < 32 || c >= 127 || delimiters.indexOf(c) != -1) {
                // either a CTL or special. These characters have a
                // self-defining token type.
                pos++;
                return new Token((int) c, String.valueOf(c));
            }
            // start of an atom, parse it off.
            return readAtomicToken();
        }

        /**
         * Extract a substring from the header string and apply any
         * escaping/folding rules to the string: a backslash makes the next
         * character literal, and a CR (together with a directly following
         * LF) is dropped. A naked LF is kept.
         *
         * @param start The starting offset in the header.
         * @param end The header end offset + 1.
         * @return The processed string value.
         * @throws ParseException if the range ends with a lone backslash
         */
        private String getEscapedValue(int start, int end)
                throws ParseException {
            StringBuilder value = new StringBuilder(end - start);
            for (int i = start; i < end; i++) {
                char ch = header.charAt(i);
                if (ch == '\\') {
                    // escape character: take the next character literally
                    i++;
                    if (i == end) {
                        throw new ParseException("Invalid escape character");
                    }
                    value.append(header.charAt(i));
                } else if (ch == '\r') {
                    // drop the CR; if this is a CRLF sequence, skip the LF
                    // as well
                    if (i < end - 1 && header.charAt(i + 1) == '\n') {
                        i++;
                    }
                } else {
                    // just append the ch value.
                    value.append(ch);
                }
            }
            return value.toString();
        }

        /**
         * Read a comment from the header, applying nesting and escape rules to
         * the content. On return the position is just after the closing
         * parenthesis.
         *
         * @return A comment token with the token value.
         * @throws ParseException if the parentheses are unbalanced
         */
        private Token readComment() throws ParseException {
            int start = pos + 1;
            int nesting = 1;
            boolean requiresEscaping = false;
            // skip to end of comment/string
            while (++pos < header.length()) {
                char ch = header.charAt(pos);
                if (ch == ')') {
                    nesting--;
                    if (nesting == 0) {
                        break;
                    }
                } else if (ch == '(') {
                    nesting++;
                } else if (ch == '\\') {
                    // skip the escaped character
                    pos++;
                    requiresEscaping = true;
                } else if (ch == '\r') {
                    // we need to process line breaks also
                    requiresEscaping = true;
                }
            }
            if (nesting != 0) {
                throw new ParseException("Unbalanced comments");
            }
            String value;
            if (requiresEscaping) {
                value = getEscapedValue(start, pos);
            } else {
                value = header.substring(start, pos);
            }
            // always step over the closing ')', whichever branch produced
            // the value; otherwise the next token would be the ')' itself
            pos++;
            return new Token(Token.COMMENT, value);
        }

        /**
         * Parse out a quoted string from the header, applying escaping rules to
         * the value.
         *
         * @return The QUOTEDSTRING token with the value.
         * @throws ParseException if the closing quote is missing
         */
        private Token readQuotedString() throws ParseException {
            int start = pos + 1;
            boolean requiresEscaping = false;
            // skip to end of comment/string
            while (++pos < header.length()) {
                char ch = header.charAt(pos);
                if (ch == '"') {
                    String value;
                    if (requiresEscaping) {
                        value = getEscapedValue(start, pos);
                    } else {
                        value = header.substring(start, pos);
                    }
                    // step over the closing quote
                    pos++;
                    return new Token(Token.QUOTEDSTRING, value);
                } else if (ch == '\\') {
                    // skip the escaped character
                    pos++;
                    requiresEscaping = true;
                } else if (ch == '\r') {
                    // we need to process line breaks also
                    requiresEscaping = true;
                }
            }
            throw new ParseException("Missing '\"'");
        }

        /**
         * Skip white space in the token string.
         */
        private void eatWhiteSpace() {
            // skip to end of whitespace
            do {
                pos++;
            } while (pos < header.length()
                    && WHITE.indexOf(header.charAt(pos)) != -1);
        }
    }

    /*
     * Tweaked from geronimo-javamail_1.4_spec-1.7.1. ParameterList
     */
    /**
     * Parses a list of {@code ;name=value} parameters into a map.
     * <p>
     * Names are stored in lower case. A name ending with {@code *} marks an
     * RFC 2231 extended value: the {@code *} is removed from the name and the
     * value is decoded. If a name occurs more than once, the last occurrence
     * wins.
     *
     * @param list the parameter list, starting with the first {@code ;}
     * @param params the map of parameters to fill
     * @return the {@code params} map
     * @throws ParseException if the list is not well-formed
     */
    protected static Map<String, String> getParameters(String list,
            Map<String, String> params) throws ParseException {
        HeaderTokenizer tokenizer = new HeaderTokenizer(list);
        while (true) {
            Token token = tokenizer.next();
            if (token.getType() == Token.EOF) {
                // the EOF token terminates parsing.
                return params;
            }
            // each new parameter is separated by a semicolon, including
            // the first, which separates the parameters from the main part
            // of the header.
            if (token.getType() != ';') {
                throw new ParseException("Missing ';'");
            }

            // the next token needs to be a parameter name
            token = tokenizer.next();
            // allow a trailing semicolon on the parameters.
            if (token.getType() == Token.EOF) {
                return params;
            }
            if (token.getType() != Token.ATOM) {
                throw new ParseException("Invalid parameter name: "
                        + token.getValue());
            }

            // get the parameter name as a lower case version for better
            // mapping; a fixed locale keeps this independent of the default
            // locale (e.g. Turkish would map 'I' to a dotless 'i')
            String name = token.getValue().toLowerCase(
                    java.util.Locale.ENGLISH);

            // parameters are name=value, so we must have the "=" here.
            token = tokenizer.next();
            if (token.getType() != '=') {
                throw new ParseException("Missing '='");
            }

            // now the value, which may be an atom or a literal
            token = tokenizer.next();
            if (token.getType() != Token.ATOM
                    && token.getType() != Token.QUOTEDSTRING) {
                throw new ParseException("Invalid parameter value: "
                        + token.getValue());
            }
            String value = token.getValue();

            // a name that ends with "*" is marked as being encoded, so we
            // decode the value.
            if (name.endsWith("*")) {
                name = name.substring(0, name.length() - 1);
                value = decodeRFC2231value(value);
            }
            params.put(name, value);
        }
    }

    /**
     * Decodes an RFC 2231 extended value of the form
     * {@code charset'language'percent-encoded-text}.
     *
     * @param value the extended value
     * @return the decoded text, or the unchanged value if the charset or
     *         language delimiter is missing or the charset is not supported
     */
    protected static String decodeRFC2231value(String value) {
        int q1 = value.indexOf('\'');
        if (q1 == -1) {
            // missing charset
            return value;
        }
        String mimeCharset = value.substring(0, q1);
        int q2 = value.indexOf('\'', q1 + 1);
        if (q2 == -1) {
            // missing language
            return value;
        }
        byte[] bytes = fromHex(value.substring(q2 + 1));
        try {
            return new String(bytes, getJavaCharset(mimeCharset));
        } catch (UnsupportedEncodingException e) {
            // incorrect encoding
            return value;
        }
    }

    /**
     * Decodes percent-encoded text into bytes.
     * <p>
     * Hex digits are accepted in upper and lower case. A {@code %} that is
     * followed by two characters which are not both hex digits is kept as a
     * literal {@code %}. A {@code %} with fewer than two characters after it
     * ends the decoding; the truncated sequence is dropped. All other
     * characters contribute their low 8 bits.
     *
     * @param data the percent-encoded text
     * @return the decoded bytes
     */
    protected static byte[] fromHex(String data) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int length = data.length();
        int i = 0;
        while (i < length) {
            char c = data.charAt(i++);
            if (c != '%') {
                out.write((byte) c);
                continue;
            }
            if (i > length - 2) {
                break; // unterminated sequence
            }
            int high = hexValue(data.charAt(i));
            int low = hexValue(data.charAt(i + 1));
            if (high < 0 || low < 0) {
                // not a valid escape: keep the '%' and let the following
                // characters be handled as ordinary text
                out.write('%');
                continue;
            }
            out.write((high << 4) | low);
            i += 2;
        }
        return out.toByteArray();
    }

    /**
     * Returns the numeric value of an ASCII hex digit, in either case.
     *
     * @param c the character
     * @return the value 0-15, or -1 if the character is not a hex digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Maps a MIME charset name to a Java charset name.
     *
     * @param mimeCharset the MIME charset name
     * @return the same name, unchanged
     */
    protected static String getJavaCharset(String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

}