This project has retired. For details please refer to its Attic page.
LoremIpsum xref

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.chemistry.opencmis.util.content.loremipsum;
20  
21  import java.io.IOException;
22  import java.io.StringWriter;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Random;
30  import java.util.Set;
31  
32  /**
33   * A generator of lorem ipsum text ported from the Python implementation at
34   * http://code.google.com/p/lorem-ipsum-generator/. 
35   * Note: original code licensed under the BSD license
36   * 
37   */
38  public class LoremIpsum {
39  
40      private static class WordLengthPair {
41          public int len1;
42          public int len2;
43  
44          public WordLengthPair(int len1, int len2) {
45              this.len1 = len1;
46              this.len2 = len2;
47          }
48  
49          public String toString() {
50              return "WordLengthPair: len1: " + len1 + ", len2: " + len2;
51          }
52  
53          @Override
54          public boolean equals(Object other) {
55              if (this == null || other == null)
56                  return false;
57              if (other.getClass() != WordLengthPair.class)
58                  return false;
59              if (len1 == ((WordLengthPair) other).len1 && len2 == ((WordLengthPair) other).len2)
60                  return true;
61              else
62                  return false;
63          }
64  
65          @Override
66          public int hashCode() {
67              return len1 ^ len2;
68          }
69      }
70  
71      /**
72       * Delimiters that end sentences.
73       * 
74       * @type {Array.<string>}
75       * @private
76       */
77      private static final String DELIMITERS_SENTENCES[] = { ".", "?", "!" };
78  
79      /**
80       * Regular expression for splitting a text into sentences.
81       * 
82       * @type {RegExp}
83       * @private
84       */
85      private static final String SENTENCE_SPLIT_REGEX = "[\\.\\?\\!]";
86  
87      /**
88       * Delimiters that end words.
89       * 
90       * @type {Array.<string>}
91       * @private
92       */
93      private static final String DELIMITERS_WORDS[] = { ".", ",", "?", "!" };
94  
95      /**
96       * Regular expression for splitting text into words.
97       * 
98       * @type {RegExp}
99       * @private
100      */
101     private static final String WORD_SPLIT_REGEX = "\\s";
102 
103     private static final String LINE_SEPARATOR = System.getProperty("line.separator");
104 
105     /**
106      * Words that can be used in the generated output. Maps a word-length to a
107      * list of words of that length.
108      * 
109      * @type {goog.structs.Map}
110      * @private
111      */
112     private Map<Integer, List<String>> words;
113 
114     /**
115      * Chains of three words that appear in the sample text Maps a pair of
116      * word-lengths to a third word-length and an optional piece of trailing
117      * punctuation (for example, a period, comma, etc.).
118      * 
119      * @type {goog.structs.Map}
120      * @private
121      */
122     private Map<WordLengthPair, List<WordInfo>> chains;
123 
124     /**
125      * Pairs of word-lengths that can appear at the beginning of sentences.
126      * 
127      * @type {Array}
128      */
129     private List<WordLengthPair> starts;
130 
131     /**
132      * Average sentence length in words.
133      * 
134      * @type {number}
135      * @private
136      */
137     private double sentenceMean;
138 
139     /**
140      * Sigma (sqrt of Objectiance) for the sentence length in words.
141      * 
142      * @type {number}
143      * @private
144      */
145     private double sentenceSigma;
146 
147     /**
148      * Average paragraph length in sentences.
149      * 
150      * @type {number}
151      * @private
152      */
153     private double paragraphMean;
154 
155     /**
156      * Sigma (sqrt of variance) for the paragraph length in sentences.
157      * 
158      * @type {number}
159      * @private
160      */
161     private double paragraphSigma;
162 
163     /**
164      * Sample that the generated text is based on .
165      * 
166      * @type {string}
167      */
168     private String sample = SAMPLE;
169 
170     /**
171      * Dictionary of words.
172      * 
173      * @type {string}
174      */
175     private String dictionary = DICT;
176 
177     /**
178      * Picks a random element of the array.
179      * 
180      * @param {Array} array The array to pick from.
181      * @return {*} An element from the array.
182      */
183     private WordInfo randomChoice(WordInfo[] array) {
184         return array[randomInt(array.length)];
185     };
186 
187     private String randomChoice(String[] array) {
188         return array[randomInt(array.length)];
189     };
190 
191     private int randomInt(int length) {
192         return randomGenerator.nextInt(length);
193     }
194 
195     private static class WordInfo {
196         int len;
197         String delim;
198     }
199 
200     private Random randomGenerator = new Random();
201 
202     /**
203      * Generates random strings of "lorem ipsum" text, based on the word
204      * distribution of a sample text, using the words in a dictionary.
205      * 
206      * @constructor
207      */
208     public LoremIpsum() {
209         generateChains(this.sample);
210         generateStatistics(this.sample);
211         initializeDictionary(this.dictionary);
212     };
213 
214     public LoremIpsum(String sample, String dictionary) {
215         this.sample = sample;
216         this.dictionary = dictionary;
217         generateChains(this.sample);
218         generateStatistics(this.sample);
219         initializeDictionary(this.dictionary);
220     };
221 
222     public LoremIpsum(String sample, String[] dictionary) {
223         this.sample = sample;
224         this.dictionary = null;
225         generateChains(this.sample);
226         generateStatistics(this.sample);
227         initializeDictionary(dictionary);
228     };
229 
230     public LoremIpsum(String sample) {
231         this.sample = sample;
232         String[] dictWords = filterNotEmptyOrWhiteSpace(sample.split("[^\\p{L}]"/* "\\W" */)).toArray(new String[0]);
233         Set<String> dict = new HashSet<String>(Arrays.asList(dictWords));
234         dictWords = dict.toArray(new String[0]);
235         Arrays.sort(dictWords);
236 
237         generateChains(this.sample);
238         generateStatistics(this.sample);
239         initializeDictionary(dictWords);
240     };
241 
242     /**
243      * Generates a single lorem ipsum paragraph, of random length.
244      * 
245      * @param {boolean} opt_startWithLorem Whether to start the sentence with
246      *        the standard "Lorem ipsum..." first sentence.
247      * @return {string} The generated sentence.
248      */
249     public String generateParagraph(boolean opt_startWithLorem) {
250         // The length of the paragraph is a normally distributed random
251         // Objectiable.
252         Double paragraphLengthDbl = randomNormal(this.paragraphMean, this.paragraphSigma);
253         int paragraphLength = Math.max((int) Math.floor(paragraphLengthDbl), 1);
254 
255         // Construct a paragraph from a number of sentences.
256         List<String> paragraph = new ArrayList<String>();
257         boolean startWithLorem = opt_startWithLorem;
258         while (paragraph.size() < paragraphLength) {
259             String sentence = this.generateSentence(startWithLorem);
260             paragraph.add(sentence);
261             startWithLorem = false;
262         }
263 
264         StringBuffer result = new StringBuffer();
265         // Form the paragraph into a string.
266         for (String sentence : paragraph) {
267             result.append(sentence);
268             result.append(" ");
269         }
270         return result.toString();
271     }
272 
273     /**
274      * Generates a single sentence, of random length.
275      * 
276      * @param {boolean} opt_startWithLorem Whether to start the setnence with
277      *        the standard "Lorem ipsum..." first sentence.
278      * @return {string} The generated sentence.
279      */
280     public String generateSentence(boolean opt_startWithLorem) {
281         if (this.chains.size() == 0 || this.starts.size() == 0) {
282             throw new RuntimeException("No chains created (Invalid sample text?)");
283         }
284 
285         if (this.words.size() == 0) {
286             throw new RuntimeException("No dictionary");
287         }
288 
289         // The length of the sentence is a normally distributed random
290         // Objectiable.
291         double sentenceLengthDbl = randomNormal(this.sentenceMean, this.sentenceSigma);
292         int sentenceLength = Math.max((int) Math.floor(sentenceLengthDbl), 1);
293 
294         String wordDelimiter = ""; // Defined here in case while loop doesn't
295                                    // run
296 
297         // Start the sentence with "Lorem ipsum...", if desired
298         List<String> sentence;
299         ;
300         if (opt_startWithLorem) {
301             String lorem = "lorem ipsum dolor sit amet, consecteteur adipiscing elit";
302             sentence = new ArrayList<String>(Arrays.asList(splitWords(lorem)));
303             if (sentence.size() > sentenceLength) {
304                 sentence.subList(0, sentenceLength);
305             }
306             String lastWord = sentence.get(sentence.size() - 1);
307             String lastChar = lastWord.substring(lastWord.length() - 1);
308             if (contains(DELIMITERS_WORDS, lastChar)) {
309                 wordDelimiter = lastChar;
310             }
311         } else {
312             sentence = new ArrayList<String>();
313         }
314 
315         WordLengthPair previous = new WordLengthPair(0, 0);
316 
317         // Generate a sentence from the "chains"
318         while (sentence.size() < sentenceLength) {
319             // If the current starting point is invalid, choose another randomly
320             if (!this.chains.containsKey(previous)) {
321                 previous = this.chooseRandomStart_();
322             }
323 
324             // Choose the next "chain" to go to. This determines the next word
325             // length we use, and whether there is e.g. a comma at the end of
326             // the word.
327             WordInfo chain = randomChoice(this.chains.get(previous).toArray(new WordInfo[0]));
328             int wordLength = chain.len;
329 
330             // If the word delimiter contained in the chain is also a sentence
331             // delimiter, then we don"t include it because we don"t want the
332             // sentence to end prematurely (we want the length to match the
333             // sentence_length value).
334             if (contains(DELIMITERS_SENTENCES, chain.delim)) {
335                 wordDelimiter = "";
336             } else {
337                 wordDelimiter = chain.delim;
338             }
339 
340             // Choose a word randomly that matches (or closely matches) the
341             // length we're after.
342             int closestLength = chooseClosest(this.words.keySet().toArray(new Integer[0]), wordLength);
343             String word = randomChoice(this.words.get(closestLength).toArray(new String[0]));
344 
345             sentence.add(word + wordDelimiter);
346             previous = new WordLengthPair(previous.len2, wordLength);
347 
348         }
349 
350         // Finish the sentence off with capitalisation, a period and
351         // form it into a string
352         StringBuffer result = new StringBuffer();
353         for (String s : sentence) {
354             result.append(s);
355             result.append(" ");
356         }
357         result.deleteCharAt(result.length() - 1);
358 
359         result.replace(0, 1, result.substring(0, 1).toUpperCase());
360         int strLen = result.length() - 1;
361         if (wordDelimiter.length() > 0 && wordDelimiter.charAt(0) == result.charAt(strLen))
362             result.deleteCharAt(strLen);
363         result.append(".");
364         return result.toString();
365     }
366 
367     public String getSample() {
368         return sample;
369     }
370 
371     public void setSample(String sample) {
372         this.sample = sample;
373         generateChains(this.sample);
374         generateStatistics(this.sample);
375     }
376 
377     public String getDictionary() {
378         return dictionary;
379     }
380 
381     public void setDictionary(String dictionary) {
382         this.dictionary = dictionary;
383         initializeDictionary(this.dictionary);
384     }
385 
386     /**
387      * Generates multiple paragraphs of text, with begin before the paragraphs,
388      * end after the paragraphs, and between between each two paragraphs.
389      */
390     private String generateMarkupParagraphs(String begin, String end, String between, int quantity,
391             boolean startWithLorem) {
392 
393         StringBuffer text = new StringBuffer();
394 
395         text.append(begin);
396         String para = generateParagraph(startWithLorem);
397         text.append(para);
398         while (text.length() < quantity) {
399             para = generateParagraph(false);
400             text.append(para);
401             if (text.length() < quantity)
402                 text.append(between);
403         }
404 
405         text.append(end);
406         return text.toString();
407     }
408     
409     /**
410      * Generates multiple paragraphs of text, with begin before the paragraphs,
411      * end after the paragraphs, and between between each two paragraphs.
412      * @throws IOException 
413      */
414     private void generateMarkupParagraphs(Appendable writer, String begin, String end, String between, int quantity,
415             boolean startWithLorem) throws IOException {
416 
417         int len = begin.length();
418         writer.append(begin);
419         String para = generateParagraph(startWithLorem);
420         len += para.length();
421         writer.append(para);
422         while (len < quantity) {
423             para = generateParagraph(false);
424             len += para.length();
425             writer.append(para);
426             if (len < quantity) {
427                 writer.append(between);
428                 len += para.length();
429             }
430         }
431 
432         writer.append(end);
433     }
434     /**
435      * Generates multiple sentences of text, with begin before the sentences,
436      * end after the sentences, and between between each two sentences.
437      */
438     private String generateMarkupSentences(String begin, String end, String between, int quantity,
439             boolean startWithLorem) {
440 
441         StringBuffer text = new StringBuffer();
442         text.append(begin);
443         String sentence = generateSentence(startWithLorem);
444         text.append(sentence);
445 
446         while (text.length() < quantity) {
447             sentence = generateSentence(false);
448             text.append(sentence);
449             if (text.length() < quantity)
450                 text.append(between);
451         }
452 
453         text.append(end);
454         return text.toString();
455     }
456 
457     /**
458      * Generates the chains and starts values required for sentence generation.
459      * 
460      * @param {string} sample The same text.
461      * @private
462      */
463     private void generateChains(String sample) {
464 
465         String[] words = splitWords(sample);
466         WordInfo[] wordInfos = generateWordInfo(words);
467         WordLengthPair previous = new WordLengthPair(0, 0);
468         List<WordLengthPair> starts = new ArrayList<WordLengthPair>();
469         List<String> delimList = Arrays.asList(DELIMITERS_SENTENCES);
470         Map<WordLengthPair, List<WordInfo>> chains = new HashMap<WordLengthPair, List<WordInfo>>();
471 
472         for (WordInfo wi : wordInfos) {
473             if (wi.len == 0)
474                 continue;
475 
476             List<WordInfo> value = chains.get(previous);
477             if (null == value)
478                 chains.put(previous, new ArrayList<WordInfo>());
479             else
480                 chains.get(previous).add(wi);
481 
482             if (delimList.contains(wi.delim))
483                 starts.add(previous);
484 
485             previous.len1 = previous.len2;
486             previous.len2 = wi.len;
487         }
488 
489         if (chains.size() > 0) {
490             this.chains = chains;
491             this.starts = starts;
492         } else {
493             throw new RuntimeException("Invalid sample text.");
494         }
495 
496     }
497 
498     /**
499      * Calculates the mean and standard deviation of sentence and paragraph
500      * lengths.
501      * 
502      * @param {string} sample The same text.
503      * @private
504      */
505     private void generateStatistics(String sample) {
506         this.generateSentenceStatistics(sample);
507         this.generateParagraphStatistics(sample);
508     }
509 
510     /**
511      * Calculates the mean and standard deviation of the lengths of sentences
512      * (in words) in a sample text.
513      * 
514      * @param {string} sample The same text.
515      * @private
516      */
517     private void generateSentenceStatistics(String sample) {
518         List<String> sentences = filterNotEmptyOrWhiteSpace(splitSentences(sample));
519         int sentenceLengths[] = new int[sentences.size()];
520         for (int i = 0; i < sentences.size(); i++) {
521             String[] words = splitWords(sentences.get(i));
522             sentenceLengths[i] = words.length;
523         }
524         this.sentenceMean = mean(sentenceLengths);
525         this.sentenceSigma = sigma(sentenceLengths);
526     }
527 
528     /**
529      * Calculates the mean and standard deviation of the lengths of paragraphs
530      * (in sentences) in a sample text.
531      * 
532      * @param {string} sample The same text.
533      * @private
534      */
535     private void generateParagraphStatistics(String sample) {
536         List<String> paragraphs = filterNotEmptyOrWhiteSpace(splitParagraphs(sample));
537 
538         int paragraphLengths[] = new int[paragraphs.size()];
539         for (int i = 0; i < paragraphs.size(); i++) {
540             String[] sentences = splitSentences(paragraphs.get(i));
541             paragraphLengths[i] = sentences.length;
542         }
543 
544         this.paragraphMean = mean(paragraphLengths);
545         this.paragraphSigma = sigma(paragraphLengths);
546     }
547 
548     /**
549      * Sets the generator to use a given selection of words for generating
550      * sentences with.
551      * 
552      * @param {string} dictionary The dictionary to use.
553      */
554     private void initializeDictionary(String dictionary) {
555         String[] dictionaryWords = splitWords(dictionary);
556         initializeDictionary(dictionaryWords);
557     }
558 
559     private void initializeDictionary(String[] dictionaryWords) {
560         words = new HashMap<Integer, List<String>>();
561         for (String word : dictionaryWords) {
562             List<String> wordWithLen = words.get(word.length());
563             if (null == wordWithLen) {
564                 List<String> list = new ArrayList<String>();
565                 list.add(word);
566                 words.put(word.length(), list);
567             } else {
568                 wordWithLen.add(word);
569             }
570         }
571 
572         if (words.size() == 0)
573             throw new RuntimeException("Invalid dictionary.");
574     }
575 
576     /**
577      * Picks a random starting chain.
578      * 
579      * @return {string} The starting key.
580      * @private
581      */
582     private WordLengthPair chooseRandomStart_() {
583         Set<WordLengthPair> keys = chains.keySet();
584         Set<WordLengthPair> validStarts = new HashSet<WordLengthPair>(starts);
585         validStarts.retainAll(keys);
586         int index = randomInt(validStarts.size());
587         WordLengthPair wlPair = validStarts.toArray(new WordLengthPair[0])[index];
588         return wlPair;
589     }
590 
591     /**
592      * Splits a piece of text into paragraphs.
593      * 
594      * @param {string} text The text to split.
595      * @return {Array.<string>} An array of paragraphs.
596      * @private
597      */
598 
599     static String[] splitParagraphs(String text) {
600         return filterNotEmptyOrWhiteSpace(text.split("\n")).toArray(new String[0]);
601     }
602 
603     /**
604      * Splits a piece of text into sentences.
605      * 
606      * @param {string} text The text to split.
607      * @return {Array.<string>} An array of sentences.
608      * @private
609      */
610     static String[] splitSentences(String text) {
611         return filterNotEmptyOrWhiteSpace(text.split(SENTENCE_SPLIT_REGEX)).toArray(new String[0]);
612     }
613 
614     /**
615      * Splits a piece of text into words..
616      * 
617      * @param {string} text The text to split.
618      * @return {Array.<string>} An array of words.
619      * @private
620      */
621     static String[] splitWords(String text) {
622         return filterNotEmptyOrWhiteSpace(text.split(WORD_SPLIT_REGEX)).toArray(new String[0]);
623     }
624 
625     /**
626      * Find the number in the list of values that is closest to the target.
627      * 
628      * @param {Array.<number>} values The values.
629      * @param {number} target The target value.
630      * @return {number} The closest value.
631      */
632     static int chooseClosest(Integer[] values, int target) {
633         int closest = values[0];
634         for (int value : values) {
635             if (Math.abs(target - value) < Math.abs(target - closest))
636                 closest = value;
637         }
638 
639         return closest;
640     }
641 
642     /**
643      * Gets info about a word used as part of the lorem ipsum algorithm.
644      * 
645      * @param {string} word The word to check.
646      * @return {Array} A two element array. The first element is the size of the
647      *         word. The second element is the delimiter used in the word.
648      * @private
649      */
650     static private WordInfo getWordInfo(String word) {
651         WordInfo ret = new WordInfo();
652         for (String delim : DELIMITERS_WORDS) {
653             if (word.endsWith(delim)) {
654                 ret.len = word.length() - delim.length();
655                 ret.delim = delim;
656                 return ret;
657             }
658         }
659         ret.len = word.length();
660         ret.delim = "";
661         return ret;
662     }
663 
664     static private WordInfo[] generateWordInfo(String[] words) {
665         WordInfo[] result = new WordInfo[words.length];
666         int i = 0;
667         for (String word : words)
668             result[i++] = getWordInfo(word);
669         return result;
670     }
671 
672     /**
673      * Constant used for {@link #randomNormal_}.
674      * 
675      * @type {number}
676      * @private
677      */
678     private static final double NV_MAGICCONST_ = 4 * Math.exp(-0.5) / Math.sqrt(2.0);
679 
680     /**
681      * Generates a random number for a normal distribution with the specified
682      * mean and sigma.
683      * 
684      * @param {number} mu The mean of the distribution.
685      * @param {number} sigma The sigma of the distribution.
686      * @private
687      */
688     private static double randomNormal(double mu, double sigma) {
689         double z = 0.0d;
690         while (true) {
691             double u1 = Math.random();
692             double u2 = 1.0d - Math.random();
693             z = NV_MAGICCONST_ * (u1 - 0.5d) / u2;
694             double zz = z * z / 4.0d;
695             if (zz <= -Math.log(u2)) {
696                 break;
697             }
698         }
699         return mu + z * sigma;
700     }
701 
702     /**
703      * Returns the text if it is not empty or just whitespace.
704      * 
705      * @param {string} text the text to check.
706      * @return {boolean} Whether the text is neither empty nor whitespace.
707      * @private
708      */
709     private static List<String> filterNotEmptyOrWhiteSpace(String[] arr) {
710         List<String> result = new ArrayList<String>();
711         for (String s : arr) {
712             String trims = s.trim();
713             if (trims.length() > 0)
714                 result.add(trims);
715         }
716         return result;
717     }
718 
719     public static double mean(int[] values) {
720         return ((double) sum(values)) / ((double) (Math.max(values.length, 1)));
721     }
722 
723     public static double mean(double[] values) {
724         return sum(values) / ((double) (Math.max(values.length, 1)));
725     }
726 
727     public static double variance(double[] values) {
728         double[] squared = new double[values.length];
729         for (int i = 0; i < values.length; i++)
730             squared[i] = values[i] * values[i];
731 
732         double meanVal = mean(values);
733         return mean(squared) - (meanVal * meanVal);
734     }
735 
736     public static double sigma(int[] values) {
737         double[] d = new double[values.length];
738         for (int i = 0; i < values.length; i++)
739             d[i] = (double) values[i];
740 
741         return sigma(d);
742     }
743 
744     public static double sigma(double[] values) {
745         return Math.sqrt(variance(values));
746     }
747 
748     public static int sum(int[] values) {
749         int sum = 0;
750         for (int val : values)
751             sum += val;
752         return sum;
753     }
754 
755     public static double sum(double[] values) {
756         double sum = 0.0d;
757         for (double val : values)
758             sum += val;
759         return sum;
760     }
761 
762     public static boolean contains(String[] array, String val) {
763         for (String s : array)
764             if (s.equals(val))
765                 return true;
766         return false;
767     }
768 
769     /* for unit testing */
770     double getSentenceMean() {
771         return sentenceMean;
772     }
773 
774     double getSentenceSigma() {
775         return sentenceSigma;
776     }
777 
778     double getParagraphMean() {
779         return paragraphMean;
780     }
781 
782     double getParagraphSigma() {
783         return paragraphSigma;
784     }
785 
786     /**
787      * Dictionary of words for lorem ipsum.
788      * 
789      * @type {string}
790      * @private
791      */
792     private static final String DICT = "a ac accumsan ad adipiscing aenean aliquam aliquet amet ante "
793             + "aptent arcu at auctor augue bibendum blandit class commodo "
794             + "condimentum congue consectetuer consequat conubia convallis cras "
795             + "cubilia cum curabitur curae cursus dapibus diam dictum dictumst "
796             + "dignissim dis dolor donec dui duis egestas eget eleifend elementum "
797             + "elit eni enim erat eros est et etiam eu euismod facilisi facilisis "
798             + "fames faucibus felis fermentum feugiat fringilla fusce gravida "
799             + "habitant habitasse hac hendrerit hymenaeos iaculis id imperdiet "
800             + "in inceptos integer interdum ipsum justo lacinia lacus laoreet "
801             + "lectus leo libero ligula litora lobortis lorem luctus maecenas "
802             + "magna magnis malesuada massa mattis mauris metus mi molestie "
803             + "mollis montes morbi mus nam nascetur natoque nec neque netus "
804             + "nibh nisi nisl non nonummy nostra nulla nullam nunc odio orci "
805             + "ornare parturient pede pellentesque penatibus per pharetra "
806             + "phasellus placerat platea porta porttitor posuere potenti praesent "
807             + "pretium primis proin pulvinar purus quam quis quisque rhoncus "
808             + "ridiculus risus rutrum sagittis sapien scelerisque sed sem semper "
809             + "senectus sit sociis sociosqu sodales sollicitudin suscipit "
810             + "suspendisse taciti tellus tempor tempus tincidunt torquent tortor "
811             + "tristique turpis ullamcorper ultrices ultricies urna ut Objectius ve "
812             + "vehicula vel velit venenatis vestibulum vitae vivamus viverra " + "volutpat vulputate";
813 
814     /**
815      * A sample to use for generating the distribution of word and sentence
816      * lengths in lorem ipsum.
817      * 
818      * @type {string}
819      * @private
820      */
    /**
     * Built-in lorem ipsum sample text, assembled from string literals. Every
     * paragraph, including the last one, is terminated by two newline
     * characters ("\n\n").
     * <p>
     * NOTE(review): presumably this is the default text the generator analyses;
     * its usage is outside this part of the file - confirm.
     * <p>
     * NOTE(review): the word "Objectius" occurs several times and does not look
     * like part of the usual lorem ipsum text (possibly an accidental
     * search-and-replace). It is left untouched here because the literal is
     * runtime data.
     */
    private static final String SAMPLE = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean "
            + "commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus "
            + "et magnis dis parturient montes, nascetur ridiculus mus. Donec quam "
            + "felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla "
            + "consequat massa quis enim. Donec pede justo, fringilla vel, aliquet "
            + "nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, "
            + "venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. "
            + "Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean "
            + "vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat "
            + "vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra "
            + "quis, feugiat a, tellus. Phasellus viverra nulla ut metus Objectius "
            + "laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel "
            + "augue. Curabitur ullamcorper ultricies nisi. Nam eget dui.\n\n" +

            "Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem "
            + "quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam "
            + "nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec "
            + "odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis "
            + "faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus "
            + "tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales "
            + "sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit "
            + "cursus nunc, quis gravida magna mi a libero. Fusce vulputate eleifend "
            + "sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, "
            + "metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis "
            + "hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci "
            + "luctus et ultrices posuere cubilia Curae; In ac dui quis mi " + "consectetuer lacinia.\n\n" +

            "Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet "
            + "nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer "
            + "ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent "
            + "adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy "
            + "metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros "
            + "et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, "
            + "nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit "
            + "risus. Phasellus nec sem in justo pellentesque facilisis. Etiam "
            + "imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus "
            + "non, auctor et, hendrerit quis, nisi.\n\n" +

            "Curabitur ligula sapien, tincidunt non, euismod vitae, posuere "
            + "imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed "
            + "cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus "
            + "accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci "
            + "luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis "
            + "porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis "
            + "orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, "
            + "bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede "
            + "sit amet augue. In turpis. Pellentesque posuere. Praesent turpis.\n\n" +

            "Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu "
            + "sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales "
            + "nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse "
            + "pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, "
            + "nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in "
            + "faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id "
            + "purus. Ut Objectius tincidunt libero. Phasellus dolor. Maecenas vestibulum "
            + "mollis diam. Pellentesque ut neque. Pellentesque habitant morbi "
            + "tristique senectus et netus et malesuada fames ac turpis egestas.\n\n" +

            "In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac "
            + "felis quis tortor malesuada pretium. Pellentesque auctor neque nec "
            + "urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean "
            + "viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et "
            + "netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis "
            + "pretium feugiat. Vivamus quis mi. Phasellus a est. Phasellus magna.\n\n" +

            "In hac habitasse platea dictumst. Curabitur at lacus ac velit ornare "
            + "lobortis. Curabitur a felis in nunc fringilla tristique. Morbi mattis "
            + "ullamcorper velit. Phasellus gravida semper nisi. Nullam vel sem. "
            + "Pellentesque libero tortor, tincidunt et, tincidunt eget, semper nec, "
            + "quam. Sed hendrerit. Morbi ac felis. Nunc egestas, augue at "
            + "pellentesque laoreet, felis eros vehicula leo, at malesuada velit leo "
            + "quis pede. Donec interdum, metus et hendrerit aliquet, dolor diam "
            + "sagittis ligula, eget egestas libero turpis vel mi. Nunc nulla. Fusce "
            + "risus nisl, viverra et, tempor et, pretium in, sapien. Donec venenatis " + "vulputate lorem.\n\n" +

            "Morbi nec metus. Phasellus blandit leo ut odio. Maecenas ullamcorper, "
            + "dui et placerat feugiat, eros pede Objectius nisi, condimentum viverra "
            + "felis nunc et lorem. Sed magna purus, fermentum eu, tincidunt eu, "
            + "Objectius ut, felis. In auctor lobortis lacus. Quisque libero metus, "
            + "condimentum nec, tempor a, commodo mollis, magna. Vestibulum "
            + "ullamcorper mauris at ligula. Fusce fermentum. Nullam cursus lacinia "
            + "erat. Praesent blandit laoreet nibh.\n\n" +

            "Fusce convallis metus id felis luctus adipiscing. Pellentesque egestas, "
            + "neque sit amet convallis pulvinar, justo nulla eleifend augue, ac "
            + "auctor orci leo non est. Quisque id mi. Ut tincidunt tincidunt erat. "
            + "Etiam feugiat lorem non metus. Vestibulum dapibus nunc ac augue. "
            + "Curabitur vestibulum aliquam leo. Praesent egestas neque eu enim. In "
            + "hac habitasse platea dictumst. Fusce a quam. Etiam ut purus mattis "
            + "mauris sodales aliquam. Curabitur nisi. Quisque malesuada placerat "
            + "nisl. Nam ipsum risus, rutrum vitae, vestibulum eu, molestie vel, " + "lacus.\n\n" +

            "Sed augue ipsum, egestas nec, vestibulum et, malesuada adipiscing, "
            + "dui. Vestibulum facilisis, purus nec pulvinar iaculis, ligula mi "
            + "congue nunc, vitae euismod ligula urna in dolor. Mauris sollicitudin "
            + "fermentum libero. Praesent nonummy mi in odio. Nunc interdum lacus sit "
            + "amet orci. Vestibulum rutrum, mi nec elementum vehicula, eros quam "
            + "gravida nisl, id fringilla neque ante vel mi. Morbi mollis tellus ac "
            + "sapien. Phasellus volutpat, metus eget egestas mollis, lacus lacus "
            + "blandit dui, id egestas quam mauris ut lacus. Fusce vel dui. Sed in "
            + "libero ut nibh placerat accumsan. Proin faucibus arcu quis ante. In "
            + "consectetuer turpis ut velit. Nulla sit amet est. Praesent metus "
            + "tellus, elementum eu, semper a, adipiscing nec, purus. Cras risus "
            + "ipsum, faucibus ut, ullamcorper id, Objectius ac, leo. Suspendisse "
            + "feugiat. Suspendisse enim turpis, dictum sed, iaculis a, condimentum "
            + "nec, nisi. Praesent nec nisl a purus blandit viverra. Praesent ac "
            + "massa at ligula laoreet iaculis. Nulla neque dolor, sagittis eget, "
            + "iaculis quis, molestie non, velit.\n\n" +

            "Mauris turpis nunc, blandit et, volutpat molestie, porta ut, ligula. "
            + "Fusce pharetra convallis urna. Quisque ut nisi. Donec mi odio, faucibus "
            + "at, scelerisque quis, convallis in, nisi. Suspendisse non nisl sit amet "
            + "velit hendrerit rutrum. Ut leo. Ut a nisl id ante tempus hendrerit. "
            + "Proin pretium, leo ac pellentesque mollis, felis nunc ultrices eros, "
            + "sed gravida augue augue mollis justo. Suspendisse eu ligula. Nulla "
            + "facilisi. Donec id justo. Praesent porttitor, nulla vitae posuere "
            + "iaculis, arcu nisl dignissim dolor, a pretium mi sem ut ipsum. "
            + "Curabitur suscipit suscipit tellus.\n\n" +

            "Praesent vestibulum dapibus nibh. Etiam iaculis nunc ac metus. Ut id "
            + "nisl quis enim dignissim sagittis. Etiam sollicitudin, ipsum eu "
            + "pulvinar rutrum, tellus ipsum laoreet sapien, quis venenatis ante "
            + "odio sit amet eros. Proin magna. Duis vel nibh at velit scelerisque "
            + "suscipit. Curabitur turpis. Vestibulum suscipit nulla quis orci. Fusce "
            + "ac felis sit amet ligula pharetra condimentum. Maecenas egestas arcu "
            + "quis ligula mattis placerat. Duis lobortis massa imperdiet quam. " + "Suspendisse potenti.\n\n" +

            "Pellentesque commodo eros a enim. Vestibulum turpis sem, aliquet eget, "
            + "lobortis pellentesque, rutrum eu, nisl. Sed libero. Aliquam erat "
            + "volutpat. Etiam vitae tortor. Morbi vestibulum volutpat enim. Aliquam "
            + "eu nunc. Nunc sed turpis. Sed mollis, eros et ultrices tempus, mauris "
            + "ipsum aliquam libero, non adipiscing dolor urna a orci. Nulla porta "
            + "dolor. Class aptent taciti sociosqu ad litora torquent per conubia "
            + "nostra, per inceptos hymenaeos.\n\n" +

            "Pellentesque dapibus hendrerit tortor. Praesent egestas tristique nibh. "
            + "Sed a libero. Cras Objectius. Donec vitae orci sed dolor rutrum auctor. "
            + "Fusce egestas elit eget lorem. Suspendisse nisl elit, rhoncus eget, "
            + "elementum ac, condimentum eget, diam. Nam at tortor in tellus interdum "
            + "sagittis. Aliquam lobortis. Donec orci lectus, aliquam ut, faucibus "
            + "non, euismod id, nulla. Curabitur blandit mollis lacus. Nam adipiscing. " + "Vestibulum eu odio.\n\n" +

            "Vivamus laoreet. Nullam tincidunt adipiscing enim. Phasellus tempus. "
            + "Proin viverra, ligula sit amet ultrices semper, ligula arcu tristique "
            + "sapien, a accumsan nisi mauris ac eros. Fusce neque. Suspendisse "
            + "faucibus, nunc et pellentesque egestas, lacus ante convallis tellus, "
            + "vitae iaculis lacus elit id tortor. Vivamus aliquet elit ac nisl. Fusce "
            + "fermentum odio nec arcu. Vivamus euismod mauris. In ut quam vitae "
            + "odio lacinia tincidunt. Praesent ut ligula non mi Objectius sagittis. "
            + "Cras sagittis. Praesent ac sem eget est egestas volutpat. Vivamus "
            + "consectetuer hendrerit lacus. Cras non dolor. Vivamus in erat ut urna "
            + "cursus vestibulum. Fusce commodo aliquam arcu. Nam commodo suscipit "
            + "quam. Quisque id odio. Praesent venenatis metus at tortor pulvinar " + "varius.\n\n";
974 
975     /**
976      * Generates a number of paragraphs, with each paragraph surrounded by HTML
977      * pararaph tags.
978      * 
979      * @param quantity
980      * @param startWithLorem
981      * @return
982      */
983     public String generateParagraphsHtml(int quantity, boolean startWithLorem) {
984 
985         return generateMarkupParagraphs("<p>" + LINE_SEPARATOR + "\t", LINE_SEPARATOR + "</p>", LINE_SEPARATOR + "</p>"
986                 + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR + "\t", quantity, startWithLorem);
987 
988     }
989 
990     /**
991      * Generates a number of paragraphs, with each paragraph surrounded by HTML
992      * pararaph tags.
993      * 
994      * @param writer
995      * @param quantity
996      * @param startWithLorem
997      * @throws IOException 
998      */
999     public void generateParagraphsHtml(Appendable writer, int quantity, boolean startWithLorem) throws IOException {
1000 
1001         generateMarkupParagraphs(writer, "<p>" + LINE_SEPARATOR + "\t", LINE_SEPARATOR + "</p>", LINE_SEPARATOR + "</p>"
1002                 + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR + "\t", quantity, startWithLorem);
1003 
1004     }
1005     
1006     
1007     /**
1008      * Generates one paragraph of HTML, surrounded by HTML pararaph tags.
1009      * 
1010      * @param quantity
1011      * @param startWithLorem
1012      * @return
1013      */
1014     public String generateOneParagraphHtml(int quantity, boolean startWithLorem) {
1015 
1016         return generateMarkupSentences("<p>" + LINE_SEPARATOR + "\t", LINE_SEPARATOR + "</p>", LINE_SEPARATOR,
1017                 quantity, startWithLorem);
1018 
1019     }
1020 
1021     /**
1022      * Generates a number of paragraphs, with each paragraph surrounded by HTML
1023      * paragraph tags as a full HTML page.
1024      * 
1025      * @param quantity
1026      * @param startWithLorem
1027      * @return
1028      */
1029     public String generateParagraphsFullHtml(int quantity, boolean startWithLorem) {
1030 
1031         String prefix = "<html>" + LINE_SEPARATOR + "<header>" + LINE_SEPARATOR + "<title>Lorem Ipsum</title>"
1032                 + LINE_SEPARATOR + "</header>" + LINE_SEPARATOR + LINE_SEPARATOR + "<body>";
1033         String postfix = "</body>" + LINE_SEPARATOR + "</html>" + LINE_SEPARATOR;
1034 
1035         return generateMarkupParagraphs(prefix + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR + "\t", LINE_SEPARATOR
1036                 + "</p>" + LINE_SEPARATOR + postfix, LINE_SEPARATOR + "</p>" + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR
1037                 + "\t", quantity, startWithLorem);
1038     }
1039     
1040     
1041     /**
1042      * Generates a number of paragraphs, with each paragraph surrounded by HTML
1043      * paragraph tags as a full HTML page.
1044      * 
1045      * @param writer
1046      * @param quantity
1047      * @param startWithLorem
1048      * @throws IOException 
1049      */
1050     public void generateParagraphsFullHtml(Appendable writer, int quantity, boolean startWithLorem) throws IOException {
1051 
1052         String prefix = "<html>" + LINE_SEPARATOR + "<header>" + LINE_SEPARATOR + "<title>Lorem Ipsum</title>"
1053                 + LINE_SEPARATOR + "</header>" + LINE_SEPARATOR + LINE_SEPARATOR + "<body>";
1054         String postfix = "</body>" + LINE_SEPARATOR + "</html>" + LINE_SEPARATOR;
1055 
1056         generateMarkupParagraphs(writer, prefix + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR + "\t", LINE_SEPARATOR
1057                 + "</p>" + LINE_SEPARATOR + postfix, LINE_SEPARATOR + "</p>" + LINE_SEPARATOR + "<p>" + LINE_SEPARATOR
1058                 + "\t", quantity, startWithLorem);
1059     }
1060     
1061     
1062     /**
1063      * Generates a number of paragraphs, with each paragraph separated by two
1064      * newlines.
1065      * 
1066      * @param quantity
1067      * @param startWithLorem
1068      * @return
1069      */
1070     public String generateParagraphsPlainText(int quantity, boolean startWithLorem) {
1071 
1072         return generateMarkupParagraphs("", "", LINE_SEPARATOR + LINE_SEPARATOR, quantity, startWithLorem);
1073     }
1074 
1075     /**
1076      * Generates a number of paragraphs, with each paragraph separated by two
1077      * newlines.
1078      * 
1079      * @param writer
1080      * @param quantity
1081      * @param startWithLorem
1082      * @throws IOException 
1083      */
1084     public void generateParagraphsPlainText(Appendable writer, int quantity, boolean startWithLorem) throws IOException {
1085 
1086         generateMarkupParagraphs(writer, "", "", LINE_SEPARATOR + LINE_SEPARATOR, quantity, startWithLorem);
1087     }
1088 
1089     /**
1090      * Generates a number of paragraphs, with each paragraph separated by two
1091      * newlines and no line exceeding maxCols columns
1092      * 
1093      * @param quantity
1094      * @param startWithLorem
1095      * @return
1096      */
1097     public String generateParagraphsPlainText(int quantity, int maxCols, boolean startWithLorem) {
1098 
1099         StringWriter writer = new StringWriter(quantity + 512);
1100         try {
1101             generateParagraphsPlainText(writer, quantity, maxCols, startWithLorem);
1102         } catch (IOException e) {
1103             throw new RuntimeException(e);
1104         }
1105         return writer.toString();
1106     }
1107 
1108     /**
1109      * Generates a number of paragraphs, with each paragraph separated by two
1110      * newlines and no line exceeding maxCols columns
1111      * 
1112      * @param writer
1113      * @param quantity
1114      * @param startWithLorem
1115      * @throws IOException 
1116      */
1117     public void generateParagraphsPlainText(Appendable writer, int quantity, int maxCols, boolean startWithLorem) throws IOException {
1118 
1119         String delims = " .,?!";
1120         String unformatted = generateMarkupParagraphs("", "", LINE_SEPARATOR + LINE_SEPARATOR, quantity, startWithLorem);
1121         int len = unformatted.length();
1122 
1123         if (maxCols <= 0)
1124             writer.append(unformatted);
1125         else {
1126             int startPos = 0;
1127             while (startPos < len - 1) {
1128                 int endPos = Math.min(startPos + maxCols, len - 1);
1129                 boolean shift = true;
1130                 // check if there is already a line break:
1131                 for (int i = startPos; i < endPos; i++) {
1132                     if (unformatted.charAt(i) == '\n') {
1133                         shift = false;
1134                         endPos = i;
1135                     }
1136                 }
1137                 char ch = unformatted.charAt(endPos);
1138                 while (shift) {
1139                     for (int i = 0; i < delims.length(); i++) {
1140                         if (ch == delims.charAt(i)) {
1141                             shift = false;
1142                             break;
1143                         }
1144                     }
1145                     if (shift) {
1146                         ch = unformatted.charAt(--endPos);
1147                         shift = endPos > startPos;
1148                     }
1149                 }
1150                 writer.append(unformatted.substring(startPos, endPos + 1));
1151                 if (unformatted.charAt(endPos) != '\n')
1152                     writer.append(LINE_SEPARATOR);
1153                 startPos = endPos + 1;
1154             }
1155         }
1156     }
1157 
1158 }