textwrap/
wrap.rs

1//! Functions for wrapping text.
2
3use std::borrow::Cow;
4
5use crate::core::{break_words, display_width, Word};
6use crate::word_splitters::split_words;
7use crate::Options;
8
9/// Wrap a line of text at a given width.
10///
11/// The result is a vector of lines, each line is of type [`Cow<'_,
12/// str>`](Cow), which means that the line will borrow from the input
13/// `&str` if possible. The lines do not have trailing whitespace,
14/// including a final `'\n'`. Please use [`fill()`](crate::fill()) if
15/// you need a [`String`] instead.
16///
17/// The easiest way to use this function is to pass an integer for
18/// `width_or_options`:
19///
20/// ```
21/// use textwrap::wrap;
22///
23/// let lines = wrap("Memory safety without garbage collection.", 15);
24/// assert_eq!(lines, &[
25///     "Memory safety",
26///     "without garbage",
27///     "collection.",
28/// ]);
29/// ```
30///
/// If you need to customize the wrapping, you can pass an [`Options`]
/// instead of a `usize`:
33///
34/// ```
35/// use textwrap::{wrap, Options};
36///
37/// let options = Options::new(15)
38///     .initial_indent("- ")
39///     .subsequent_indent("  ");
40/// let lines = wrap("Memory safety without garbage collection.", &options);
41/// assert_eq!(lines, &[
42///     "- Memory safety",
43///     "  without",
44///     "  garbage",
45///     "  collection.",
46/// ]);
47/// ```
48///
49/// # Optimal-Fit Wrapping
50///
51/// By default, `wrap` will try to ensure an even right margin by
52/// finding breaks which avoid short lines. We call this an
53/// “optimal-fit algorithm” since the line breaks are computed by
54/// considering all possible line breaks. The alternative is a
55/// “first-fit algorithm” which simply accumulates words until they no
56/// longer fit on the line.
57///
58/// As an example, using the first-fit algorithm to wrap the famous
59/// Hamlet quote “To be, or not to be: that is the question” in a
60/// narrow column with room for only 10 characters looks like this:
61///
62/// ```
63/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap};
64/// #
65/// # let lines = wrap("To be, or not to be: that is the question",
66/// #                  Options::new(10).wrap_algorithm(FirstFit));
67/// # assert_eq!(lines.join("\n") + "\n", "\
68/// To be, or
69/// not to be:
70/// that is
71/// the
72/// question
73/// # ");
74/// ```
75///
76/// Notice how the second to last line is quite narrow because
77/// “question” was too large to fit? The greedy first-fit algorithm
78/// doesn’t look ahead, so it has no other option than to put
79/// “question” onto its own line.
80///
81/// With the optimal-fit wrapping algorithm, the previous lines are
82/// shortened slightly in order to make the word “is” go into the
83/// second last line:
84///
85/// ```
86/// # #[cfg(feature = "smawk")] {
87/// # use textwrap::{Options, WrapAlgorithm, wrap};
88/// #
89/// # let lines = wrap(
90/// #     "To be, or not to be: that is the question",
91/// #     Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit())
92/// # );
93/// # assert_eq!(lines.join("\n") + "\n", "\
94/// To be,
95/// or not to
96/// be: that
97/// is the
98/// question
99/// # "); }
100/// ```
101///
102/// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on
103/// the choices.
104///
105/// # Examples
106///
/// The returned vector contains lines of type `Cow<'_, str>`. If
/// possible, the wrapped lines will borrow from the input string. As
/// an example, with a hanging indentation, the first line can borrow
/// from the input, but the subsequent lines become owned strings:
111///
112/// ```
113/// use std::borrow::Cow::{Borrowed, Owned};
114/// use textwrap::{wrap, Options};
115///
116/// let options = Options::new(15).subsequent_indent("....");
117/// let lines = wrap("Wrapping text all day long.", &options);
118/// let annotated = lines
119///     .iter()
120///     .map(|line| match line {
121///         Borrowed(text) => format!("[Borrowed] {}", text),
122///         Owned(text) => format!("[Owned]    {}", text),
123///     })
124///     .collect::<Vec<_>>();
125/// assert_eq!(
126///     annotated,
127///     &[
128///         "[Borrowed] Wrapping text",
129///         "[Owned]    ....all day",
130///         "[Owned]    ....long.",
131///     ]
132/// );
133/// ```
134///
135/// ## Leading and Trailing Whitespace
136///
137/// As a rule, leading whitespace (indentation) is preserved and
138/// trailing whitespace is discarded.
139///
/// In more detail, when wrapping words into lines, words are found
141/// by splitting the input text on space characters. One or more
142/// spaces (shown here as “␣”) are attached to the end of each word:
143///
144/// ```text
145/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"]
146/// ```
147///
148/// These words are then put into lines. The interword whitespace is
149/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"`
150/// word falls at the end of a line:
151///
152/// ```
153/// use textwrap::wrap;
154///
155/// assert_eq!(wrap("Foo   bar baz", 10), vec!["Foo   bar", "baz"]);
156/// assert_eq!(wrap("Foo   bar baz", 8), vec!["Foo", "bar baz"]);
157/// ```
158///
/// Notice how the trailing whitespace is removed in both cases: in the
160/// first example, `"bar␣"` becomes `"bar"` and in the second case
161/// `"Foo␣␣␣"` becomes `"Foo"`.
162///
163/// Leading whitespace is preserved when the following word fits on
164/// the first line. To understand this, consider how words are found
165/// in a text with leading spaces:
166///
167/// ```text
168/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"]
169/// ```
170///
171/// When put into lines, the indentation is preserved if `"foo"` fits
172/// on the first line, otherwise you end up with an empty line:
173///
174/// ```
175/// use textwrap::wrap;
176///
177/// assert_eq!(wrap("  foo bar", 8), vec!["  foo", "bar"]);
178/// assert_eq!(wrap("  foo bar", 4), vec!["", "foo", "bar"]);
179/// ```
180pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
181where
182    Opt: Into<Options<'a>>,
183{
184    let options: Options = width_or_options.into();
185    let line_ending_str = options.line_ending.as_str();
186
187    let mut lines = Vec::new();
188    for line in text.split(line_ending_str) {
189        wrap_single_line(line, &options, &mut lines);
190    }
191
192    lines
193}
194
195pub(crate) fn wrap_single_line<'a>(
196    line: &'a str,
197    options: &Options<'_>,
198    lines: &mut Vec<Cow<'a, str>>,
199) {
200    let indent = if lines.is_empty() {
201        options.initial_indent
202    } else {
203        options.subsequent_indent
204    };
205    if line.len() < options.width && indent.is_empty() {
206        lines.push(Cow::from(line.trim_end_matches(' ')));
207    } else {
208        wrap_single_line_slow_path(line, options, lines)
209    }
210}
211
212/// Wrap a single line of text.
213///
214/// This is taken when `line` is longer than `options.width`.
215pub(crate) fn wrap_single_line_slow_path<'a>(
216    line: &'a str,
217    options: &Options<'_>,
218    lines: &mut Vec<Cow<'a, str>>,
219) {
220    let initial_width = options
221        .width
222        .saturating_sub(display_width(options.initial_indent));
223    let subsequent_width = options
224        .width
225        .saturating_sub(display_width(options.subsequent_indent));
226    let line_widths = [initial_width, subsequent_width];
227
228    let words = options.word_separator.find_words(line);
229    let split_words = split_words(words, &options.word_splitter);
230    let broken_words = if options.break_words {
231        let mut broken_words = break_words(split_words, line_widths[1]);
232        if !options.initial_indent.is_empty() {
233            // Without this, the first word will always go into the
234            // first line. However, since we break words based on the
235            // _second_ line width, it can be wrong to unconditionally
236            // put the first word onto the first line. An empty
237            // zero-width word fixed this.
238            broken_words.insert(0, Word::from(""));
239        }
240        broken_words
241    } else {
242        split_words.collect::<Vec<_>>()
243    };
244
245    let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);
246
247    let mut idx = 0;
248    for words in wrapped_words {
249        let last_word = match words.last() {
250            None => {
251                lines.push(Cow::from(""));
252                continue;
253            }
254            Some(word) => word,
255        };
256
257        // We assume here that all words are contiguous in `line`.
258        // That is, the sum of their lengths should add up to the
259        // length of `line`.
260        let len = words
261            .iter()
262            .map(|word| word.len() + word.whitespace.len())
263            .sum::<usize>()
264            - last_word.whitespace.len();
265
266        // The result is owned if we have indentation, otherwise we
267        // can simply borrow an empty string.
268        let mut result = if lines.is_empty() && !options.initial_indent.is_empty() {
269            Cow::Owned(options.initial_indent.to_owned())
270        } else if !lines.is_empty() && !options.subsequent_indent.is_empty() {
271            Cow::Owned(options.subsequent_indent.to_owned())
272        } else {
273            // We can use an empty string here since string
274            // concatenation for `Cow` preserves a borrowed value when
275            // either side is empty.
276            Cow::from("")
277        };
278
279        result += &line[idx..idx + len];
280
281        if !last_word.penalty.is_empty() {
282            result.to_mut().push_str(last_word.penalty);
283        }
284
285        lines.push(result);
286
287        // Advance by the length of `result`, plus the length of
288        // `last_word.whitespace` -- even if we had a penalty, we need
289        // to skip over the whitespace.
290        idx += len + last_word.whitespace.len();
291    }
292}
293
// Unit tests for `wrap`. Each test pins the exact lines produced for
// a small input; tests gated on the "hyphenation" feature also
// exercise dictionary-based word splitting.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{WordSeparator, WordSplitter, WrapAlgorithm};

    #[cfg(feature = "hyphenation")]
    use hyphenation::{Language, Load, Standard};

    #[test]
    fn no_wrap() {
        assert_eq!(wrap("foo", 10), vec!["foo"]);
    }

    #[test]
    fn wrap_simple() {
        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn to_be_or_not() {
        assert_eq!(
            wrap(
                "To be, or not to be, that is the question.",
                Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
            ),
            vec!["To be, or", "not to be,", "that is", "the", "question."]
        );
    }

    #[test]
    fn multiple_words_on_first_line() {
        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
    }

    #[test]
    fn long_word() {
        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
    }

    #[test]
    fn long_words() {
        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn max_width() {
        assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);

        let text = "Hello there! This is some English text. \
                    It should not be wrapped given the extents below.";
        assert_eq!(wrap(text, usize::MAX), vec![text]);
    }

    #[test]
    fn leading_whitespace() {
        assert_eq!(wrap("  foo bar", 6), vec!["  foo", "bar"]);
    }

    #[test]
    fn leading_whitespace_empty_first_line() {
        // If there is no space for the first word, the first line
        // will be empty. This is because the string is split into
        // words like [" ", "foobar ", "baz"], which puts "foobar " on
        // the second line. We never output trailing whitespace.
        assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
    }

    #[test]
    fn trailing_whitespace() {
        // Whitespace is only significant inside a line. After a line
        // gets too long and is broken, the first word starts in
        // column zero and is not indented.
        assert_eq!(wrap("foo     bar     baz  ", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn issue_99() {
        // We did not reset the in_whitespace flag correctly and did
        // not handle single-character words after a line break.
        assert_eq!(
            wrap("aaabbbccc x yyyzzzwww", 9),
            vec!["aaabbbccc", "x", "yyyzzzwww"]
        );
    }

    #[test]
    fn issue_129() {
        // The dash is a multi-byte character (an en dash). We used
        // to panic since we tried to index into the character.
        let options = Options::new(1).word_separator(WordSeparator::AsciiSpace);
        assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
    }

    #[test]
    fn wide_character_handling() {
        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
        assert_eq!(
            wrap(
                "Ｈｅｌｌｏ, Ｗｏｒｌｄ!",
                Options::new(15).word_separator(WordSeparator::AsciiSpace)
            ),
            vec!["Ｈｅｌｌｏ,", "Ｗｏｒｌｄ!"]
        );

        // Wide characters are allowed to break if the
        // unicode-linebreak feature is enabled.
        #[cfg(feature = "unicode-linebreak")]
        assert_eq!(
            wrap(
                "Ｈｅｌｌｏ, Ｗｏｒｌｄ!",
                Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
            ),
            vec!["Ｈｅｌｌｏ, Ｗ", "ｏｒｌｄ!"]
        );
    }

    #[test]
    fn indent_empty_line() {
        // Previously, indentation was not applied to empty lines.
        // However, this is somewhat inconsistent and undesirable if
        // the indentation is something like a border ("| ") which you
        // want to apply to all lines, empty or not.
        let options = Options::new(10).initial_indent("!!!");
        assert_eq!(wrap("", &options), vec!["!!!"]);
    }

    #[test]
    fn indent_single_line() {
        let options = Options::new(10).initial_indent(">>>"); // No trailing space
        assert_eq!(wrap("foo", &options), vec![">>>foo"]);
    }

    #[test]
    fn indent_first_emoji() {
        let options = Options::new(10).initial_indent("👉👉");
        assert_eq!(
            wrap("x x x x x x x x x x x x x", &options),
            vec!["👉👉x x x", "x x x x x", "x x x x x"]
        );
    }

    #[test]
    fn indent_multiple_lines() {
        let options = Options::new(6).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(
            wrap("foo bar baz", &options),
            vec!["* foo", "  bar", "  baz"]
        );
    }

    #[test]
    fn only_initial_indent_multiple_lines() {
        let options = Options::new(10).initial_indent("  ");
        assert_eq!(wrap("foo\nbar\nbaz", &options), vec!["  foo", "bar", "baz"]);
    }

    #[test]
    fn only_subsequent_indent_multiple_lines() {
        let options = Options::new(10).subsequent_indent("  ");
        assert_eq!(
            wrap("foo\nbar\nbaz", &options),
            vec!["foo", "  bar", "  baz"]
        );
    }

    #[test]
    fn indent_break_words() {
        let options = Options::new(5).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrap("foobarbaz", &options), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn initial_indent_break_words() {
        // This is a corner-case showing how the long word is broken
        // according to the width of the subsequent lines. The first
        // fragment of the word no longer fits on the first line,
        // which ends up being pure indentation.
        let options = Options::new(5).initial_indent("-->");
        assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]);
    }

    #[test]
    fn hyphens() {
        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
    }

    #[test]
    fn trailing_hyphen() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foobar-", &options), vec!["foobar-"]);
    }

    #[test]
    fn multiple_hyphens() {
        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
    }

    #[test]
    fn hyphens_flag() {
        let options = Options::new(5).break_words(false);
        assert_eq!(
            wrap("The --foo-bar flag.", &options),
            vec!["The", "--foo-", "bar", "flag."]
        );
    }

    #[test]
    fn repeated_hyphens() {
        let options = Options::new(4).break_words(false);
        assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]);
    }

    #[test]
    fn hyphens_alphanumeric() {
        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
    }

    #[test]
    fn hyphens_non_alphanumeric() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]);
    }

    #[test]
    fn multiple_splits() {
        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
    }

    #[test]
    fn forced_split() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]);
    }

    #[test]
    fn multiple_unbroken_words_issue_193() {
        let options = Options::new(3).break_words(false);
        assert_eq!(
            wrap("small large tiny", &options),
            vec!["small", "large", "tiny"]
        );
        assert_eq!(
            wrap("small  large   tiny", &options),
            vec!["small", "large", "tiny"]
        );
    }

    #[test]
    fn very_narrow_lines_issue_193() {
        let options = Options::new(1).break_words(false);
        assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
        assert_eq!(wrap("fooo   x     y", &options), vec!["fooo", "x", "y"]);
    }

    #[test]
    fn simple_hyphens() {
        let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter);
        assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
    }

    #[test]
    fn no_hyphenation() {
        let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
        assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_double_hyphenation() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10);
        assert_eq!(
            wrap("Internationalization", &options),
            vec!["Internatio", "nalization"]
        );

        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("Internationalization", &options),
            vec!["Interna-", "tionaliza-", "tion"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_issue_158() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10);
        assert_eq!(
            wrap("participation is the key to success", &options),
            vec!["participat", "ion is", "the key to", "success"]
        );

        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("participation is the key to success", &options),
            vec!["partici-", "pation is", "the key to", "success"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn split_len_hyphenation() {
        // Test that hyphenation takes the width of the whitespace
        // into account.
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("garbage   collection", &options),
            vec!["garbage   col-", "lection"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn borrowed_lines() {
        // Lines that end with an extra hyphen are owned, the final
        // line is borrowed.
        use std::borrow::Cow::{Borrowed, Owned};
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        let lines = wrap("Internationalization", &options);
        assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]);
        assert!(
            matches!(lines[0], Owned(_)),
            "should not have been borrowed: {:?}",
            lines[0]
        );
        assert!(
            matches!(lines[1], Owned(_)),
            "should not have been borrowed: {:?}",
            lines[1]
        );
        assert!(
            matches!(lines[2], Borrowed(_)),
            "should not have been owned: {:?}",
            lines[2]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_with_hyphen() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(8).break_words(false);
        assert_eq!(
            wrap("over-caffinated", &options),
            vec!["over-", "caffinated"]
        );

        let options = options.word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("over-caffinated", &options),
            vec!["over-", "caffi-", "nated"]
        );
    }

    #[test]
    fn break_words() {
        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn break_words_wide_characters() {
        // Even the poor man's version of `ch_width` counts these
        // characters as wide.
        let options = Options::new(5).word_separator(WordSeparator::AsciiSpace);
        assert_eq!(wrap("Ｈｅｌｌｏ", options), vec!["Ｈｅ", "ｌｌ", "ｏ"]);
    }

    #[test]
    fn break_words_zero_width() {
        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn break_long_first_word() {
        assert_eq!(wrap("testx y", 4), vec!["test", "x y"]);
    }

    #[test]
    fn wrap_preserves_line_breaks_trims_whitespace() {
        assert_eq!(wrap("  ", 80), vec![""]);
        assert_eq!(wrap("  \n  ", 80), vec!["", ""]);
        assert_eq!(wrap("  \n \n  \n ", 80), vec!["", "", "", ""]);
    }

    #[test]
    fn wrap_colored_text() {
        // The words are much longer than 6 bytes, but they remain
        // intact after filling the text.
        let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m";
        let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m";
        assert_eq!(
            wrap(&format!("{} {}", green_hello, blue_world), 6),
            vec![green_hello, blue_world],
        );
    }
}
textwrap/wrap.rs

textwrap/
wrap.rs