textwrap/
wrap_algorithms.rs

1//! Word wrapping algorithms.
2//!
3//! After a text has been broken into words (or [`Fragment`]s), one
4//! now has to decide how to break the fragments into lines. The
5//! simplest algorithm for this is implemented by
6//! [`wrap_first_fit()`]: it uses no look-ahead and simply adds
7//! fragments to the line as long as they fit. However, this can lead
8//! to poor line breaks if a large fragment almost-but-not-quite fits
9//! on a line. When that happens, the fragment is moved to the next
10//! line and it will leave behind a large gap.
11//!
12//! A more advanced algorithm, implemented by [`wrap_optimal_fit()`],
13//! will take this into account. The optimal-fit algorithm considers
14//! all possible line breaks and will attempt to minimize the gaps
15//! left behind by overly short lines.
16//!
17//! While both algorithms run in linear time, the first-fit algorithm
18//! is about 4 times faster than the optimal-fit algorithm.
19
20#[cfg(feature = "smawk")]
21mod optimal_fit;
22#[cfg(feature = "smawk")]
23pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
24
25use crate::core::{Fragment, Word};
26
27/// Describes how to wrap words into lines.
28///
29/// The simplest approach is to wrap words one word at a time and
30/// accept the first way of wrapping which fit
31/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
32/// enabled, a more complex algorithm is available which will look at
33/// an entire paragraph at a time in order to find optimal line breaks
34/// ([`WrapAlgorithm::OptimalFit`]).
35#[derive(Clone, Copy, Debug)]
36pub enum WrapAlgorithm {
37    /// Wrap words using a fast and simple algorithm.
38    ///
39    /// This algorithm uses no look-ahead when finding line breaks.
40    /// Implemented by [`wrap_first_fit()`], please see that function
41    /// for details and examples.
42    FirstFit,
43
44    /// Wrap words using an advanced algorithm with look-ahead.
45    ///
46    /// This wrapping algorithm considers the entire paragraph to find
47    /// optimal line breaks. When wrapping text, "penalties" are
48    /// assigned to line breaks based on the gaps left at the end of
49    /// lines. See [`Penalties`] for details.
50    ///
51    /// The underlying wrapping algorithm is implemented by
52    /// [`wrap_optimal_fit()`], please see that function for examples.
53    ///
54    /// **Note:** Only available when the `smawk` Cargo feature is
55    /// enabled.
56    #[cfg(feature = "smawk")]
57    OptimalFit(Penalties),
58
59    /// Custom wrapping function.
60    ///
61    /// Use this if you want to implement your own wrapping algorithm.
62    /// The function can freely decide how to turn a slice of
63    /// [`Word`]s into lines.
64    ///
65    /// # Example
66    ///
67    /// ```
68    /// use textwrap::core::Word;
69    /// use textwrap::{wrap, Options, WrapAlgorithm};
70    ///
71    /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
72    ///     let mut lines = Vec::new();
73    ///     let mut step = 1;
74    ///     let mut start_idx = 0;
75    ///     while start_idx + step <= words.len() {
76    ///       lines.push(&words[start_idx .. start_idx+step]);
77    ///       start_idx += step;
78    ///       step += 1;
79    ///     }
80    ///     lines
81    /// }
82    ///
83    /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
84    /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
85    ///            vec!["First,",
86    ///                 "second, third,",
87    ///                 "fourth, fifth, sixth"]);
88    /// ```
89    Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
90}
91
92impl PartialEq for WrapAlgorithm {
93    /// Compare two wrap algorithms.
94    ///
95    /// ```
96    /// use textwrap::WrapAlgorithm;
97    ///
98    /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit);
99    /// #[cfg(feature = "smawk")] {
100    ///     assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit());
101    /// }
102    /// ```
103    ///
104    /// Note that `WrapAlgorithm::Custom` values never compare equal:
105    ///
106    /// ```
107    /// use textwrap::WrapAlgorithm;
108    ///
109    /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]),
110    ///            WrapAlgorithm::Custom(|words, line_widths| vec![words]));
111    /// ```
112    fn eq(&self, other: &Self) -> bool {
113        match (self, other) {
114            (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true,
115            #[cfg(feature = "smawk")]
116            (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b,
117            (_, _) => false,
118        }
119    }
120}
121
122impl WrapAlgorithm {
123    /// Create new wrap algorithm.
124    ///
125    /// The best wrapping algorithm is used by default, i.e.,
126    /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
127    /// [`WrapAlgorithm::FirstFit`].
128    pub const fn new() -> Self {
129        #[cfg(not(feature = "smawk"))]
130        {
131            WrapAlgorithm::FirstFit
132        }
133
134        #[cfg(feature = "smawk")]
135        {
136            WrapAlgorithm::new_optimal_fit()
137        }
138    }
139
140    /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
141    /// works well for monospace text.
142    ///
143    /// **Note:** Only available when the `smawk` Cargo feature is
144    /// enabled.
145    #[cfg(feature = "smawk")]
146    pub const fn new_optimal_fit() -> Self {
147        WrapAlgorithm::OptimalFit(Penalties::new())
148    }
149
150    /// Wrap words according to line widths.
151    ///
152    /// The `line_widths` slice gives the target line width for each
153    /// line (the last slice element is repeated as necessary). This
154    /// can be used to implement hanging indentation.
155    #[inline]
156    pub fn wrap<'a, 'b>(
157        &self,
158        words: &'b [Word<'a>],
159        line_widths: &'b [usize],
160    ) -> Vec<&'b [Word<'a>]> {
161        // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
162        // = 9_007_199_254_740_992 can be represented without loss by
163        // a f64. Larger line widths will be rounded to the nearest
164        // representable number.
165        let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
166
167        match self {
168            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
169
170            #[cfg(feature = "smawk")]
171            WrapAlgorithm::OptimalFit(penalties) => {
172                // The computation cannot overflow when the line
173                // widths are restricted to usize.
174                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
175            }
176
177            WrapAlgorithm::Custom(func) => func(words, line_widths),
178        }
179    }
180}
181
182impl Default for WrapAlgorithm {
183    fn default() -> Self {
184        WrapAlgorithm::new()
185    }
186}
187
188/// Wrap abstract fragments into lines with a first-fit algorithm.
189///
190/// The `line_widths` slice gives the target line width for each line
191/// (the last slice element is repeated as necessary). This can be
192/// used to implement hanging indentation.
193///
194/// The fragments must already have been split into the desired
195/// widths, this function will not (and cannot) attempt to split them
196/// further when arranging them into lines.
197///
198/// # First-Fit Algorithm
199///
200/// This implements a simple “greedy” algorithm: accumulate fragments
201/// one by one and when a fragment no longer fits, start a new line.
202/// There is no look-ahead, we simply take first fit of the fragments
203/// we find.
204///
205/// While fast and predictable, this algorithm can produce poor line
206/// breaks when a long fragment is moved to a new line, leaving behind
207/// a large gap:
208///
209/// ```
210/// use textwrap::core::Word;
211/// use textwrap::wrap_algorithms::wrap_first_fit;
212/// use textwrap::WordSeparator;
213///
214/// // Helper to convert wrapped lines to a Vec<String>.
215/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
216///     lines.iter().map(|line| {
217///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
218///     }).collect::<Vec<_>>()
219/// }
220///
221/// let text = "These few words will unfortunately not wrap nicely.";
222/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
223/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
224///            vec!["These few words",
225///                 "will",  // <-- short line
226///                 "unfortunately",
227///                 "not wrap",
228///                 "nicely."]);
229///
230/// // We can avoid the short line if we look ahead:
231/// #[cfg(feature = "smawk")]
232/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
233/// #[cfg(feature = "smawk")]
234/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
235///            vec!["These few",
236///                 "words will",
237///                 "unfortunately",
238///                 "not wrap",
239///                 "nicely."]);
240/// ```
241///
242/// The [`wrap_optimal_fit()`] function was used above to get better
243/// line breaks. It uses an advanced algorithm which tries to avoid
244/// short lines. This function is about 4 times faster than
245/// [`wrap_optimal_fit()`].
246///
247/// # Examples
248///
249/// Imagine you're building a house site and you have a number of
250/// tasks you need to execute. Things like pour foundation, complete
251/// framing, install plumbing, electric cabling, install insulation.
252///
253/// The construction workers can only work during daytime, so they
254/// need to pack up everything at night. Because they need to secure
255/// their tools and move machines back to the garage, this process
256/// takes much more time than the time it would take them to simply
257/// switch to another task.
258///
259/// You would like to make a list of tasks to execute every day based
260/// on your estimates. You can model this with a program like this:
261///
262/// ```
263/// use textwrap::core::{Fragment, Word};
264/// use textwrap::wrap_algorithms::wrap_first_fit;
265///
266/// #[derive(Debug)]
267/// struct Task<'a> {
268///     name: &'a str,
269///     hours: f64,   // Time needed to complete task.
270///     sweep: f64,   // Time needed for a quick sweep after task during the day.
271///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
272/// }
273///
274/// impl Fragment for Task<'_> {
275///     fn width(&self) -> f64 { self.hours }
276///     fn whitespace_width(&self) -> f64 { self.sweep }
277///     fn penalty_width(&self) -> f64 { self.cleanup }
278/// }
279///
280/// // The morning tasks
281/// let tasks = vec![
282///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
283///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
284///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
285///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
286///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
287///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
288///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
289///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
290///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
291/// ];
292///
293/// // Fill tasks into days, taking `day_length` into account. The
294/// // output shows the hours worked per day along with the names of
295/// // the tasks for that day.
296/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
297///     let mut days = Vec::new();
298///     // Assign tasks to days. The assignment is a vector of slices,
299///     // with a slice per day.
300///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
301///     for day in assigned_days.iter() {
302///         let last = day.last().unwrap();
303///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
304///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
305///         days.push((work_hours - last.sweep + last.cleanup, names));
306///     }
307///     days
308/// }
309///
310/// // With a single crew working 8 hours a day:
311/// assert_eq!(
312///     assign_days(&tasks, 8.0),
313///     [
314///         (7.0, vec!["Foundation"]),
315///         (8.0, vec!["Framing", "Plumbing"]),
316///         (7.0, vec!["Electrical", "Insulation"]),
317///         (5.0, vec!["Drywall"]),
318///         (7.0, vec!["Floors", "Countertops"]),
319///         (4.0, vec!["Bathrooms"]),
320///     ]
321/// );
322///
323/// // With two crews working in shifts, 16 hours a day:
324/// assert_eq!(
325///     assign_days(&tasks, 16.0),
326///     [
327///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
328///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
329///         (6.0, vec!["Countertops", "Bathrooms"]),
330///     ]
331/// );
332/// ```
333///
334/// Apologies to anyone who actually knows how to build a house and
335/// knows how long each step takes :-)
336pub fn wrap_first_fit<'a, T: Fragment>(fragments: &'a [T], line_widths: &[f64]) -> Vec<&'a [T]> {
337    // The final line width is used for all remaining lines.
338    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
339    let mut lines = Vec::new();
340    let mut start = 0;
341    let mut width = 0.0;
342
343    for (idx, fragment) in fragments.iter().enumerate() {
344        let line_width = line_widths
345            .get(lines.len())
346            .copied()
347            .unwrap_or(default_line_width);
348        if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
349            lines.push(&fragments[start..idx]);
350            start = idx;
351            width = 0.0;
352        }
353        width += fragment.width() + fragment.whitespace_width();
354    }
355    lines.push(&fragments[start..]);
356    lines
357}
358
359#[cfg(test)]
360mod tests {
361    use super::*;
362
363    #[derive(Debug, PartialEq)]
364    struct Word(f64);
365
366    #[rustfmt::skip]
367    impl Fragment for Word {
368        fn width(&self) -> f64 { self.0 }
369        fn whitespace_width(&self) -> f64 { 1.0 }
370        fn penalty_width(&self) -> f64 { 0.0 }
371    }
372
373    #[test]
374    fn wrap_string_longer_than_f64() {
375        let words = vec![
376            Word(1e307),
377            Word(2e307),
378            Word(3e307),
379            Word(4e307),
380            Word(5e307),
381            Word(6e307),
382        ];
383        // Wrap at just under f64::MAX (~19e307). The tiny
384        // whitespace_widths disappear because of loss of precision.
385        assert_eq!(
386            wrap_first_fit(&words, &[15e307]),
387            &[
388                vec![
389                    Word(1e307),
390                    Word(2e307),
391                    Word(3e307),
392                    Word(4e307),
393                    Word(5e307)
394                ],
395                vec![Word(6e307)]
396            ]
397        );
398    }
399}