textwrap/wrap_algorithms.rs
1//! Word wrapping algorithms.
2//!
3//! After a text has been broken into words (or [`Fragment`]s), one
4//! now has to decide how to break the fragments into lines. The
5//! simplest algorithm for this is implemented by
6//! [`wrap_first_fit()`]: it uses no look-ahead and simply adds
7//! fragments to the line as long as they fit. However, this can lead
8//! to poor line breaks if a large fragment almost-but-not-quite fits
9//! on a line. When that happens, the fragment is moved to the next
10//! line and it will leave behind a large gap.
11//!
12//! A more advanced algorithm, implemented by [`wrap_optimal_fit()`],
13//! will take this into account. The optimal-fit algorithm considers
14//! all possible line breaks and will attempt to minimize the gaps
15//! left behind by overly short lines.
16//!
17//! While both algorithms run in linear time, the first-fit algorithm
18//! is about 4 times faster than the optimal-fit algorithm.
19
20#[cfg(feature = "smawk")]
21mod optimal_fit;
22#[cfg(feature = "smawk")]
23pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
24
25use crate::core::{Fragment, Word};
26
27/// Describes how to wrap words into lines.
28///
29/// The simplest approach is to wrap words one word at a time and
30/// accept the first way of wrapping which fit
31/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
32/// enabled, a more complex algorithm is available which will look at
33/// an entire paragraph at a time in order to find optimal line breaks
34/// ([`WrapAlgorithm::OptimalFit`]).
35#[derive(Clone, Copy, Debug)]
36pub enum WrapAlgorithm {
37 /// Wrap words using a fast and simple algorithm.
38 ///
39 /// This algorithm uses no look-ahead when finding line breaks.
40 /// Implemented by [`wrap_first_fit()`], please see that function
41 /// for details and examples.
42 FirstFit,
43
44 /// Wrap words using an advanced algorithm with look-ahead.
45 ///
46 /// This wrapping algorithm considers the entire paragraph to find
47 /// optimal line breaks. When wrapping text, "penalties" are
48 /// assigned to line breaks based on the gaps left at the end of
49 /// lines. See [`Penalties`] for details.
50 ///
51 /// The underlying wrapping algorithm is implemented by
52 /// [`wrap_optimal_fit()`], please see that function for examples.
53 ///
54 /// **Note:** Only available when the `smawk` Cargo feature is
55 /// enabled.
56 #[cfg(feature = "smawk")]
57 OptimalFit(Penalties),
58
59 /// Custom wrapping function.
60 ///
61 /// Use this if you want to implement your own wrapping algorithm.
62 /// The function can freely decide how to turn a slice of
63 /// [`Word`]s into lines.
64 ///
65 /// # Example
66 ///
67 /// ```
68 /// use textwrap::core::Word;
69 /// use textwrap::{wrap, Options, WrapAlgorithm};
70 ///
71 /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
72 /// let mut lines = Vec::new();
73 /// let mut step = 1;
74 /// let mut start_idx = 0;
75 /// while start_idx + step <= words.len() {
76 /// lines.push(&words[start_idx .. start_idx+step]);
77 /// start_idx += step;
78 /// step += 1;
79 /// }
80 /// lines
81 /// }
82 ///
83 /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
84 /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
85 /// vec!["First,",
86 /// "second, third,",
87 /// "fourth, fifth, sixth"]);
88 /// ```
89 Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
90}
91
92impl PartialEq for WrapAlgorithm {
93 /// Compare two wrap algorithms.
94 ///
95 /// ```
96 /// use textwrap::WrapAlgorithm;
97 ///
98 /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit);
99 /// #[cfg(feature = "smawk")] {
100 /// assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit());
101 /// }
102 /// ```
103 ///
104 /// Note that `WrapAlgorithm::Custom` values never compare equal:
105 ///
106 /// ```
107 /// use textwrap::WrapAlgorithm;
108 ///
109 /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]),
110 /// WrapAlgorithm::Custom(|words, line_widths| vec![words]));
111 /// ```
112 fn eq(&self, other: &Self) -> bool {
113 match (self, other) {
114 (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true,
115 #[cfg(feature = "smawk")]
116 (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b,
117 (_, _) => false,
118 }
119 }
120}
121
122impl WrapAlgorithm {
123 /// Create new wrap algorithm.
124 ///
125 /// The best wrapping algorithm is used by default, i.e.,
126 /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
127 /// [`WrapAlgorithm::FirstFit`].
128 pub const fn new() -> Self {
129 #[cfg(not(feature = "smawk"))]
130 {
131 WrapAlgorithm::FirstFit
132 }
133
134 #[cfg(feature = "smawk")]
135 {
136 WrapAlgorithm::new_optimal_fit()
137 }
138 }
139
140 /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
141 /// works well for monospace text.
142 ///
143 /// **Note:** Only available when the `smawk` Cargo feature is
144 /// enabled.
145 #[cfg(feature = "smawk")]
146 pub const fn new_optimal_fit() -> Self {
147 WrapAlgorithm::OptimalFit(Penalties::new())
148 }
149
150 /// Wrap words according to line widths.
151 ///
152 /// The `line_widths` slice gives the target line width for each
153 /// line (the last slice element is repeated as necessary). This
154 /// can be used to implement hanging indentation.
155 #[inline]
156 pub fn wrap<'a, 'b>(
157 &self,
158 words: &'b [Word<'a>],
159 line_widths: &'b [usize],
160 ) -> Vec<&'b [Word<'a>]> {
161 // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
162 // = 9_007_199_254_740_992 can be represented without loss by
163 // a f64. Larger line widths will be rounded to the nearest
164 // representable number.
165 let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
166
167 match self {
168 WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
169
170 #[cfg(feature = "smawk")]
171 WrapAlgorithm::OptimalFit(penalties) => {
172 // The computation cannot overflow when the line
173 // widths are restricted to usize.
174 wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
175 }
176
177 WrapAlgorithm::Custom(func) => func(words, line_widths),
178 }
179 }
180}
181
182impl Default for WrapAlgorithm {
183 fn default() -> Self {
184 WrapAlgorithm::new()
185 }
186}
187
188/// Wrap abstract fragments into lines with a first-fit algorithm.
189///
190/// The `line_widths` slice gives the target line width for each line
191/// (the last slice element is repeated as necessary). This can be
192/// used to implement hanging indentation.
193///
194/// The fragments must already have been split into the desired
195/// widths, this function will not (and cannot) attempt to split them
196/// further when arranging them into lines.
197///
198/// # First-Fit Algorithm
199///
200/// This implements a simple “greedy” algorithm: accumulate fragments
201/// one by one and when a fragment no longer fits, start a new line.
202/// There is no look-ahead, we simply take first fit of the fragments
203/// we find.
204///
205/// While fast and predictable, this algorithm can produce poor line
206/// breaks when a long fragment is moved to a new line, leaving behind
207/// a large gap:
208///
209/// ```
210/// use textwrap::core::Word;
211/// use textwrap::wrap_algorithms::wrap_first_fit;
212/// use textwrap::WordSeparator;
213///
214/// // Helper to convert wrapped lines to a Vec<String>.
215/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
216/// lines.iter().map(|line| {
217/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
218/// }).collect::<Vec<_>>()
219/// }
220///
221/// let text = "These few words will unfortunately not wrap nicely.";
222/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
223/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
224/// vec!["These few words",
225/// "will", // <-- short line
226/// "unfortunately",
227/// "not wrap",
228/// "nicely."]);
229///
230/// // We can avoid the short line if we look ahead:
231/// #[cfg(feature = "smawk")]
232/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
233/// #[cfg(feature = "smawk")]
234/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
235/// vec!["These few",
236/// "words will",
237/// "unfortunately",
238/// "not wrap",
239/// "nicely."]);
240/// ```
241///
242/// The [`wrap_optimal_fit()`] function was used above to get better
243/// line breaks. It uses an advanced algorithm which tries to avoid
244/// short lines. This function is about 4 times faster than
245/// [`wrap_optimal_fit()`].
246///
247/// # Examples
248///
249/// Imagine you're building a house site and you have a number of
250/// tasks you need to execute. Things like pour foundation, complete
251/// framing, install plumbing, electric cabling, install insulation.
252///
253/// The construction workers can only work during daytime, so they
254/// need to pack up everything at night. Because they need to secure
255/// their tools and move machines back to the garage, this process
256/// takes much more time than the time it would take them to simply
257/// switch to another task.
258///
259/// You would like to make a list of tasks to execute every day based
260/// on your estimates. You can model this with a program like this:
261///
262/// ```
263/// use textwrap::core::{Fragment, Word};
264/// use textwrap::wrap_algorithms::wrap_first_fit;
265///
266/// #[derive(Debug)]
267/// struct Task<'a> {
268/// name: &'a str,
269/// hours: f64, // Time needed to complete task.
270/// sweep: f64, // Time needed for a quick sweep after task during the day.
271/// cleanup: f64, // Time needed for full cleanup if day ends with this task.
272/// }
273///
274/// impl Fragment for Task<'_> {
275/// fn width(&self) -> f64 { self.hours }
276/// fn whitespace_width(&self) -> f64 { self.sweep }
277/// fn penalty_width(&self) -> f64 { self.cleanup }
278/// }
279///
280/// // The morning tasks
281/// let tasks = vec![
282/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
283/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
284/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
285/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
286/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
287/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
288/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
289/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
290/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
291/// ];
292///
293/// // Fill tasks into days, taking `day_length` into account. The
294/// // output shows the hours worked per day along with the names of
295/// // the tasks for that day.
296/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
297/// let mut days = Vec::new();
298/// // Assign tasks to days. The assignment is a vector of slices,
299/// // with a slice per day.
300/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
301/// for day in assigned_days.iter() {
302/// let last = day.last().unwrap();
303/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
304/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
305/// days.push((work_hours - last.sweep + last.cleanup, names));
306/// }
307/// days
308/// }
309///
310/// // With a single crew working 8 hours a day:
311/// assert_eq!(
312/// assign_days(&tasks, 8.0),
313/// [
314/// (7.0, vec!["Foundation"]),
315/// (8.0, vec!["Framing", "Plumbing"]),
316/// (7.0, vec!["Electrical", "Insulation"]),
317/// (5.0, vec!["Drywall"]),
318/// (7.0, vec!["Floors", "Countertops"]),
319/// (4.0, vec!["Bathrooms"]),
320/// ]
321/// );
322///
323/// // With two crews working in shifts, 16 hours a day:
324/// assert_eq!(
325/// assign_days(&tasks, 16.0),
326/// [
327/// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
328/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
329/// (6.0, vec!["Countertops", "Bathrooms"]),
330/// ]
331/// );
332/// ```
333///
334/// Apologies to anyone who actually knows how to build a house and
335/// knows how long each step takes :-)
336pub fn wrap_first_fit<'a, T: Fragment>(fragments: &'a [T], line_widths: &[f64]) -> Vec<&'a [T]> {
337 // The final line width is used for all remaining lines.
338 let default_line_width = line_widths.last().copied().unwrap_or(0.0);
339 let mut lines = Vec::new();
340 let mut start = 0;
341 let mut width = 0.0;
342
343 for (idx, fragment) in fragments.iter().enumerate() {
344 let line_width = line_widths
345 .get(lines.len())
346 .copied()
347 .unwrap_or(default_line_width);
348 if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
349 lines.push(&fragments[start..idx]);
350 start = idx;
351 width = 0.0;
352 }
353 width += fragment.width() + fragment.whitespace_width();
354 }
355 lines.push(&fragments[start..]);
356 lines
357}
358
359#[cfg(test)]
360mod tests {
361 use super::*;
362
363 #[derive(Debug, PartialEq)]
364 struct Word(f64);
365
366 #[rustfmt::skip]
367 impl Fragment for Word {
368 fn width(&self) -> f64 { self.0 }
369 fn whitespace_width(&self) -> f64 { 1.0 }
370 fn penalty_width(&self) -> f64 { 0.0 }
371 }
372
373 #[test]
374 fn wrap_string_longer_than_f64() {
375 let words = vec![
376 Word(1e307),
377 Word(2e307),
378 Word(3e307),
379 Word(4e307),
380 Word(5e307),
381 Word(6e307),
382 ];
383 // Wrap at just under f64::MAX (~19e307). The tiny
384 // whitespace_widths disappear because of loss of precision.
385 assert_eq!(
386 wrap_first_fit(&words, &[15e307]),
387 &[
388 vec![
389 Word(1e307),
390 Word(2e307),
391 Word(3e307),
392 Word(4e307),
393 Word(5e307)
394 ],
395 vec![Word(6e307)]
396 ]
397 );
398 }
399}