mirror of https://github.com/rust-lang/cargo
447 lines
19 KiB
Rust
447 lines
19 KiB
Rust
//! Man-page formatter.
|
||
|
||
use crate::util::{header_text, parse_name_and_section};
|
||
use crate::EventIter;
|
||
use anyhow::{bail, Error};
|
||
use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag, TagEnd};
|
||
use std::fmt::Write;
|
||
use url::Url;
|
||
|
||
pub struct ManFormatter {
|
||
url: Option<Url>,
|
||
}
|
||
|
||
impl ManFormatter {
|
||
pub fn new(url: Option<Url>) -> ManFormatter {
|
||
ManFormatter { url }
|
||
}
|
||
}
|
||
|
||
impl super::Formatter for ManFormatter {
|
||
fn render(&self, input: &str) -> Result<String, Error> {
|
||
ManRenderer::render(input, self.url.clone())
|
||
}
|
||
|
||
fn render_options_start(&self) -> &'static str {
|
||
// Tell pulldown_cmark to ignore this.
|
||
// This will be stripped out later.
|
||
"<![CDATA["
|
||
}
|
||
|
||
fn render_options_end(&self) -> &'static str {
|
||
"]]>"
|
||
}
|
||
|
||
fn render_option(
|
||
&self,
|
||
params: &[&str],
|
||
block: &str,
|
||
_man_name: &str,
|
||
) -> Result<String, Error> {
|
||
let rendered_options = params
|
||
.iter()
|
||
.map(|param| {
|
||
let r = self.render(param)?;
|
||
Ok(r.trim().trim_start_matches(".sp").to_string())
|
||
})
|
||
.collect::<Result<Vec<_>, Error>>()?;
|
||
let rendered_block = self.render(block)?;
|
||
let rendered_block = rendered_block.trim().trim_start_matches(".sp").trim();
|
||
// .RS = move left margin to right 4.
|
||
// .RE = move margin back one level.
|
||
Ok(format!(
|
||
"\n.sp\n{}\n.RS 4\n{}\n.RE\n",
|
||
rendered_options.join(", "),
|
||
rendered_block
|
||
))
|
||
}
|
||
|
||
fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
|
||
Ok(format!("`{}`({})", name, section))
|
||
}
|
||
}
|
||
|
||
#[derive(Copy, Clone)]
|
||
enum Font {
|
||
Bold,
|
||
Italic,
|
||
}
|
||
|
||
impl Font {
|
||
fn str_from_stack(font_stack: &[Font]) -> &'static str {
|
||
let has_bold = font_stack.iter().any(|font| matches!(font, Font::Bold));
|
||
let has_italic = font_stack.iter().any(|font| matches!(font, Font::Italic));
|
||
match (has_bold, has_italic) {
|
||
(false, false) => "\\fR", // roman (normal)
|
||
(false, true) => "\\fI", // italic
|
||
(true, false) => "\\fB", // bold
|
||
(true, true) => "\\f(BI", // bold italic
|
||
}
|
||
}
|
||
}
|
||
|
||
struct ManRenderer<'e> {
|
||
output: String,
|
||
parser: EventIter<'e>,
|
||
font_stack: Vec<Font>,
|
||
}
|
||
|
||
impl<'e> ManRenderer<'e> {
|
||
fn render(input: &str, url: Option<Url>) -> Result<String, Error> {
|
||
let parser = crate::md_parser(input, url);
|
||
let output = String::with_capacity(input.len() * 3 / 2);
|
||
let mut mr = ManRenderer {
|
||
parser,
|
||
output,
|
||
font_stack: Vec::new(),
|
||
};
|
||
mr.push_man()?;
|
||
Ok(mr.output)
|
||
}
|
||
|
||
fn push_man(&mut self) -> Result<(), Error> {
|
||
// If this is true, this is inside a cdata block used for hiding
|
||
// content from pulldown_cmark.
|
||
let mut in_cdata = false;
|
||
// The current list stack. None if unordered, Some if ordered with the
|
||
// given number as the current index.
|
||
let mut list: Vec<Option<u64>> = Vec::new();
|
||
// Used in some cases where spacing isn't desired.
|
||
let mut suppress_paragraph = false;
|
||
let mut table_cell_index = 0;
|
||
|
||
let mut last_seen_link_data = None;
|
||
while let Some((event, range)) = self.parser.next() {
|
||
let this_suppress_paragraph = suppress_paragraph;
|
||
suppress_paragraph = false;
|
||
match event {
|
||
Event::Start(tag) => {
|
||
match tag {
|
||
Tag::Paragraph => {
|
||
if !this_suppress_paragraph {
|
||
self.flush();
|
||
self.output.push_str(".sp\n");
|
||
}
|
||
}
|
||
Tag::Heading { level, .. } => {
|
||
if level == HeadingLevel::H1 {
|
||
self.push_top_header()?;
|
||
} else if level == HeadingLevel::H2 {
|
||
// Section header
|
||
let text = header_text(&mut self.parser)?;
|
||
self.flush();
|
||
write!(self.output, ".SH \"{}\"\n", text)?;
|
||
suppress_paragraph = true;
|
||
} else {
|
||
// Subsection header
|
||
let text = header_text(&mut self.parser)?;
|
||
self.flush();
|
||
write!(self.output, ".SS \"{}\"\n", text)?;
|
||
suppress_paragraph = true;
|
||
}
|
||
}
|
||
Tag::BlockQuote => {
|
||
self.flush();
|
||
// .RS = move left margin over 3
|
||
// .ll = shrink line length
|
||
self.output.push_str(".RS 3\n.ll -5\n.sp\n");
|
||
suppress_paragraph = true;
|
||
}
|
||
Tag::CodeBlock(_kind) => {
|
||
// space down, indent 4, no-fill mode
|
||
self.flush();
|
||
self.output.push_str(".sp\n.RS 4\n.nf\n");
|
||
}
|
||
Tag::List(start) => list.push(start),
|
||
Tag::Item => {
|
||
// Note: This uses explicit movement instead of .IP
|
||
// because the spacing on .IP looks weird to me.
|
||
// space down, indent 4
|
||
self.flush();
|
||
self.output.push_str(".sp\n.RS 4\n");
|
||
match list.last_mut().expect("item must have list start") {
|
||
// Ordered list.
|
||
Some(n) => {
|
||
// move left 4, output the list index number, move right 1.
|
||
write!(self.output, "\\h'-04' {}.\\h'+01'", n)?;
|
||
*n += 1;
|
||
}
|
||
// Unordered list.
|
||
None => self.output.push_str("\\h'-04'\\(bu\\h'+02'"),
|
||
}
|
||
suppress_paragraph = true;
|
||
}
|
||
Tag::FootnoteDefinition(_label) => unimplemented!(),
|
||
Tag::Table(alignment) => {
|
||
// Table start
|
||
// allbox = draw a box around all the cells
|
||
// tab(:) = Use `:` to separate cell data (instead of tab)
|
||
// ; = end of options
|
||
self.output.push_str(
|
||
"\n.TS\n\
|
||
allbox tab(:);\n",
|
||
);
|
||
let alignments: Vec<_> = alignment
|
||
.iter()
|
||
.map(|a| match a {
|
||
Alignment::Left | Alignment::None => "lt",
|
||
Alignment::Center => "ct",
|
||
Alignment::Right => "rt",
|
||
})
|
||
.collect();
|
||
self.output.push_str(&alignments.join(" "));
|
||
self.output.push_str(".\n");
|
||
table_cell_index = 0;
|
||
}
|
||
Tag::TableHead => {
|
||
table_cell_index = 0;
|
||
}
|
||
Tag::TableRow => {
|
||
table_cell_index = 0;
|
||
self.output.push('\n');
|
||
}
|
||
Tag::TableCell => {
|
||
if table_cell_index != 0 {
|
||
// Separator between columns.
|
||
self.output.push(':');
|
||
}
|
||
// Start a text block.
|
||
self.output.push_str("T{\n");
|
||
table_cell_index += 1
|
||
}
|
||
Tag::Emphasis => self.push_font(Font::Italic),
|
||
Tag::Strong => self.push_font(Font::Bold),
|
||
// Strikethrough isn't usually supported for TTY.
|
||
Tag::Strikethrough => self.output.push_str("~~"),
|
||
Tag::Link {
|
||
link_type,
|
||
dest_url,
|
||
..
|
||
} => {
|
||
last_seen_link_data = Some((link_type.clone(), dest_url.to_owned()));
|
||
if dest_url.starts_with('#') {
|
||
// In a man page, page-relative anchors don't
|
||
// have much meaning.
|
||
continue;
|
||
}
|
||
match link_type {
|
||
LinkType::Autolink | LinkType::Email => {
|
||
// The text is a copy of the URL, which is not needed.
|
||
match self.parser.next() {
|
||
Some((Event::Text(_), _range)) => {}
|
||
_ => bail!("expected text after autolink"),
|
||
}
|
||
}
|
||
LinkType::Inline
|
||
| LinkType::Reference
|
||
| LinkType::Collapsed
|
||
| LinkType::Shortcut => {
|
||
self.push_font(Font::Italic);
|
||
}
|
||
// This is currently unused. This is only
|
||
// emitted with a broken link callback, but I
|
||
// felt it is too annoying to escape `[` in
|
||
// option descriptions.
|
||
LinkType::ReferenceUnknown
|
||
| LinkType::CollapsedUnknown
|
||
| LinkType::ShortcutUnknown => {
|
||
bail!(
|
||
"link with missing reference `{}` located at offset {}",
|
||
dest_url,
|
||
range.start
|
||
);
|
||
}
|
||
}
|
||
}
|
||
Tag::Image { .. } => {
|
||
bail!("images are not currently supported")
|
||
}
|
||
Tag::HtmlBlock { .. } | Tag::MetadataBlock { .. } => {}
|
||
}
|
||
}
|
||
Event::End(tag_end) => {
|
||
match &tag_end {
|
||
TagEnd::Paragraph => self.flush(),
|
||
TagEnd::Heading(..) => {}
|
||
TagEnd::BlockQuote => {
|
||
self.flush();
|
||
// restore left margin, restore line length
|
||
self.output.push_str(".br\n.RE\n.ll\n");
|
||
}
|
||
TagEnd::CodeBlock => {
|
||
self.flush();
|
||
// Restore fill mode, move margin back one level.
|
||
self.output.push_str(".fi\n.RE\n");
|
||
}
|
||
TagEnd::List(_) => {
|
||
list.pop();
|
||
}
|
||
TagEnd::Item => {
|
||
self.flush();
|
||
// Move margin back one level.
|
||
self.output.push_str(".RE\n");
|
||
}
|
||
TagEnd::FootnoteDefinition => {}
|
||
TagEnd::Table => {
|
||
// Table end
|
||
// I don't know why, but the .sp is needed to provide
|
||
// space with the following content.
|
||
self.output.push_str("\n.TE\n.sp\n");
|
||
}
|
||
TagEnd::TableHead => {}
|
||
TagEnd::TableRow => {}
|
||
TagEnd::TableCell => {
|
||
// End text block.
|
||
self.output.push_str("\nT}");
|
||
}
|
||
TagEnd::Emphasis | TagEnd::Strong => self.pop_font(),
|
||
TagEnd::Strikethrough => self.output.push_str("~~"),
|
||
TagEnd::Link => {
|
||
if let Some((link_type, ref dest_url)) = last_seen_link_data {
|
||
if dest_url.starts_with('#') {
|
||
continue;
|
||
}
|
||
match link_type {
|
||
LinkType::Autolink | LinkType::Email => {}
|
||
LinkType::Inline
|
||
| LinkType::Reference
|
||
| LinkType::Collapsed
|
||
| LinkType::Shortcut => {
|
||
self.pop_font();
|
||
self.output.push(' ');
|
||
}
|
||
_ => {
|
||
panic!("unexpected tag {:?}", tag_end);
|
||
}
|
||
}
|
||
write!(self.output, "<{}>", escape(&dest_url)?)?;
|
||
}
|
||
}
|
||
TagEnd::Image | TagEnd::HtmlBlock | TagEnd::MetadataBlock(..) => {}
|
||
}
|
||
}
|
||
Event::Text(t) => {
|
||
self.output.push_str(&escape(&t)?);
|
||
}
|
||
Event::Code(t) => {
|
||
self.push_font(Font::Bold);
|
||
self.output.push_str(&escape(&t)?);
|
||
self.pop_font();
|
||
}
|
||
Event::Html(t) => {
|
||
if t.starts_with("<![CDATA[") {
|
||
// CDATA is a special marker used for handling options.
|
||
in_cdata = true;
|
||
} else if in_cdata {
|
||
if t.trim().ends_with("]]>") {
|
||
in_cdata = false;
|
||
} else if !t.trim().is_empty() {
|
||
self.output.push_str(&t);
|
||
}
|
||
} else {
|
||
self.output.push_str(&escape(&t)?);
|
||
}
|
||
}
|
||
Event::FootnoteReference(_t) => {}
|
||
Event::SoftBreak => self.output.push('\n'),
|
||
Event::HardBreak => {
|
||
self.flush();
|
||
self.output.push_str(".br\n");
|
||
}
|
||
Event::Rule => {
|
||
self.flush();
|
||
// \l' **length** ' Draw horizontal line (default underscore).
|
||
// \n(.lu Gets value from register "lu" (current line length)
|
||
self.output.push_str("\\l'\\n(.lu'\n");
|
||
}
|
||
Event::TaskListMarker(_b) => unimplemented!(),
|
||
Event::InlineHtml(..) => unimplemented!(),
|
||
}
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
fn flush(&mut self) {
|
||
if !self.output.ends_with('\n') {
|
||
self.output.push('\n');
|
||
}
|
||
}
|
||
|
||
/// Switch to the given font.
|
||
///
|
||
/// Because the troff sequence `\fP` for switching to the "previous" font
|
||
/// doesn't support nesting, this needs to emulate it here. This is needed
|
||
/// for situations like **hi _there_**.
|
||
fn push_font(&mut self, font: Font) {
|
||
self.font_stack.push(font);
|
||
self.output.push_str(Font::str_from_stack(&self.font_stack));
|
||
}
|
||
|
||
fn pop_font(&mut self) {
|
||
self.font_stack.pop();
|
||
self.output.push_str(Font::str_from_stack(&self.font_stack));
|
||
}
|
||
|
||
/// Parse and render the first top-level header of the document.
|
||
fn push_top_header(&mut self) -> Result<(), Error> {
|
||
// This enables the tbl preprocessor for tables.
|
||
// This seems to be enabled by default on every modern system I could
|
||
// find, but it doesn't seem to hurt to enable this.
|
||
self.output.push_str("'\\\" t\n");
|
||
// Extract the name of the man page.
|
||
let text = header_text(&mut self.parser)?;
|
||
let (name, section) = parse_name_and_section(&text)?;
|
||
// .TH = Table header
|
||
// .nh = disable hyphenation
|
||
// .ad l = Left-adjust mode (disable justified).
|
||
// .ss sets sentence_space_size to 0 (prevents double spaces after .
|
||
// if . is last on the line)
|
||
write!(
|
||
self.output,
|
||
".TH \"{}\" \"{}\"\n\
|
||
.nh\n\
|
||
.ad l\n\
|
||
.ss \\n[.ss] 0\n",
|
||
escape(&name.to_uppercase())?,
|
||
section
|
||
)?;
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
fn escape(s: &str) -> Result<String, Error> {
|
||
// Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html.
|
||
// Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`.
|
||
|
||
let mut replaced = s
|
||
.replace('\\', "\\(rs")
|
||
.replace('-', "\\-")
|
||
.replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable)
|
||
.replace('–', "\\[en]") // \u{2013} en-dash
|
||
.replace('—', "\\[em]") // \u{2014} em-dash
|
||
.replace('‘', "\\[oq]") // \u{2018} left single quote
|
||
.replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe
|
||
.replace('“', "\\[lq]") // \u{201C} left double quote
|
||
.replace('”', "\\[rq]") // \u{201D} right double quote
|
||
.replace('…', "\\[u2026]") // \u{2026} ellipsis
|
||
.replace('│', "|") // \u{2502} box drawing light vertical (could use \[br])
|
||
.replace('├', "|") // \u{251C} box drawings light vertical and right
|
||
.replace('└', "`") // \u{2514} box drawings light up and right
|
||
.replace('─', "\\-") // \u{2500} box drawing light horizontal
|
||
;
|
||
if replaced.starts_with('.') {
|
||
replaced = format!("\\&.{}", &replaced[1..]);
|
||
}
|
||
|
||
if let Some(ch) = replaced.chars().find(|ch| {
|
||
!matches!(ch, '\n' | ' ' | '!'..='/' | '0'..='9'
|
||
| ':'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~')
|
||
}) {
|
||
bail!(
|
||
"character {:?} is not allowed (update the translation table if needed)",
|
||
ch
|
||
);
|
||
}
|
||
Ok(replaced)
|
||
}
|