diff --git a/CHANGELOG.md b/CHANGELOG.md index 59155254d7..b76ea286ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Upcoming release + +### Added +- Added [`canonical-site-url`](https://rust-lang.github.io/mdBook/format/configuration/renderers.html?highlight=canonical-site-url#html-renderer-options) setting, to set `<link rel="canonical">` in the HTML output of each page. + ## mdBook 0.4.51 [v0.4.50...v0.4.51](https://github.com/rust-lang/mdBook/compare/v0.4.50...v0.4.51) diff --git a/guide/src/format/configuration/renderers.md b/guide/src/format/configuration/renderers.md index a827d2936f..890009e3cf 100644 --- a/guide/src/format/configuration/renderers.md +++ b/guide/src/format/configuration/renderers.md @@ -164,6 +164,12 @@ The following configuration options are available: navigation links and script/css imports in the 404 file work correctly, even when accessing urls in subdirectories. Defaults to `/`. If `site-url` is set, make sure to use document relative links for your assets, meaning they should not start with `/`. +- **canonical-site-url:** Set the canonical URL for the book, which is used by + search engines to determine the primary URL for the content. Use this when + your site is deployed at multiple URLs. For example, when you have site + deployments for a range of versions, you can point all of them to the URL for + the latest version. Without this, your content may be penalized for + duplication, and visitors may be directed to an outdated version of the book. - **cname:** The DNS subdomain or apex domain at which your book will be hosted. 
This string will be written to a file named CNAME in the root of your site, as required by GitHub Pages (see [*Managing a custom domain for your GitHub Pages diff --git a/src/config.rs b/src/config.rs index 7ef8bcef12..9b6aa2e186 100644 --- a/src/config.rs +++ b/src/config.rs @@ -582,6 +582,8 @@ pub struct HtmlConfig { pub input_404: Option, /// Absolute url to site, used to emit correct paths for the 404 page, which might be accessed in a deeply nested directory pub site_url: Option, + /// Canonical site url, used to emit tags in the HTML. + pub canonical_site_url: Option, /// The DNS subdomain or apex domain at which your book will be hosted. This /// string will be written to a file named CNAME in the root of your site, /// as required by GitHub Pages (see [*Managing a custom domain for your @@ -632,6 +634,7 @@ impl Default for HtmlConfig { edit_url_template: None, input_404: None, site_url: None, + canonical_site_url: None, cname: None, live_reload_endpoint: None, redirect: HashMap::new(), diff --git a/src/front-end/templates/index.hbs b/src/front-end/templates/index.hbs index d30be2741d..95d3612417 100644 --- a/src/front-end/templates/index.hbs +++ b/src/front-end/templates/index.hbs @@ -10,6 +10,9 @@ {{#if base_url}} {{/if}} + {{#if canonical_url}} + + {{/if}} diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index a144b32b57..32cb1fc452 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -78,6 +78,13 @@ impl HtmlHandlebars { .to_str() .with_context(|| "Could not convert path to str")?; let filepath = Path::new(&ctx_path).with_extension("html"); + let filepath_str = filepath + .to_str() + .with_context(|| format!("Could not convert path to str: {}", filepath.display()))?; + let canonical_url = ctx.html_config.canonical_site_url.map(|canon_url| { + let canon_url = canon_url.as_str().trim_end_matches('/'); + format!("{}/{}", canon_url, 
self.clean_path(filepath_str)) + }); // "print.html" is used for the print page. if path == Path::new("print.md") { @@ -99,6 +106,8 @@ impl HtmlHandlebars { }; ctx.data.insert("path".to_owned(), json!(path)); + ctx.data + .insert("canonical_url".to_owned(), json!(canonical_url)); ctx.data.insert("content".to_owned(), json!(content)); ctx.data.insert("chapter_title".to_owned(), json!(ch.name)); ctx.data.insert("title".to_owned(), json!(title)); @@ -316,6 +325,20 @@ impl HtmlHandlebars { Ok(()) } + + /// Strips a trailing `index.html` file name from `path`, if present. + /// + /// Only a whole final component is removed: `index.html` becomes an + /// empty string and `nested/index.html` becomes `nested/`, while a name + /// such as `myindex.html` is returned unchanged. + fn clean_path(&self, path: &str) -> String { + match path.strip_suffix("index.html") { + // The remainder must be empty or end at a `/`; otherwise the + // suffix was only the tail of a longer file name. + Some(dir) if dir.is_empty() || dir.ends_with('/') => dir.to_string(), + _ => path.to_string(), + } + } } impl Renderer for HtmlHandlebars { diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index 1d6cf476e8..fccd3f46c6 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -41,3 +41,25 @@ fn first_chapter_is_copied_as_index_even_if_not_first_elem() { ]], ); } + +// Checks that a canonical URL is generated correctly. 
+#[test] +fn canonical_url() { + BookTest::from_dir("rendering/canonical_url") + .check_file_contains( + "book/index.html", // root `index.html` collapses to the site root + r#"<link rel="canonical" href="https://example.com/test/">"#, // NOTE(review): exact tag markup assumed; confirm against index.hbs + ) + .check_file_contains( + "book/canonical_url.html", // ordinary pages keep their file name + r#"<link rel="canonical" href="https://example.com/test/canonical_url.html">"#, + ) + .check_file_contains( + "book/nested/page.html", + r#"<link rel="canonical" href="https://example.com/test/nested/page.html">"#, + ) + .check_file_contains( + "book/nested/index.html", // nested `index.html` collapses to its directory + r#"<link rel="canonical" href="https://example.com/test/nested/">"#, + ); +} diff --git a/tests/testsuite/rendering/canonical_url/book.toml b/tests/testsuite/rendering/canonical_url/book.toml new file mode 100644 index 0000000000..3074184f5f --- /dev/null +++ b/tests/testsuite/rendering/canonical_url/book.toml @@ -0,0 +1,6 @@ +[book] +title = "canonical_url test" + +[output.html] +# trailing slash is not necessary or recommended, but tested here +canonical-site-url = "https://example.com/test/" diff --git a/tests/testsuite/rendering/canonical_url/src/SUMMARY.md b/tests/testsuite/rendering/canonical_url/src/SUMMARY.md new file mode 100644 index 0000000000..4829ef85c3 --- /dev/null +++ b/tests/testsuite/rendering/canonical_url/src/SUMMARY.md @@ -0,0 +1,4 @@ +- [Intro](README.md) +- [Canonical URL](canonical_url.md) +- [Nested Page](nested/page.md) +- [Nested Index](nested/index.md)