···33PANDOC ?= pandoc
4455pandoc_media_dir = media
66-# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh).
66+# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh) and conversion script (/maintainers/scripts/db-to-md.sh).
77# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
88pandoc_commonmark_enabled_extensions = +attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
99# Not needed:
+88
maintainers/scripts/db-to-md.sh
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=. -i bash -p pandoc libxml2

# This script is temporarily needed while we transition the manual to
# CommonMark. It converts DocBook files into our CommonMark flavour.
#
# Usage: db-to-md.sh [--debug] FILE.xml...
#
# For every FILE.xml a sibling FILE.{chapter,section}.md is written,
# depending on the root element of the DocBook file. With --debug the
# pandoc JSON AST is written instead (FILE.{chapter,section}.json) and the
# intermediate temporary files are kept so that they can be inspected.
#
# libxml2 is part of the nix-shell environment because xmllint is used
# below to detect the root element.

debug=
files=()

while [ "$#" -gt 0 ]; do
  i="$1"; shift 1
  case "$i" in
    --debug)
      debug=1
      ;;
    *)
      files+=("$i")
      ;;
  esac
done

# Prints a diagnostic prefixed with the script name to stderr and aborts.
die() {
  printf 'db-to-md.sh: %s\n' "$*" >&2
  exit 1
}

# out_file_for FILE ROOT_ELEMENT OUT_EXTENSION
#
# Prints the path of the output file for FILE: any ".section.xml",
# ".chapter.xml" or ".xml" suffix is stripped and replaced by
# ".ROOT_ELEMENT.OUT_EXTENSION".
out_file_for() {
  local stem="$1" rootElement="$2" outExtension="$3"
  stem="${stem%".section.xml"}"
  stem="${stem%".chapter.xml"}"
  stem="${stem%".xml"}"
  printf '%s.%s.%s\n' "$stem" "$rootElement" "$outExtension"
}

# Intermediate files created with mktemp; removed on exit.
temp_files=()

# Removes the intermediate files. In debug mode their paths are reported to
# the user, so they are deliberately left in place.
cleanup() {
  if [[ -z $debug && ${#temp_files[@]} -gt 0 ]]; then
    rm -f -- "${temp_files[@]}"
  fi
}
trap cleanup EXIT

echo "WARNING: This is an experimental script and might not preserve all formatting." >&2
echo "Please report any issues you discover." >&2

outExtension="md"
if [[ $debug ]]; then
  outExtension="json"
fi

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile).
# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart"
if [[ $debug ]]; then
  targetLang=json
fi
pandoc_flags=(
  # Not needed:
  # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies)
  # - media extraction (was only required for diagram generator)
  # - myst-reader/roles.lua (only relevant for MyST → DocBook)
  # - link-unix-man-references.lua (links should only be added to display output)
  # - docbook-writer/rst-roles.lua (only relevant for → DocBook)
  # - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook)
  "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua"
  "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua"
  "--lua-filter=$DIR/doc/unknown-code-language.lua"
  -f docbook
  -t "$targetLang"
  --tab-stop=2
  --wrap=none
)

for file in "${files[@]}"; do
  [[ -f "$file" ]] || die "$file does not exist"

  rootElement=$(xmllint --xpath 'name(//*)' "$file") \
    || die "could not determine the root element of $file"

  case "$rootElement" in
    chapter | section) ;;
    *) die "$file contains an unsupported root element $rootElement" ;;
  esac

  outFile=$(out_file_for "$file" "$rootElement" "$outExtension")

  # Step 1: escape markup nested in code elements (pandoc would strip it).
  temp1=$(mktemp) || die "could not create a temporary file"
  temp_files+=("$temp1")
  "$DIR/doc/escape-code-markup.py" "$file" "$temp1" \
    || die "escape-code-markup.py failed for $file"
  if [[ $debug ]]; then
    echo "Converted $file to $temp1" >&2
  fi

  # Step 2: turn xrefs into links with empty labels.
  temp2=$(mktemp) || die "could not create a temporary file"
  temp_files+=("$temp2")
  "$DIR/doc/replace-xrefs-by-empty-links.py" "$temp1" "$temp2" \
    || die "replace-xrefs-by-empty-links.py failed for $file"
  if [[ $debug ]]; then
    echo "Converted $temp1 to $temp2" >&2
  fi

  # Step 3: the actual DocBook → CommonMark (or JSON AST) conversion.
  pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}" \
    || die "pandoc failed for $file"
  echo "Converted $file to $outFile" >&2
done
+97
maintainers/scripts/doc/escape-code-markup.py
···11+#! /usr/bin/env nix-shell
22+#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml
33+44+"""
55+Pandoc will strip any markup within code elements so
66+let’s escape them so that they can be handled manually.
77+"""
88+99+import lxml.etree as ET
1010+import re
1111+import sys
def replace_element_by_text(el: ET.Element, text: str) -> None:
    """
    Replace the element ``el`` in its parent by the plain string ``text``.

    The element's tail text is kept: it is appended to ``text`` before the
    combined string is attached to the preceding sibling's tail or, when
    ``el`` is the first child, to the parent's text. An element without
    a parent is left untouched.

    Author: bernulf
    Source: https://stackoverflow.com/a/10520552/160386
    SPDX-License-Identifier: CC-BY-SA-3.0
    """
    replacement = text + (el.tail or "")
    container = el.getparent()
    if container is None:
        return

    sibling = el.getprevious()
    if sibling is None:
        # First child: the text goes right after the parent's opening tag.
        container.text = (container.text or "") + replacement
    else:
        # Otherwise it continues whatever follows the previous sibling.
        sibling.tail = (sibling.tail or "") + replacement
    container.remove(el)
# Namespace URI of DocBook 5 elements.
DOCBOOK_NS = "http://docbook.org/ns/docbook"

# Elements that pandoc’s DocBook reader strips markup from.
# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs

# Elements read as CodeBlock.
_BLOCK_CODE_ELEMENTS = ["literallayout", "screen", "programlisting"]

# Elements read as (inline) Code.
_INLINE_CODE_ELEMENTS = [
    "classname", "code", "filename", "envar", "literal",
    "computeroutput", "prompt", "parameter", "option", "markup",
    "wordasword", "command", "varname", "function", "type",
    "symbol", "constant", "userinput", "systemitem",
]

code_elements = _BLOCK_CODE_ELEMENTS + _INLINE_CODE_ELEMENTS
# A single namespace declaration (xmlns="…" or xmlns:prefix="…"),
# including the whitespace in front of it.
XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
# The first tag at the very beginning of a string.
ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')

def remove_xmlns(match: re.Match) -> str:
    """
    Return the matched text with all xmlns attributes removed.

    Meant to be used as a substitution callback; the match is expected
    to contain an opening tag.
    """
    opening_tag = match[0]
    return re.sub(XMLNS_REGEX, '', opening_tag)
if __name__ == '__main__':
    # Script entry point: reads the DocBook file given as the first argument,
    # turns every element nested directly inside a code element into escaped
    # text and writes the result to the path given as the second argument.

    # Explicit check rather than `assert`, which is skipped under `python -O`.
    if len(sys.argv) < 3:
        sys.exit("usage: escape-code-markup.py <input> <output>")

    tree = ET.parse(sys.argv[1])
    name_predicate = " or ".join(f"local-name()='{el}'" for el in code_elements)

    for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
        # Serialize the element only. tostring would include the tail text
        # by default, but replace_element_by_text already re-attaches the
        # tail, so including it here would duplicate the text that follows
        # the element.
        text = ET.tostring(markup, encoding=str, with_tail=False)

        # tostring adds xmlns attributes to the element we want to stringify
        # as if it was supposed to be usable standalone.
        # We are just converting it to CDATA so we do not care.
        # Let’s strip the namespace declarations to keep the code clean.
        #
        # Note that this removes even namespaces that were potentially
        # in the original file. Though, that should be very rare –
        # most of the time, we will stringify empty DocBook elements
        # like <xref> or <co> or, at worst, <link> with xlink:href attribute.
        #
        # Also note that the regex expects the root element to be first
        # thing in the string. But that should be fine, the tostring method
        # does not produce XML declaration or doctype by default.
        text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)

        replace_element_by_text(markup, text)

    tree.write(sys.argv[2])
···11+#! /usr/bin/env nix-shell
22+#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

"""
Pandoc will try to resolve xrefs and replace them with regular links.
Let’s replace them with links with empty labels, which MyST
and our pandoc filters recognize as cross-references.
"""
99+1010+import lxml.etree as ET
1111+import sys
# Namespace of the xlink:href attribute carrying the link target.
XLINK_NS = "http://www.w3.org/1999/xlink"

# Prefix → namespace map used when searching for DocBook elements.
ns = {
    "db": "http://docbook.org/ns/docbook",
}


if __name__ == '__main__':
    # Script entry point: reads the DocBook file given as the first argument,
    # replaces every <xref linkend="X"/> by an empty <link xlink:href="#X"/>
    # and writes the result to the path given as the second argument.

    # Explicit check rather than `assert`, which is skipped under `python -O`.
    if len(sys.argv) < 3:
        sys.exit("usage: replace-xrefs-by-empty-links.py <input> <output>")

    tree = ET.parse(sys.argv[1])
    for xref in tree.findall(".//db:xref", ns):
        target_name = xref.get("linkend")
        if target_name is None:
            # Without a linkend there is nothing to point the link at;
            # leave the element alone instead of producing "#None".
            print(
                f"replace-xrefs-by-empty-links.py: skipping xref without linkend (line {xref.sourceline})",
                file=sys.stderr,
            )
            continue

        parent = xref.getparent()
        link = parent.makeelement('link')
        link.set(f"{{{XLINK_NS}}}href", f"#{target_name}")
        # lxml keeps the text following an element in that element's tail,
        # so it would leave the tree together with the replaced xref.
        # Carry it over to preserve the prose after the cross-reference.
        link.tail = xref.tail
        parent.replace(xref, link)

    tree.write(sys.argv[2])
+12
maintainers/scripts/doc/unknown-code-language.lua
--[[
Marks every CodeBlock AST node that has no classes with the “unknown” class.

A code block with a class makes Pandoc emit a fenced code block,
which is the form we prefer.
]]

function CodeBlock(elem)
  -- Blocks that already carry a class are left untouched.
  if #elem.classes > 0 then
    return
  end

  elem.classes:insert('unknown')
  return elem
end