···33PANDOC ?= pandoc4455pandoc_media_dir = media66-# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh).66+# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh) and conversion script (/maintainers/scripts/db-to-md.sh).77# TODO: Remove raw-attribute when we can get rid of DocBook altogether.88pandoc_commonmark_enabled_extensions = +attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute99# Not needed:
+88
maintainers/scripts/db-to-md.sh
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=. -i bash -p pandoc -p libxml2

# This script is temporarily needed while we transition the manual to
# CommonMark. It converts DocBook files into our CommonMark flavour.
#
# Usage: db-to-md.sh [--debug] <file.xml>...
#
# Each input is converted to a sibling file whose name is derived from the
# input's root element: <name>.chapter.md or <name>.section.md.
# With --debug, pandoc's JSON AST is written instead (<name>.*.json) and the
# intermediate files are kept so that they can be inspected.
#
# libxml2 is requested in the shebang because the script calls xmllint.

debug=
files=()

while [ "$#" -gt 0 ]; do
    i="$1"; shift 1
    case "$i" in
        --debug)
            debug=1
            ;;
        *)
            files+=("$i")
            ;;
    esac
done

# Diagnostics go to the already-open stderr descriptor (>&2). Re-opening
# /dev/stderr would truncate a log file that stderr was redirected to.
echo "WARNING: This is an experimental script and might not preserve all formatting." >&2
echo "Please report any issues you discover." >&2

# Directory containing this script; the helper scripts live in "$DIR/doc".
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile).
# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute

outExtension="md"
targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart"
if [[ $debug ]]; then
    outExtension="json"
    targetLang=json
fi

pandoc_flags=(
    # Not needed:
    # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies)
    # - media extraction (was only required for diagram generator)
    # - myst-reader/roles.lua (only relevant for MyST → DocBook)
    # - link-unix-man-references.lua (links should only be added to display output)
    # - docbook-writer/rst-roles.lua (only relevant for → DocBook)
    # - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook)
    "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua"
    "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua"
    "--lua-filter=$DIR/doc/unknown-code-language.lua"
    -f docbook
    -t "$targetLang"
    --tab-stop=2
    --wrap=none
)

# Intermediate files created so far; removed on exit unless --debug is set.
tempFiles=()
cleanup() {
    if [[ -z $debug && ${#tempFiles[@]} -gt 0 ]]; then
        rm -f -- "${tempFiles[@]}"
    fi
}
trap cleanup EXIT

for file in "${files[@]}"; do
    if [[ ! -f "$file" ]]; then
        echo "db-to-md.sh: $file does not exist" >&2
        exit 1
    fi

    # name() of a node-set yields the name of its first node in document
    # order, i.e. the root element.
    rootElement=$(xmllint --xpath 'name(//*)' "$file") || exit 1

    if [[ $rootElement = chapter ]]; then
        extension=".chapter.$outExtension"
    elif [[ $rootElement = section ]]; then
        extension=".section.$outExtension"
    else
        echo "db-to-md.sh: $file contains an unsupported root element $rootElement" >&2
        exit 1
    fi

    # Strip any existing .section.xml / .chapter.xml / .xml suffix before
    # appending the extension chosen from the root element.
    outFile="${file%".section.xml"}"
    outFile="${outFile%".chapter.xml"}"
    outFile="${outFile%".xml"}$extension"

    # Step 1: escape markup nested in code elements (pandoc would strip it).
    temp1=$(mktemp) || exit 1
    tempFiles+=("$temp1")
    "$DIR/doc/escape-code-markup.py" "$file" "$temp1" || exit 1
    if [[ $debug ]]; then
        echo "Converted $file to $temp1" >&2
    fi

    # Step 2: turn xrefs into links with empty labels.
    temp2=$(mktemp) || exit 1
    tempFiles+=("$temp2")
    "$DIR/doc/replace-xrefs-by-empty-links.py" "$temp1" "$temp2" || exit 1
    if [[ $debug ]]; then
        echo "Converted $temp1 to $temp2" >&2
    fi

    # Step 3: the actual DocBook → CommonMark (or JSON) conversion.
    pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}" || exit 1
    echo "Converted $file to $outFile" >&2
done
+97
maintainers/scripts/doc/escape-code-markup.py
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

"""
Pandoc will strip any markup within code elements so
let’s escape them so that they can be handled manually.

Usage: escape-code-markup.py <input> <output>
"""

import lxml.etree as ET
import re
import sys


def replace_element_by_text(el: ET.Element, text: str) -> None:
    """
    Replace ``el`` in its parent by the plain string ``text``.

    The tail of ``el`` is appended to ``text``, so ``text`` itself must not
    already contain it. The combined string is attached to the previous
    sibling's tail or, when there is no previous sibling, to the parent's
    text. Elements without a parent are left untouched.

    Author: bernulf
    Source: https://stackoverflow.com/a/10520552/160386
    SPDX-License-Identifier: CC-BY-SA-3.0
    """
    text = text + (el.tail or "")
    parent = el.getparent()
    if parent is not None:
        previous = el.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + text
        else:
            parent.text = (parent.text or "") + text
        parent.remove(el)


DOCBOOK_NS = "http://docbook.org/ns/docbook"

# List of elements that pandoc’s DocBook reader strips markup from.
# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
code_elements = [
    # CodeBlock
    "literallayout",
    "screen",
    "programlisting",
    # Code (inline)
    "classname",
    "code",
    "filename",
    "envar",
    "literal",
    "computeroutput",
    "prompt",
    "parameter",
    "option",
    "markup",
    "wordasword",
    "command",
    "varname",
    "function",
    "type",
    "symbol",
    "constant",
    "userinput",
    "systemitem",
]

XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')


def remove_xmlns(match: re.Match) -> str:
    """
    Removes xmlns attributes.

    Expects a match containing an opening tag.
    """
    return XMLNS_REGEX.sub('', match.group(0))


def main() -> None:
    """
    Escape every child element of a DocBook code element in the input file.

    Reads the file named by the first command-line argument, replaces each
    element nested directly in one of ``code_elements`` by its serialised
    source text, and writes the result to the second argument.
    """
    # Explicit check rather than ``assert``, which is stripped under ``-O``.
    if len(sys.argv) < 3:
        sys.exit("usage: escape-code-markup.py <input> <output>")

    tree = ET.parse(sys.argv[1])
    name_predicate = " or ".join(f"local-name()='{el}'" for el in code_elements)

    for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
        # with_tail=False: replace_element_by_text() appends the tail itself.
        # With the default (with_tail=True) the text following the element
        # would end up in the output twice.
        text = ET.tostring(markup, encoding=str, with_tail=False)

        # tostring adds xmlns attributes to the element we want to stringify
        # as if it was supposed to be usable standalone.
        # We are just converting it to CDATA so we do not care.
        # Let’s strip the namespace declarations to keep the code clean.
        #
        # Note that this removes even namespaces that were potentially
        # in the original file. Though, that should be very rare –
        # most of the time, we will stringify empty DocBook elements
        # like <xref> or <co> or, at worst, <link> with xlink:href attribute.
        #
        # Also note that the regex expects the root element to be first
        # thing in the string. But that should be fine, the tostring method
        # does not produce XML declaration or doctype by default.
        text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)

        replace_element_by_text(markup, text)

    tree.write(sys.argv[2])


if __name__ == '__main__':
    main()
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

"""
Pandoc will try to resolve xrefs and replace them with regular links.
Let’s replace them with links with empty labels which MyST
and our pandoc filters recognize as cross-references.

Usage: replace-xrefs-by-empty-links.py <input> <output>
"""

import lxml.etree as ET
import sys

XLINK_NS = "http://www.w3.org/1999/xlink"

ns = {
    "db": "http://docbook.org/ns/docbook",
}


def main() -> None:
    """
    Rewrite every DocBook ``xref`` in the input file as an empty ``link``.

    Reads the file named by the first command-line argument and writes the
    result to the second. Each ``<xref linkend="ID"/>`` becomes a childless
    ``<link xlink:href="#ID"/>``.
    """
    # Explicit check rather than ``assert``, which is stripped under ``-O``.
    if len(sys.argv) < 3:
        sys.exit("usage: replace-xrefs-by-empty-links.py <input> <output>")

    tree = ET.parse(sys.argv[1])
    # findall() returns a list, so replacing nodes while looping is safe.
    for xref in tree.findall(".//db:xref", ns):
        target_name = xref.get("linkend")
        if target_name is None:
            # Without a linkend there is no target to point at; leave the
            # xref untouched instead of emitting a bogus "#None" link.
            continue

        parent = xref.getparent()
        link = parent.makeelement('link')
        link.set(f"{{{XLINK_NS}}}href", f"#{target_name}")
        # In lxml the tail belongs to the element and leaves the tree with
        # it on replace(); carry it over so the text after the xref survives.
        link.tail = xref.tail
        parent.replace(xref, link)

    tree.write(sys.argv[2])


if __name__ == '__main__':
    main()
+12
maintainers/scripts/doc/unknown-code-language.lua
--[[
Pandoc Lua filter: tags every CodeBlock that has no classes at all with
the class “unknown”.

A code block carrying a class is written by Pandoc as a fenced code
block, which is the form we prefer.
]]

function CodeBlock(elem)
  -- Blocks that already declare a class stay as they are; returning
  -- nothing tells Pandoc to keep the original element.
  if #elem.classes ~= 0 then
    return
  end

  elem.classes:insert('unknown')
  return elem
end