automarkup.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. # SPDX-License-Identifier: GPL-2.0
  2. # Copyright 2019 Jonathan Corbet <corbet@lwn.net>
  3. #
  4. # Apply kernel-specific tweaks after the initial document processing
  5. # has been done.
  6. #
  7. from docutils import nodes
  8. import sphinx
  9. from sphinx import addnodes
  10. from sphinx.errors import NoUri
  11. import re
  12. from itertools import chain
  13. #
  14. # Python 2 lacks re.ASCII...
  15. #
  16. try:
  17. ascii_p3 = re.ASCII
  18. except AttributeError:
  19. ascii_p3 = 0
#
# Regex nastiness.  Of course.
# Try to identify "function()" that's not already marked up some
# other way.  The pattern requires a word boundary, an identifier of at
# least two characters and a literal "()"; group 2 is the bare name.
# Sphinx doesn't like a lot of stuff right after a :c:func: block
# (i.e. ":c:func:`mmap()`s" flakes out).
# NOTE(review): the pattern as written places no restriction on what
# follows the "()" -- confirm whether one is still wanted.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef.
# Group 1 is the keyword, group 2 the type name.
#
RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
                             flags=ascii_p3)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef, so each keyword gets its own pattern (same groups as above).
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
  41. #
  42. # Detects a reference to a documentation page of the form Documentation/... with
  43. # an optional extension
  44. #
  45. RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')
  46. RE_namespace = re.compile(r'^\s*..\s*c:namespace::\s*(\S+)\s*$')
  47. #
  48. # Reserved C words that we should skip when cross-referencing
  49. #
  50. Skipnames = [ 'for', 'if', 'register', 'sizeof', 'struct', 'unsigned' ]
  51. #
  52. # Many places in the docs refer to common system calls. It is
  53. # pointless to try to cross-reference them and, as has been known
  54. # to happen, somebody defining a function by these names can lead
  55. # to the creation of incorrect and confusing cross references. So
  56. # just don't even try with these names.
  57. #
  58. Skipfuncs = [ 'open', 'close', 'read', 'write', 'fcntl', 'mmap',
  59. 'select', 'poll', 'fork', 'execve', 'clone', 'ioctl',
  60. 'socket' ]
  61. c_namespace = ''
#
# Detect references to commits: the word "commit", whitespace, then 12 to
# 40 hex digits captured as the named group 'rev', optionally followed by
# whitespace and a parenthesised, double-quoted string.  Matching is
# case-insensitive, and DOTALL lets the quoted part span line breaks.
#
RE_git = re.compile(r'commit\s+(?P<rev>[0-9a-f]{12,40})(?:\s+\(".*?"\))?',
                    flags=re.IGNORECASE | re.DOTALL)
  67. def markup_refs(docname, app, node):
  68. t = node.astext()
  69. done = 0
  70. repl = [ ]
  71. #
  72. # Associate each regex with the function that will markup its matches
  73. #
  74. markup_func_sphinx2 = {RE_doc: markup_doc_ref,
  75. RE_function: markup_c_ref,
  76. RE_generic_type: markup_c_ref}
  77. markup_func_sphinx3 = {RE_doc: markup_doc_ref,
  78. RE_function: markup_func_ref_sphinx3,
  79. RE_struct: markup_c_ref,
  80. RE_union: markup_c_ref,
  81. RE_enum: markup_c_ref,
  82. RE_typedef: markup_c_ref,
  83. RE_git: markup_git}
  84. if sphinx.version_info[0] >= 3:
  85. markup_func = markup_func_sphinx3
  86. else:
  87. markup_func = markup_func_sphinx2
  88. match_iterators = [regex.finditer(t) for regex in markup_func]
  89. #
  90. # Sort all references by the starting position in text
  91. #
  92. sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
  93. for m in sorted_matches:
  94. #
  95. # Include any text prior to match as a normal text node.
  96. #
  97. if m.start() > done:
  98. repl.append(nodes.Text(t[done:m.start()]))
  99. #
  100. # Call the function associated with the regex that matched this text and
  101. # append its return to the text
  102. #
  103. repl.append(markup_func[m.re](docname, app, m))
  104. done = m.end()
  105. if done < len(t):
  106. repl.append(nodes.Text(t[done:]))
  107. return repl
  108. #
  109. # Keep track of cross-reference lookups that failed so we don't have to
  110. # do them again.
  111. #
  112. failed_lookups = { }
  113. def failure_seen(target):
  114. return (target) in failed_lookups
  115. def note_failure(target):
  116. failed_lookups[target] = True
  117. #
  118. # In sphinx3 we can cross-reference to C macro and function, each one with its
  119. # own C role, but both match the same regex, so we try both.
  120. #
  121. def markup_func_ref_sphinx3(docname, app, match):
  122. cdom = app.env.domains['c']
  123. #
  124. # Go through the dance of getting an xref out of the C domain
  125. #
  126. base_target = match.group(2)
  127. target_text = nodes.Text(match.group(0))
  128. xref = None
  129. possible_targets = [base_target]
  130. # Check if this document has a namespace, and if so, try
  131. # cross-referencing inside it first.
  132. if c_namespace:
  133. possible_targets.insert(0, c_namespace + "." + base_target)
  134. if base_target not in Skipnames:
  135. for target in possible_targets:
  136. if (target not in Skipfuncs) and not failure_seen(target):
  137. lit_text = nodes.literal(classes=['xref', 'c', 'c-func'])
  138. lit_text += target_text
  139. pxref = addnodes.pending_xref('', refdomain = 'c',
  140. reftype = 'function',
  141. reftarget = target,
  142. modname = None,
  143. classname = None)
  144. #
  145. # XXX The Latex builder will throw NoUri exceptions here,
  146. # work around that by ignoring them.
  147. #
  148. try:
  149. xref = cdom.resolve_xref(app.env, docname, app.builder,
  150. 'function', target, pxref,
  151. lit_text)
  152. except NoUri:
  153. xref = None
  154. if xref:
  155. return xref
  156. note_failure(target)
  157. return target_text
  158. def markup_c_ref(docname, app, match):
  159. class_str = {# Sphinx 2 only
  160. RE_function: 'c-func',
  161. RE_generic_type: 'c-type',
  162. # Sphinx 3+ only
  163. RE_struct: 'c-struct',
  164. RE_union: 'c-union',
  165. RE_enum: 'c-enum',
  166. RE_typedef: 'c-type',
  167. }
  168. reftype_str = {# Sphinx 2 only
  169. RE_function: 'function',
  170. RE_generic_type: 'type',
  171. # Sphinx 3+ only
  172. RE_struct: 'struct',
  173. RE_union: 'union',
  174. RE_enum: 'enum',
  175. RE_typedef: 'type',
  176. }
  177. cdom = app.env.domains['c']
  178. #
  179. # Go through the dance of getting an xref out of the C domain
  180. #
  181. base_target = match.group(2)
  182. target_text = nodes.Text(match.group(0))
  183. xref = None
  184. possible_targets = [base_target]
  185. # Check if this document has a namespace, and if so, try
  186. # cross-referencing inside it first.
  187. if c_namespace:
  188. possible_targets.insert(0, c_namespace + "." + base_target)
  189. if base_target not in Skipnames:
  190. for target in possible_targets:
  191. if not (match.re == RE_function and target in Skipfuncs):
  192. lit_text = nodes.literal(classes=['xref', 'c', class_str[match.re]])
  193. lit_text += target_text
  194. pxref = addnodes.pending_xref('', refdomain = 'c',
  195. reftype = reftype_str[match.re],
  196. reftarget = target, modname = None,
  197. classname = None)
  198. #
  199. # XXX The Latex builder will throw NoUri exceptions here,
  200. # work around that by ignoring them.
  201. #
  202. try:
  203. xref = cdom.resolve_xref(app.env, docname, app.builder,
  204. reftype_str[match.re], target, pxref,
  205. lit_text)
  206. except NoUri:
  207. xref = None
  208. if xref:
  209. return xref
  210. return target_text
  211. #
  212. # Try to replace a documentation reference of the form Documentation/... with a
  213. # cross reference to that page
  214. #
  215. def markup_doc_ref(docname, app, match):
  216. stddom = app.env.domains['std']
  217. #
  218. # Go through the dance of getting an xref out of the std domain
  219. #
  220. absolute = match.group(1)
  221. target = match.group(2)
  222. if absolute:
  223. target = "/" + target
  224. xref = None
  225. pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc',
  226. reftarget = target, modname = None,
  227. classname = None, refexplicit = False)
  228. #
  229. # XXX The Latex builder will throw NoUri exceptions here,
  230. # work around that by ignoring them.
  231. #
  232. try:
  233. xref = stddom.resolve_xref(app.env, docname, app.builder, 'doc',
  234. target, pxref, None)
  235. except NoUri:
  236. xref = None
  237. #
  238. # Return the xref if we got it; otherwise just return the plain text.
  239. #
  240. if xref:
  241. return xref
  242. else:
  243. return nodes.Text(match.group(0))
  244. def get_c_namespace(app, docname):
  245. source = app.env.doc2path(docname)
  246. with open(source) as f:
  247. for l in f:
  248. match = RE_namespace.search(l)
  249. if match:
  250. return match.group(1)
  251. return ''
  252. def markup_git(docname, app, match):
  253. # While we could probably assume that we are running in a git
  254. # repository, we can't know for sure, so let's just mechanically
  255. # turn them into git.kernel.org links without checking their
  256. # validity. (Maybe we can do something in the future to warn about
  257. # these references if this is explicitly requested.)
  258. text = match.group(0)
  259. rev = match.group('rev')
  260. return nodes.reference('', nodes.Text(text),
  261. refuri=f'https://git.kernel.org/torvalds/c/{rev}')
  262. def auto_markup(app, doctree, name):
  263. global c_namespace
  264. c_namespace = get_c_namespace(app, name)
  265. def text_but_not_a_reference(node):
  266. # The nodes.literal test catches ``literal text``, its purpose is to
  267. # avoid adding cross-references to functions that have been explicitly
  268. # marked with cc:func:.
  269. if not isinstance(node, nodes.Text) or isinstance(node.parent, nodes.literal):
  270. return False
  271. child_of_reference = False
  272. parent = node.parent
  273. while parent:
  274. if isinstance(parent, nodes.Referential):
  275. child_of_reference = True
  276. break
  277. parent = parent.parent
  278. return not child_of_reference
  279. #
  280. # This loop could eventually be improved on. Someday maybe we
  281. # want a proper tree traversal with a lot of awareness of which
  282. # kinds of nodes to prune. But this works well for now.
  283. #
  284. for para in doctree.traverse(nodes.paragraph):
  285. for node in para.traverse(condition=text_but_not_a_reference):
  286. node.parent.replace(node, markup_refs(name, app, node))
  287. def setup(app):
  288. app.connect('doctree-resolved', auto_markup)
  289. return {
  290. 'parallel_read_safe': True,
  291. 'parallel_write_safe': True,
  292. }