2022-02-17 05:47:38 +00:00
|
|
|
|
import CMark
|
|
|
|
|
import DocGen4.Output.Template
|
|
|
|
|
import Lean.Data.Parsec
|
2023-03-14 11:32:32 +00:00
|
|
|
|
import UnicodeBasic
|
2022-02-17 05:47:38 +00:00
|
|
|
|
|
2023-03-11 17:06:13 +00:00
|
|
|
|
open Lean Xml Parser Parsec DocGen4.Process
|
2022-02-17 05:47:38 +00:00
|
|
|
|
|
|
|
|
|
namespace DocGen4
|
|
|
|
|
namespace Output
|
|
|
|
|
|
2022-02-20 05:28:48 +00:00
|
|
|
|
/--
Auxiliary function for `splitAround`.

Walks `s` starting at position `i`. `b` is the start of the piece that is currently being
accumulated and `r` holds the pieces emitted so far, in reverse order. Whenever the character
at `i` satisfies `p`, the pending piece (from `b` up to, but excluding, that character) and the
character itself are both emitted. At the end of the string the last pending piece is emitted
(possibly the empty string) and the accumulated list is returned in source order.
-/
@[specialize] partial def splitAroundAux (s : String) (p : Char → Bool) (b i : String.Pos) (r : List String) : List String :=
  if s.atEnd i then
    ((s.extract b i) :: r).reverse
  else
    let c := s.get i
    if p c then
      -- The pending piece ends exactly where the separator starts, i.e. at `i`.
      -- Stepping one byte back from the position after the separator is only
      -- correct for one-byte characters and yields an invalid end position for
      -- multi-byte separators such as U+00A0.
      let next := s.next i
      splitAroundAux s p next next (c.toString :: s.extract b i :: r)
    else
      splitAroundAux s p b (s.next i) r
|
|
|
|
|
|
|
|
|
|
/--
Like `String.split` from Lean core, except that the separator characters are kept in the
result as one-character strings of their own.

e.g. `splitAround "a,b,c" (fun c => c = ',') = ["a", ",", "b", ",", "c"]`
-/
def splitAround (s : String) (p : Char → Bool) : List String :=
  -- Both the start of the pending piece and the scan position begin at the front of `s`.
  let start : String.Pos := 0
  splitAroundAux s p start start []
|
|
|
|
|
|
2022-10-05 10:05:58 +00:00
|
|
|
|
/-- The default `Element` has an empty tag name, no attributes and no children. -/
instance : Inhabited Element where
  default := Element.Element "" Lean.RBMap.empty #[]
|
2022-02-17 16:46:02 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Parse a `String` holding zero or more consecutive Xml/Html documents.

Each document is a `prolog`, followed by one root `element`, followed by any number of `Misc`
items; only the root elements are collected. The parser requires the whole input to be consumed.
-/
def manyDocument : Parsec (Array Element) := do
  let documents ← many do
    let _ ← prolog
    let root ← element
    let _ ← many Misc
    pure root
  eof
  pure documents
|
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Compute the `id` of a heading element from its text content:

1. Characters in the `letter`, `mark`, `number` and `symbol` unicode categories are kept.
2. Every run of characters in the `punctuation`, `separator` and `other` categories that sits
   between kept characters becomes a single dash. Empty pieces are discarded before joining,
   so a run at the very start or end of the text contributes no dash.
3. Upper and lower case are preserved.
4. Xml/Html tags and comments are ignored; only character data is used.
-/
partial def xmlGetHeadingId (el : Xml.Element) : String :=
  replaceCharSeq unicodeToDrop "-" (elementToPlainText el)
where
  -- concatenated character data of all descendants of `e`
  elementToPlainText (e : Xml.Element) : String :=
    match e with
    | Element.Element _ _ children =>
      String.join (children.toList.map contentToPlainText)
  -- character data of a single content node; comments contribute nothing
  contentToPlainText (c : Xml.Content) : String :=
    match c with
    | Content.Character text => text
    | Content.Element child => elementToPlainText child
    | Content.Comment _ => ""
  -- split `s` at characters matching `pattern`, drop empty pieces, rejoin with `replacement`
  replaceCharSeq (pattern : Char → Bool) (replacement : String) (s : String) : String :=
    let pieces := (s.split pattern).filter fun piece => !piece.isEmpty
    replacement.intercalate pieces
  -- characters that do not survive in a heading id
  unicodeToDrop (c : Char) : Bool :=
    let cats := [
      Unicode.GeneralCategory.P, -- punctuation
      Unicode.GeneralCategory.Z, -- separator
      Unicode.GeneralCategory.C  -- other
    ]
    cats.any fun cat => Unicode.isInGeneralCategory c cat
|
2022-02-17 07:26:17 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Try to turn the textual name `s` into a link, looking first globally and then in the current
module.

* If `s` ends in `.lean` and contains a `/`, it is treated as a source path: the link is the
  root followed by `s` with its last five characters replaced by `.html`.
* Otherwise `s` must decode as a name. A declaration with exactly that name is preferred, then
  a module with exactly that name.
* If neither exists, the members of the current module are searched for a declaration whose
  trailing name components agree with those of the searched name.

Returns `none` when `s` is not a name or nothing matches.
-/
def nameToLink? (s : String) : HtmlM (Option String) := do
  let res ← getResult
  if s.endsWith ".lean" && s.contains '/' then
    let root ← getRoot
    return some (root ++ s.dropRight 5 ++ ".html")
  let some name := Lean.Syntax.decodeNameLit ("`" ++ s)
    | return none
  -- a declaration with exactly this name
  if res.name2ModIdx.contains name then
    return some (← declNameToLink name)
  -- a module with exactly this name
  if res.moduleNames.contains name then
    return some (← moduleNameToLink name)
  -- otherwise look for a similar name in the module currently being rendered
  let some currentName ← getCurrentName
    | return none
  let candidates := filterDocInfo (res.moduleInfo.find! currentName).members
  match candidates.find? (fun info => sameEnd info.getName name) with
  | some info => return some (← declNameToLink info.getName)
  | none => return none
where
  -- Compare the components of both names pairwise, starting from the last one;
  -- only as many components as `List.zip` pairs up are compared.
  sameEnd (n1 n2 : Name) : Bool :=
    (List.zip n1.componentsRev n2.componentsRev).all fun (a, b) => a == b
|
2022-02-17 16:46:02 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Rewrite a link target found in a docstring:

1. `##name` is an intra-documentation link: `name` is resolved with `nameToLink?`, and the
   function panics when it cannot be resolved.
2. `#id` is an in-page id link and is returned unchanged.
3. Anything starting with `http` is taken to be absolute and is returned unchanged.
4. Everything else is a relative link and gets the root url prepended.
-/
def extendLink (s : String) : HtmlM String := do
  if s.startsWith "##" then
    -- intra doc link
    match (← nameToLink? (s.drop 2)) with
    | some link => return link
    | none =>
      panic! s!"Cannot find {s.drop 2}, only full name and abbrev in current module is supported"
  else if s.startsWith "#" || s.startsWith "http" then
    -- id links and absolute urls are left alone
    return s
  else
    -- relative url
    let root ← getRoot
    return root ++ s
|
2022-02-17 16:46:02 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Decorate a heading element.

The heading gets an `id` computed by `xmlGetHeadingId` and the class `markdown-heading`
(both inserted into its existing attributes). Its child elements are passed through
`modifyElement`, other children are kept as they are, and a space plus an
`<a class="hover-link" href="#id">#</a>` anchor are appended after the children.
-/
def addHeadingAttributes (el : Element) (modifyElement : Element → HtmlM Element) : HtmlM Element := do
  let ⟨name, attrs, contents⟩ := el
  let headingId := xmlGetHeadingId el
  -- the anchor that links to this very heading
  let hoverAttrs := Lean.RBMap.empty.insert "class" "hover-link" |>.insert "href" s!"#{headingId}"
  let hoverLink := Element.Element "a" hoverAttrs #[Content.Character "#"]
  let headingAttrs := (attrs.insert "id" headingId).insert "class" "markdown-heading"
  -- rewrite nested elements in order, keep text and comments
  let mut children : Array Content := #[]
  for c in contents do
    match c with
    | Content.Element e => children := children.push (Content.Element (← modifyElement e))
    | other => children := children.push other
  children := children.push (Content.Character " ")
  children := children.push (Content.Element hoverLink)
  return Element.Element name headingAttrs children
|
2022-02-19 19:14:58 +00:00
|
|
|
|
|
|
|
|
|
/--
Rewrite the `href` attribute of an element with `extendLink`.
An element without an `href` attribute is returned unchanged.
-/
def extendAnchor (el : Element) : HtmlM Element := do
  let ⟨name, attrs, contents⟩ := el
  match attrs.find? "href" with
  | none => return el
  | some href =>
    let target ← extendLink href
    return Element.Element name (attrs.insert "href" target) contents
|
2022-02-19 19:14:58 +00:00
|
|
|
|
|
|
|
|
|
/--
Automatically add intra documentation links inside an inline code span.

Every text child is cut into pieces with `splitAround` at separator/other characters (the
separators are kept as pieces), and each piece that `nameToLink?` can resolve is wrapped in an
`<a href=…>`. Children that are not text are kept as they are.
-/
def autoLink (el : Element) : HtmlM Element := do
  let ⟨name, attrs, contents⟩ := el
  let mut rebuilt : Array Content := #[]
  for child in contents do
    match child with
    | Content.Character text =>
      for piece in splitAround text unicodeToSplit do
        rebuilt := rebuilt ++ (← linkify piece)
    | _ => rebuilt := rebuilt.push child
  return Element.Element name attrs rebuilt
where
  -- an `<a>` node pointing at `href` and showing `label`
  anchorTo (href label : String) : Content :=
    Content.Element <| Element.Element "a" (Lean.RBMap.empty.insert "href" href) #[Content.Character label]
  -- Link the whole piece if it resolves; otherwise retry with only the part after the
  -- last `.` and keep the part up to and including that `.` as plain text.
  linkify (s : String) : HtmlM (List Content) := do
    if let some link ← nameToLink? s then
      return [anchorTo link s]
    let sHead := s.dropRightWhile (· != '.')
    let sTail := s.takeRightWhile (· != '.')
    match (← nameToLink? sTail) with
    | some link' => return [Content.Character sHead, anchorTo link' sTail]
    | none => return [Content.Character s]
  -- characters at which the text of a code span is cut into pieces
  unicodeToSplit (c : Char) : Bool :=
    let cats := [
      Unicode.GeneralCategory.Z, -- separator
      Unicode.GeneralCategory.C  -- other
    ]
    cats.any fun cat => Unicode.isInGeneralCategory c cat
|
2023-03-11 17:06:13 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Core function of modifying the cmark rendered docstring html.

Dispatches on the tag name:
* `h1` … `h6` are handled by `addHeadingAttributes`,
* `a` is handled by `extendAnchor`,
* `code` is handled by `autoLink`, unless its `class` attribute carries a `language-…` entry
  other than `language-lean`,
* any other element is rebuilt with `modifyElement` applied to each child element.
-/
partial def modifyElement (element : Element) : HtmlM Element := do
  let ⟨name, attrs, contents⟩ := element
  -- don't linkify code blocks explicitly tagged with a language other than lean
  let classes := ((attrs.find? "class").getD "").splitOn
  let linkable := classes.all fun cls => cls == "language-lean" || !cls.startsWith "language-"
  if ["h1", "h2", "h3", "h4", "h5", "h6"].contains name then
    -- add id and class to <h_></h_>
    addHeadingAttributes element modifyElement
  else if name == "a" then
    -- extend relative href for <a></a>
    extendAnchor element
  else if name == "code" && linkable then
    -- auto link for inline <code></code>
    autoLink element
  else
    -- recursively modify
    let children ← contents.mapM fun c => do
      match c with
      | Content.Element inner => pure (Content.Element (← modifyElement inner))
      | _ => pure c
    return Element.Element name attrs children
|
2022-02-17 05:47:38 +00:00
|
|
|
|
|
2022-02-19 19:14:58 +00:00
|
|
|
|
/--
Convert a docstring to Html.

The docstring is escaped with `Html.escape` and rendered with `CMark.renderHtml`. The rendered
text is then parsed with `manyDocument`; on success every parsed root element is passed through
`modifyElement` and turned back into text. If parsing fails, the rendered text is returned
as a single node without any post-processing.
-/
def docStringToHtml (s : String) : HtmlM (Array Html) := do
  let rendered := CMark.renderHtml (Html.escape s)
  let Parsec.ParseResult.success _ roots := manyDocument rendered.mkIterator
    | return #[Html.text rendered]
  roots.mapM fun root => do
    let modified ← modifyElement root
    pure (Html.text <| toString modified)
|
2022-02-17 05:47:38 +00:00
|
|
|
|
|
|
|
|
|
end Output
|
|
|
|
|
end DocGen4
|