;;; Copyright (c) 2008 Clozure Associates. All Rights Reserved. ;;; ;;; (doc-splitter:split-doc-file "ccl:doc;ccl-documentation.html" "ccl:doc;manual;") ;;; (eval-when (eval compile load) (defpackage doc-splitter (:use common-lisp ccl) (:export #:split-doc-file))) (in-package doc-splitter) (defparameter *output-template* " Clozure CL Documentation
((PREVIOUS)) ((NEXT)) ((HOME)) ((GLOSSARY)) ((INDEX))

((BODY))
((PREVIOUS)) ((NEXT)) ((HOME)) ((GLOSSARY)) ((INDEX))
") (defparameter *links-bgcolor* "lightgray") (defparameter *link-names* '((:previous . "Previous") (:next . "Next") (:up . "Up") (:home . "Table of Contents") (:glossary . "Glossary") (:index . "Index"))) (defun output-split-doc-header-link (stream sf link) (let ((name (cdr (assq link *link-names*)))) (if sf (format stream "~a~@[ ~a~]" (split-file-name sf) name (and (memq link '(:previous :next)) (if (eq (split-file-type sf) :sect1) "Section" "Chapter"))) (format stream "~:(~a~)" name)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defstruct node start end) ;; Text node (defstruct (tnode (:include node)) ) ;; Compound node (defstruct (cnode (:include node)) tag tag-end children) (defmethod print-object ((node cnode) stream) (print-unreadable-object (node stream :type t) (format stream "~s ~s:~s:~s~a" (cnode-tag node) (cnode-start node) (cnode-tag-end node) (cnode-end node) (cond ((null (cnode-children node)) "") ((null (cdr (cnode-children node))) " 1 child") (t (format nil " ~s children" (length (cnode-children node)))))))) (defun node-tag (node) (and (cnode-p node) (cnode-tag node))) ;; Toplevel node (defstruct html string node) (defmethod print-object ((node html) stream) (print-unreadable-object (node stream :type t) (let ((*print-string-length* 400)) (format stream ":STRING ~s :NODE ~s" (html-string node) (html-node node))))) (defstruct split-file type name up nodes) (defvar *cur-html* nil) (defun split-doc-file (html directory) (unless (html-p html) (setq html (read-html-file html))) (ensure-directories-exist directory) (let* ((*cur-html* html) (splits (doc-file-splits html)) (id-table (make-hash-table :test #'equal)) (top (find :book splits :key #'split-file-type)) (glossary (find :glossary splits :key #'split-file-type)) (index (find :symbol-index splits :key #'split-file-type))) (loop for sf in splits as name = (split-file-name sf) do (loop for node in (split-file-nodes sf) do (doc-file-register-ids node name id-table))) (loop for prev = nil then sf for prev-chap = nil then (if (eq (split-file-type sf) :sect1) prev-chap sf) for sfs on splits for sf = (car sfs) do (with-open-file (stream (merge-pathnames (split-file-name sf) directory) :direction :output :if-exists :supersede) (output-split-doc-file sf stream id-table :previous (if (eq (split-file-type sf) :sect1) prev prev-chap) :next (if (eq (split-file-type sf) :sect1) (cadr sfs) (find :sect1 (cdr sfs) :key #'split-file-type :test #'neq)) :top top :glossary glossary :index index))))) (defun output-split-doc-file (sf stream id-table &key previous next top glossary index) (loop with template = *output-template* for start = 0 then (+ epos 2) as bpos = (search "((" template :start2 start) while bpos as epos = (search "))" template :start2 bpos) do (write-string template stream :start start :end bpos) do (ecase (intern (subseq template (+ bpos 2) epos) :keyword) (:previous (output-split-doc-header-link stream previous :previous)) (:next (output-split-doc-header-link stream next :next)) (:home (output-split-doc-header-link stream top :home)) (:glossary (output-split-doc-header-link stream glossary :glossary)) (:index (output-split-doc-header-link stream index :index)) (:bgcolor (write-string *links-bgcolor* stream)) (:body (output-split-doc-file-body stream sf id-table))) finally (write-string template stream :start start))) ;; (setq *print-string-length* 400 *print-length* 100 *print-level* 50) (defun read-html-file (pathname) (with-open-file (stream pathname) (let ((str (make-string (file-length stream)))) (read-sequence str stream) (make-html :string str :node (read-html-form str (search "~a" (split-file-name up) up-title))) (loop with string = (html-string *cur-html*) for node in (split-file-nodes sf) do (let ((hrefs (doc-file-collect-hrefs node id-table))) (setq hrefs (sort hrefs #'< :key #'car)) (assert (or (null hrefs) (<= (node-start node) (caar hrefs)))) (loop as start = (node-start node) then pos for (pos . name) in hrefs do (write-string string stream :start start :end pos) do (write-string name stream) finally (write-string string stream :start start :end (node-end node))) (fresh-line stream)))) (defun doc-file-register-ids (node name hash) (when (cnode-p node) (let ((id (and (eq (cnode-tag node) :a) (cnode-attribute-value node :id)))) (when id (let ((old (gethash id hash))) (when old (warn "~s already registered in file ~s" id old))) (setf (gethash id hash) name))) (loop for subnode in (cnode-children node) do (doc-file-register-ids subnode name hash)))) (defun doc-file-collect-hrefs (node hash) (when (cnode-p node) (let* ((hrefs (loop for subnode in (cnode-children node) nconc (doc-file-collect-hrefs subnode hash))) (href (and (eq (cnode-tag node) :a) (cnode-attribute-value node :href)))) (when (and href (eql 0 (position #\# href))) (let ((name (gethash (subseq href 1) hash))) (unless name (warn "Couldn't find the split file id for href ~s" href)) (when name (let ((pos (search (format nil "href=~s" href) (html-string *cur-html*) :start2 (cnode-start node) :end2 (cnode-tag-end node)))) (assert pos) (push (cons (+ pos 6) name) hrefs))))) hrefs))) (defparameter *times* 0) (defun split-file-title (sf) (labels ((title (node) (when (cnode-p node) (if (and (eq (cnode-tag node) :h2) (equal (cnode-attribute-value node :class) "title")) (labels ((text (node) (if (tnode-p node) (subseq (html-string *cur-html*) (node-start node) (node-end node)) (apply #'concatenate 'string (loop for sub in (cnode-children node) collect (text sub)))))) (text node)) (loop for sub in (cnode-children node) thereis (title sub)))))) (loop for node in (split-file-nodes sf) thereis (title node)))) (defun doc-file-splits (html) (let* ((*cur-html* html) (node (html-node html))) (assert (eq (node-tag node) :html)) (setq node (find :body (cnode-children node) :key #'node-tag)) (assert node) (setq node (find :div (cnode-children node) :key #'node-tag)) (assert node) (assert (equal (cnode-attribute-value node :class) "book")) (loop with nchapters = 0 for subnode in (cnode-children node) as class = (and (eq (node-tag subnode) :div) (cnode-attribute-value subnode :class)) if (member class '("chapter" "glossary" "index") :test #'equal) nconc (doc-file-chapter-splits subnode (incf nchapters)) into sections else collect subnode into nodes finally (let ((sf (make-split-file :name "index.html" :type :book :nodes nodes))) (loop for sub in sections unless (eq (split-file-type sub) :sect1) do (setf (split-file-up sub) sf)) (return (cons sf sections)))))) (defun doc-file-chapter-splits (node num) (let* ((class (and (eq (node-tag node) :div) (cnode-attribute-value node :class)))) (cond ((equal class "chapter") (loop with nsect = 0 for subnode in (cnode-children node) as class = (and (eq (node-tag subnode) :div) (cnode-attribute-value subnode :class)) if (equal class "sect1") collect (make-split-file :name (format nil "chapter~d.~d.html" num (incf nsect)) :type :sect1 :nodes (list subnode)) into sections else collect subnode into nodes finally (let ((sf (make-split-file :name (format nil "chapter~d.html" num) :type :chapter :nodes nodes))) (loop for sub in sections do (setf (split-file-up sub) sf)) (return (cons sf sections))))) ((equal class "glossary") (list (make-split-file :name "glossary.html" :type :glossary :nodes (list node)))) ((equal class "index") (list (make-split-file :name "symbol-index.html" :type :symbol-index :nodes (list node)))) (t (error "expected a chapter, glossary or index: ~s" class))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Returns NIL for case. (defun read-html-tag (str s e &aux (s1 (1+ s))) (and (< s e) (eq (char str s) #\<) (let* ((te (or (position-if #'(lambda (ch) (or (whitespacep ch) (char= ch #\>) (char= ch #\/))) str :start s1 :end e) e))) (and (< s1 te) (intern (nstring-upcase (subseq str s1 te)) ccl::*keyword-package*))))) ;; Returns NIL if at end of buffer or if looking at " str s e))) (node (make-cnode :tag tag :start s :tag-end te :end e :children nil))) (if (eq (char str (- te 2)) #\/) (setf (node-end node) te) (read-html-children-into-cnode str node)) node)) ((>= s e) NIL) ((eq (char str s) #\<) (assert (and (< (1+ s) e) (eq (char str (1+ s)) #\/))) NIL) (t (make-tnode :start s :end (or (position #\< str :start s :end e) e))))) (defun position-ignoring-strings (ch str start end) (let* ((p (position ch str :start start :end end))) (and p (let ((q (position #\" str :start start :end p))) (if (null q) p (let ((qe (position #\" str :start (1+ q) :end end))) (and qe (position-ignoring-strings ch str (1+ qe) end)))))))) (defun read-html-children-into-cnode (str node) ;; This is entered with node-end = end of region, and it updates both ;; cnode-children and node-end. Eats up the ending tag if it matches ;; the node tag, otherwise leaves it to be re-read. (let* ((s (cnode-tag-end node)) (e (cnode-end node))) (loop (assert (< s e) () "Unended tag ~S" (subseq str (cnode-start node) e)) (when (string= " str :start s :end e)))) (setf (cnode-end node) (if (string-equal str (symbol-name (cnode-tag node)) :start1 (+ s 2) :end1 (1- te)) te s)) (return))) (let* ((ntag (read-html-tag str s e)) (child (read-html-form str s e ntag))) (setq s (node-end child)) (push child (cnode-children node)))) (setf (cnode-children node) (nreverse (cnode-children node))))) (defun cnode-attributes (node &optional string-or-html &aux string) (setq string-or-html (or string-or-html *cur-html*)) (setq string (if (html-p string-or-html) (html-string string-or-html) string-or-html)) (multiple-value-bind (start end) (let* ((start (1+ (node-start node))) (end (cnode-tag-end node)) (word-end (position-if #'(lambda (ch) (or (whitespacep ch) (char= ch #\>) (char= ch #\/))) string :start start :end end))) (assert word-end) (values word-end (1- end))) (flet ((next-token (type) (when (setq start (position-if-not #'whitespacep string :start start :end end)) (let ((ch (char string start))) (incf start) (case ch ((#\" #\') (assert (eq type :value)) (let ((tend (position ch string :start start :end end))) (prog1 (subseq string start tend) (setq start (1+ tend))))) ((#\=) (assert (eq type :separator)) t) ((nil) (assert (or (eq type :attribute) (eq type :separator))) nil) (t (assert (or (eq type :value) (eq type :attribute))) (let ((tend (or (position-if #'(lambda (ch) (or (whitespacep ch) (eql ch #\=))) string :start start :end end) end))) (prog1 (subseq string (1- start) tend) (setq start tend))))))))) (loop as attribute = (next-token :attribute) while attribute collect (cons (intern (string-upcase attribute) :keyword) (if (next-token :separator) (next-token :value) t)))))) (defun cnode-attribute-value (node attribute &optional string-or-html) (cdr (assoc attribute (cnode-attributes node string-or-html) :test #'eq))) #+debugging (defun debug-print-html (str node &key (stream t) (depth nil)) (when (html-p str) (setq str (html-string str))) (if (null stream) (with-output-to-string (s) (debug-print-html str node :stream s :depth depth)) (labels ((print (node cur-depth) (etypecase node (tnode (format stream "~A" (subseq str (node-start node) (node-end node)))) (cnode (format stream "~A" (subseq str (node-start node) (cnode-tag-end node))) (if (or (null depth) (< cur-depth depth)) (dolist (child (cnode-children node)) (print child (1+ cur-depth))) (format stream "...")) (format stream "" (node-tag node)))))) (print node 0)))) #+debugging (defun debug-outline-html (str node &key (stream t) (depth nil)) (if (null stream) (with-output-to-string (s) (debug-outline-html str node s depth)) (labels ((outline (node cur-depth idx) (etypecase node (tnode (unless (loop for i from (node-start node) below (node-end node) always (whitespacep (char str i))) (if idx (format stream "[~a]..." idx) (format stream "...")))) (cnode (fresh-line stream) (if idx (format stream "~&[~a]" idx) (format stream "~&")) (dotimes (i cur-depth) (write-char #\Space stream)) (format stream "<~A ~:a>" (cnode-tag node) (cnode-attributes node str)) (when (or (null depth) (< cur-depth depth)) (loop for i upfrom 0 as child in (cnode-children node) do (outline child (1+ cur-depth) (if idx (format nil "~a.~d" idx i) (format nil "~d" i))))) (format stream "" (node-tag node)))))) (outline node 0 nil))))