Initial commit
This commit is contained in:
BIN
DroidSansMonoSlashed.ttf
Normal file
BIN
DroidSansMonoSlashed.ttf
Normal file
Binary file not shown.
26
README.adoc
Normal file
26
README.adoc
Normal file
@@ -0,0 +1,26 @@
|
||||
= DEC documentation processor
|
||||
|
||||
== Intro
|
||||
This converts DEC-derived .txt files (produced by whatever tool DEC used to generate its documents) to PDF, including
|
||||
a table of contents (which is handy with 400-odd page documents).
|
||||
|
||||
The code is terrible, and a non-trivial amount of the heavy lifting (including
|
||||
all the page layout) is done in PostScript, but it works for the Mass
|
||||
Storage Control Protocol documents from bitsavers, so I'm happy.
|
||||
|
||||
== Usage
|
||||
|
||||
....
|
||||
python3 docproc.py -o mscp.ps mscp.txt
|
||||
ps2pdf mscp.ps mscp.pdf
|
||||
....
|
||||
|
||||
In order to change the font (which you probably will need to do), edit `prelude.ps` and look for the line that has `findfont` on it.
|
||||
|
||||
If you want the line numbers included, change `/zDN false def` to `/zDN true def` in `prelude.ps`. Depending on the font you're using, you may or may not also need to change the font size (`/zCH`).
|
||||
|
||||
== Disclaimer
|
||||
This was a weekend hack, so don't expect much readability.
|
||||
If it breaks, good luck with that. If you fix it, send a patch.
|
||||
|
||||
It also doesn't do quite as much as you might want: inline section references are ignored, and tables aren't linked.
|
||||
332
docproc.py
Normal file
332
docproc.py
Normal file
@@ -0,0 +1,332 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
import io
|
||||
from typing import TextIO, Union
|
||||
|
||||
import click
|
||||
|
||||
|
||||
@dataclass
class Line:
    """One buffered input line, split into its fixed-width columns."""

    # Text of the line-number column (the leading characters of the raw line).
    line_no: str
    # True when the change-marker column of the raw line is not a space.
    is_change: bool
    # Everything to the right of the margin columns; "" for short lines.
    content: str
|
||||
|
||||
|
||||
@dataclass
class Outline:
    """A node of the PDF outline (bookmark) tree."""

    # Name of the destination this entry jumps to.
    dest_name: str
    # Text shown for the entry.
    label: str
    # Child entries, in the order they were added.
    subitems: list["Outline"]
|
||||
|
||||
# Roman-numeral tables, populated by build_roman().
# roman_lookup:     [(place value, [numeral for digit 0..9])], ones place first.
# roman_lookup_rev: lower-case numeral fragment -> integer value.
# roman_lookup_re:  anchored pattern with one group per place, thousands first.
roman_lookup = []
roman_lookup_rev = {}
roman_lookup_re = re.compile("")


def build_roman():
    """(Re)build the roman-numeral lookup tables and the parsing regex.

    Safe to call more than once: the tables are cleared before being filled.
    """
    global roman_lookup_re

    # One row per decimal place; entry j of a row is the numeral for digit j
    # (the leading '/' makes entry 0 the empty string).
    roman_lookup_raw = [
        "/i/ii/iii/iv/v/vi/vii/viii/ix",
        "/x/xx/xxx/xl/l/lx/lxx/lxxx/xc",
        "/c/cc/ccc/cd/d/dc/dcc/dccc/cm",
        "/m/mm/mmm",
    ]
    roman_lookup.clear()
    roman_lookup_rev.clear()
    re_parts = []
    for i, row in enumerate(roman_lookup_raw):
        pats = row.split('/')
        base = 10 ** i
        roman_lookup.append((base, pats))
        re_parts.append("(" + "|".join(pats) + ")")
        for j, pat in enumerate(pats):
            roman_lookup_rev[pat] = base * j
    # Numerals are written most-significant place first, so the groups must be
    # concatenated in the reverse of the (ones-first) table order.
    roman_lookup_re = re.compile(
        "^" + "".join(reversed(re_parts)) + "$", re.IGNORECASE
    )


# Build the tables at import time so romanize()/unromanize() are usable
# without every caller having to remember to initialise them.
build_roman()


def romanize(n):
    """Return *n* as a lower-case roman numeral ("" for 0).

    Raises:
        ValueError: if *n* is outside 0..3999, the range the tables cover.
    """
    if not 0 <= n < 4000:
        raise ValueError(f"{n!r} cannot be written as a roman numeral")
    return "".join(
        pats[(n // base) % 10] for base, pats in reversed(roman_lookup)
    )


def unromanize(s):
    """Return the integer value of the roman numeral *s* (case-insensitive).

    Raises:
        ValueError: if *s* is empty or not a well-formed numeral.
    """
    m = roman_lookup_re.match(s) if s else None
    if m:
        # groups() already excludes the whole match; every group is one place.
        return sum(roman_lookup_rev[part.lower()] for part in m.groups())
    raise ValueError(f"{s!r} is not a valid roman numeral")
|
||||
|
||||
# Characters that must be backslash-escaped inside a PostScript (...) string.
ps_str_escapes = re.compile(r'([()\\])')


def ps_string(s):
    """Return *s* as a PostScript string literal.

    Parentheses and backslashes are prefixed with a backslash and the result
    is wrapped in parentheses.
    """
    escaped = ps_str_escapes.sub(lambda hit: "\\" + hit.group(1), s)
    return "(" + escaped + ")"
|
||||
|
||||
|
||||
|
||||
class Processor:
|
||||
# --- Line recognisers -------------------------------------------------
# First line of a page header; used to detect page breaks when the input
# has no form feeds.
line1pat = re.compile(r'^\s*Digital Equipment Corporation\s\s+Confidential And Proprietary\s*$')
# Running head: "<chapter title>  Page <label>".
ch_pat = re.compile(r'^\s*((?:\S|\s\S)+)\s\s+Page (\S+)')
# Table-of-contents chapter / appendix / FIGURES / TABLES heading.
toc_chapter_re = re.compile(r'^\s*(CHAPTER \d+|APPENDIX [A-Z]+|FIGURES|TABLES)(?:\s+((?:\S| \S)+))?\s*$')
# Table-of-contents entry: number, title, dot leader, page label.
toc_section_re = re.compile(r'^\s*((?:\d+|[A-Z])(?:(?:\.\d+)+|-\d+))\s+((?:\S|\s[^. ])+)\s+\s*(?:\. )*\s*([A-Z]-\d+|\d+-\d+)$')
# Chapter / appendix heading in the body.  The original pattern only looked
# for the misspelt "APENDIX", so appendix headings never got a destination;
# "APP?ENDIX" accepts the correct spelling and keeps the old one working.
bdy_chapter_re = re.compile(r'^\s*(?:CHAPTER|APP?ENDIX)\s*(\d+|[A-Z]+)\s*$')
# Numbered section heading in the body: "<number>  <title>".
bdy_section_re = re.compile(r'^\s*((?:[A-Z]|\d+)(?:\.\d+)+)\s\s((?:\S|\s\S)+)\s*$')

# Config settings
out: TextIO = sys.stdout
guess_page = True       # split pages on line1pat until a form feed is seen
ln_cols = 6             # width of the line-number column
change_col = 8          # column holding the change marker
# NOTE(review): include_num, number_color, header_color, fontsize and linespc
# are not read by any method visible in this file.
include_num = False
number_color = 0.5
header_color = 0.5
marginalia = 11         # column at which the page content starts
header_lines = 4        # lines at the top of a page emitted as header text
trailer_lines = 1       # lines at the bottom of a page emitted as header text

fontsize = 10
linespc = fontsize * 1.2

lines: list[Line] = []  # lines buffered for the page being assembled

# running state
last_chapter = None     # chapter title from the most recent running head
is_toc = False          # True while inside the CONTENTS pages
last_page = None        # label of the most recently emitted page
page_n: int = 1         # ordinal written in the %%Page: comment
input_line: int = 0     # count of input lines consumed so far
pg_start: int = 0       # value of input_line when the current page began
toc_pfx: str = "sec."   # destination prefix for TOC entries

chapters: dict[str, str]
outline: list[Outline]
|
||||
|
||||
def __init__(self, file: Union[TextIO, str]):
    """Bind the output stream and copy the PostScript prelude to it.

    Args:
        file: an open text stream, or a path that is opened for writing.

    Raises:
        OSError: if *file* (when a path) or ``prelude.ps`` cannot be opened.
    """
    # Give each instance its own buffers.  `lines` is declared at class level
    # as a mutable list, so without this every Processor would append to the
    # same shared list until its first flush.
    self.lines = []
    self.outline = []
    self.chapters = {}

    if isinstance(file, str):
        # NOTE(review): a stream opened here is never closed by this class.
        self.out = open(file, "wt")
    else:
        self.out = file

    # Emit front-matter: prelude.ps is read from the current directory.
    with open("prelude.ps", "rt") as prelude:
        for line in prelude:
            self.emit(line.rstrip("\n"))
|
||||
|
||||
def do_line(self, line):
    """Feed one raw input line, honouring form feeds as page breaks.

    Each form feed ends the current page.  Seeing one also turns off
    header-based page guessing for the rest of the run.
    """
    *finished, remainder = line.split('\f')
    if finished:
        self.guess_page = False
        for piece in finished:
            if piece:
                self.real_do_line(piece)
            # Every form feed closes a page, even when nothing precedes it.
            self.flush_page()
    self.real_do_line(remainder)
    self.input_line += 1
|
||||
|
||||
def real_do_line(self, line):
    """Split one form-feed-free line into columns and buffer it.

    When page guessing is on and the line is the first header line of a page,
    the page collected so far is flushed first.
    """
    # Expand tabs to 8-column stops.  The previous hand-rolled loop also
    # padded after the last segment, adding trailing blanks that inflated
    # the page width computed from the content lengths.
    line = line.expandtabs(8)

    # Only flush when something is buffered: the very first header line of
    # the input would otherwise flush an empty page.
    if self.guess_page and self.lines and self.line1pat.match(line):
        self.flush_page()

    is_change = len(line) > self.change_col and line[self.change_col] != ' '
    if len(line) >= self.marginalia:
        line_no = line[:self.ln_cols]
        content = line[self.marginalia:]
    else:
        # Short line: nothing reaches the content area; pad the number column.
        line_no = line.ljust(self.ln_cols)[:self.ln_cols]
        content = ""
    self.lines.append(Line(line_no=line_no, content=content, is_change=is_change))
|
||||
|
||||
def flush_page(self):
    """Emit the buffered page as PostScript and start a new, empty page.

    The page label and chapter title are taken from the running head on the
    third buffered line; when that is missing the label is guessed from the
    previous page.  On CONTENTS pages the entries become links and outline
    items; on other pages chapter and section headings become named
    destinations.  Does nothing when no lines are buffered.
    """
    if not self.lines:
        # Nothing to draw (e.g. a page break before any text was read).
        self.pg_start = self.input_line
        return

    height = len(self.lines)
    width = max(len(line.content) for line in self.lines)

    last_chapter = self.last_chapter
    # A page shorter than three lines has no running head to look at.
    chapter_m = self.ch_pat.match(self.lines[2].content) if height > 2 else None
    if chapter_m is not None:
        self.last_chapter = chapter = chapter_m.group(1)
        page = chapter_m.group(2)
    else:
        print(f"Warning: no page number found on pg {self.pg_start}", file=sys.stderr)
        chapter = last_chapter
        page = self.guess_next_page()

    self.last_page = page

    if chapter != last_chapter:
        self.is_toc = chapter == "CONTENTS"

    lines = list(enumerate(self.lines))
    header = lines[:self.header_lines]
    body = lines[self.header_lines:-self.trailer_lines]
    # Start the trailer after the header so that a very short page does not
    # emit the same line twice.
    trailer = lines[max(self.header_lines, height - self.trailer_lines):]

    if self.is_toc:
        links, dests = self._scan_toc(body)
    else:
        links, dests = [], self._scan_body(body)

    # Begin emitting page
    self.emit(f"%%Page: {ps_string(page)} {self.page_n}")
    self.emit(f"{height} {width} {ps_string(page)} bP")

    for i, line in header:
        self.emit(f"{i} gL {ps_string(line.content)} tH")

    for i, line in body:
        chg = " mC" if line.is_change else ""
        self.emit(f"{i} gL {ps_string(line.line_no)} tN {ps_string(line.content)} tB{chg}")

    for i, line in trailer:
        self.emit(f"{i} gL {ps_string(line.content)} tH")

    for left, top, right, bottom, name in links:
        self.emit(f"{left} {top} {right} {bottom} {ps_string(name)} mL")
    for top, name in dests:
        self.emit(f"{top} {ps_string(name)} mD")

    self.emit("sP")

    self.lines = []
    self.pg_start = self.input_line
    # The %%Page: ordinal must count pages; it used to stay at 1 forever.
    self.page_n += 1

def _scan_toc(self, body):
    """Return (links, dests) found on a table-of-contents page.

    Also appends the chapters and entries it finds to the outline.
    """
    links = []
    dests = []
    # TODO: This won't work if an entry is split across pages.
    # To fix, partial_toc and partial_toc_start must be kept between pages.
    partial_toc = ""
    partial_toc_start = None
    for i, line in body:
        stripped = line.content.strip()
        if stripped == "":
            # A blank line ends any entry that was being accumulated.
            partial_toc_start = None
            partial_toc = ""
            continue
        if stripped == "CONTENTS":
            dests.append((i, "sec.CONTENTS"))
            self.outline.append(Outline("sec.CONTENTS", "CONTENTS", []))
            continue
        if m := self.toc_chapter_re.match(line.content):
            label = m.group(1)
            title = m.group(2)
            # "CHAPTER 3" -> "3", "APPENDIX A" -> "A"; FIGURES/TABLES as-is.
            num = label.split(' ')[1] if ' ' in label else label
            if label == "FIGURES":
                self.toc_pfx = "sec.FIGURES."
                dests.append((i, "sec.FIGURES"))
            elif label == "TABLES":
                self.toc_pfx = "sec.TABLES."
                dests.append((i, "sec.TABLES"))
            partial_toc_start = None
            partial_toc = ""
            left = len(line.content) - len(line.content.lstrip())
            right = len(line.content.rstrip())
            links.append((left, i, right, i, f"sec.{num}"))
            self.outline.append(Outline(f"sec.{num}", f"{label} - {title}" if title else label, []))
            continue

        # Entries may wrap, so keep joining lines until the pattern matches.
        if partial_toc_start is not None:
            partial_toc += " " + stripped
        else:
            partial_toc = line.content.rstrip()
            partial_toc_start = i

        if m := self.toc_section_re.match(partial_toc):
            num = m.group(1)
            title = m.group(2)
            left = partial_toc.index(num[0])
            right = len(line.content.rstrip())
            links.append((left, partial_toc_start, right, i, f"{self.toc_pfx}{num}"))
            self.add_outline(self.toc_pfx + num, title)
            partial_toc = ""
            partial_toc_start = None
        elif partial_toc:
            print(f"No match: {partial_toc!r}", file=sys.stderr)
    return links, dests

def _scan_body(self, body):
    """Return the named destinations for headings found on a body page."""
    dests = []
    partial_toc = ""
    partial_toc_start = None
    for i, line in body:
        if line.content.strip() == "":
            partial_toc_start = None
            partial_toc = ""
            continue
        if m := self.bdy_chapter_re.match(line.content):
            dests.append((i, "sec." + m.group(1)))

        # Join wrapped heading lines, as for the table of contents.
        if partial_toc_start is not None:
            partial_toc += " " + line.content.strip()
        else:
            partial_toc = line.content.rstrip()
            partial_toc_start = i

        if m := self.bdy_section_re.match(partial_toc):
            dests.append((partial_toc_start, "sec." + m.group(1)))
    return dests
|
||||
|
||||
def guess_next_page(self):
    """Guess the label of the page that follows ``self.last_page``.

    Handles "<prefix>-<n>" labels (chapter pages such as "3-9" and appendix
    pages such as "A-3"), plain numbers, and roman numerals.  With no previous
    page the answer is "i".

    Raises:
        ValueError: if the previous label is in none of those forms.
    """
    if self.last_page is None:
        return "i"
    # The prefix used to be restricted to digits, so appendix labels fell
    # through to the roman-numeral branch and raised.
    if m := re.match(r"^(\S*-)(\d+)$", self.last_page):
        return f"{m.group(1)}{1 + int(m.group(2))}"
    if self.last_page.isdecimal():
        return str(1 + int(self.last_page))
    return romanize(1 + unromanize(self.last_page))
|
||||
|
||||
def emit(self, s):
    """Write *s* followed by a newline to the output stream."""
    self.out.write(f"{s}\n")
|
||||
|
||||
def emit_trailer(self):
    """Close the document: trailer comment, outline entries, end-of-file."""
    write = self.emit
    write("%%Trailer")
    self.emit_outline(self.outline)
    write("%%EOF")
|
||||
|
||||
def emit_outline(self, outline: list[Outline]):
    """Emit one ``mO`` call per outline entry, parents before their children."""
    # Explicit stack instead of recursion; reversing keeps document order.
    pending = list(reversed(outline))
    while pending:
        node = pending.pop()
        self.emit(f"{ps_string(node.dest_name)} {ps_string(node.label)} {len(node.subitems)} mO")
        pending.extend(reversed(node.subitems))
|
||||
|
||||
def add_outline(self, dest, title):
    """Insert an outline entry for *dest* beneath its deepest existing ancestor.

    An ancestor is an entry whose destination name plus "." is a prefix of
    *dest*.  For "sec" destinations the entry must attach directly below its
    immediate parent, which is checked with an assertion.
    """
    node = Outline(dest, title, [])
    kind = dest.split('.', 1)[0]

    parent_name = kind
    siblings = self.outline
    # Walk down the tree for as long as some entry is an ancestor of dest.
    while True:
        for candidate in siblings:
            if dest.startswith(candidate.dest_name + "."):
                parent_name = candidate.dest_name
                siblings = candidate.subitems
                break
        else:
            break

    if kind == "sec":
        assert dest.rsplit(".", 1)[0] == parent_name
    siblings.append(node)
|
||||
|
||||
|
||||
# Command-line entry point: read the DEC text document from INPUT (standard
# input by default) and write PostScript to --output (standard output by
# default).  Kept as comments because click shows a docstring as help text.
@click.command()
@click.option("-o", "--output", type=click.File("w"))
@click.argument("input", type=click.File("r"), default="-")
def main(output, input):
    sink = output if output else sys.stdout
    proc = Processor(sink)
    for raw in input:
        proc.do_line(raw.strip("\n"))
    # The last page has no page break after it, so flush it explicitly.
    proc.flush_page()
    proc.emit_trailer()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
126
prelude.ps
Normal file
126
prelude.ps
Normal file
@@ -0,0 +1,126 @@
|
||||
%!PS-Adobe-3.0
%%Creator: Docproc.py
%%Orientation: Portrait
%%DocumentMedia: Letter 612 792 90 white ( )
%%BeginDefaults
%%PageMedia: Letter
%%EndDefaults

%%BeginProlog
% Names starting with z are globals: zCH/zLS/zCW/zPH/zPW are defined in the
% setup section below, zX0/zXC/zY0 are set by bP and zY1 by gL.
/zDN false def                      % true: draw the line-number column
/zNW zDN { 6 } { 0 } ifelse def     % width of that column, in characters
/bP { % begin Page
    % height width pageName --
    % height and width are in characters

    % Label the page, then centre a height x width character block on it.
    mark exch /Label exch /PAGELABEL pdfmark
    dup
    zPW exch zNW add zCW mul sub 2 div zNW zCW mul add /zX0 exch def
    1 add zCW mul zX0 add /zXC exch def     % x of the change bar
    zPH exch
    % -- zPH nlines
    1 sub zLS mul zCH add
    % -- zPH zTH
    add 2 div zCH sub /zY0 exch def
} bind def
/gL { % lineIndex --   set the baseline for that line
    zLS mul neg zY0 add /zY1 exch def
} bind def % go line
/tH { % string --   show in gray at the current line
    zX0 zY1 moveto
    0.5 setgray
    show
} bind def % text Header
% string --   show the line number left of the text block.  The disabled
% variant must still pop its argument; an empty procedure would leave one
% string on the operand stack for every body line of the document.
/tN zDN {{
    zX0 zNW 1 add zCW mul sub zY1 moveto
    0.7 setgray
    show
}} {{ pop }} ifelse bind def % text lineNo
/tB { % string --   show in black at the current line
    zX0 zY1 moveto
    0 setgray
    show
} bind def % text body
/mC { % --   draw a red bar at x = zXC beside the current line
    0.75 0 0 setrgbcolor
    newpath
    1 setlinewidth
    zXC zY1 -0.2 zLS mul add moveto
    0 zLS rlineto
    stroke
} bind def % mark Change
/fC { % col row -- x y
    0.2 add zLS mul neg zY0 add exch
    zCW mul zX0 add exch
} bind def % from Charpos
/mL { % left top right bottom name --
    5 dict begin
    /_N exch def
    /_B exch def
    /_R exch def
    /_T exch def
    /_L exch def
    mark
    /Rect [
        _L _B fC
        _R _T 1 sub fC
    ]
    /Border [ 0 0 0.5 ]
    /C [ 0 0 1 ]
    /Subtype /Link
    /Dest _N cvn
    /ANN
    pdfmark

    % Debugging aid: outline the link rectangle.
    % 0 1 0 setrgbcolor
    % 1 setlinewidth
    % newpath
    %
    % _L _T 1 sub fC moveto
    % _R _T 1 sub fC lineto
    % _R _B fC lineto
    % _L _B fC lineto
    % closepath
    % stroke
    end
} bind def % mark Link
/mD { % top name --
    2 dict begin
    /_N exch def
    /_T exch 0 exch 1 sub fC exch pop def   % y of the top edge of line `top`
    mark
    /Dest _N cvn
    /View [ /XYZ null _T null ]
    /DEST pdfmark
    end
} bind def % mark Destination
/mO { % dest title subitems --
    3 dict begin
    /_S exch def
    /_T exch def
    /_D exch def
    mark
    /Title _T
    _S 0 gt {
        /Count _S
    } if
    /Dest _D cvn
    /OUT pdfmark
    end
} bind def % mark Outline
/sP { showpage } bind def

%%EndProlog

%%BeginSetup
/zCH 10 def % char height
/zLS zCH 1.2 mul def % line spacing
(DroidSansMonoSlashed) findfont zCH scalefont setfont

% Globals
/zCW (M) stringwidth pop def % char width
/zPH { currentpagedevice /PageSize get 1 get } def % page height
/zPW { currentpagedevice /PageSize get 0 get } def % page width
%%EndSetup
|
||||
|
||||
29145
sources/mscp.txt
Normal file
29145
sources/mscp.txt
Normal file
File diff suppressed because it is too large
Load Diff
9676
sources/tmscp.txt
Normal file
9676
sources/tmscp.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user