1#!/usr/bin/env python3 2# Copyright (c) 2016 Jon Turney <jon.turney@dronecode.org.uk> 3# 4# python script to process makedoc instructions in a source file and produce 5# DocBook XML output 6# 7 8# 9# This performs 3 stages of processing on it's input, in a similar fashion 10# to makedoc: 11# 12# 1. Discard everything outside of /* */ comments 13# 2. Identify lines which contains commands (a single uppercase word) 14# 3. Apply each command to the text of the following lines (up to the next 15# command or the end of the comment block), to produce some output 16# 17# The resulting output contains one or more DocBook XML refentry elements. 18# 19# To make the output a valid XML document which can be xincluded, those refentry 20# elements are contained by a refcontainer element. refcontainer is not part of 21# the DocBook DTD and should be removed by a suitable XSLT. 22# 23 24from __future__ import print_function 25 26import fcntl 27import sys 28import os 29import re 30from optparse import OptionParser 31import lxml.etree 32import ply.lex as lex 33import ply.yacc as yacc 34 35rootelement = None # root element of the XML tree 36refentry = None # the current refentry 37verbose = 0 38 39def dump(s, stage, threshold=1): 40 if verbose > threshold: 41 print('*' * 40, file=sys.stderr) 42 print(stage, file=sys.stderr) 43 print('*' * 40, file=sys.stderr) 44 print('%s' % s, file=sys.stderr) 45 print('*' * 40, file=sys.stderr) 46 47# 48# Stage 1 49# 50 51def skip_whitespace_and_stars(i, src): 52 53 while i < len(src) and (src[i].isspace() or (src[i] == '*' and src[i + 1] != '/')): 54 i += 1 55 56 return i 57 58# Discard everything not inside '/* */' style-comments which start at column 0 59# Discard any leading blank space or '*' 60# Discard a single leading '.' 61# Discard blank lines after a blank line 62def comment_contents_generator(src): 63 i = 0 64 65 while i < len(src) - 2: 66 if src[i] == '\n' and src[i + 1] == '/' and src[i + 2] == '*': 67 i = i + 3 68 69 i = skip_whitespace_and_stars(i, src) 70 71 if src[i] == '.': 72 i += 1 73 74 while i < len(src): 75 if src[i] == '\n': 76 yield '\n' 77 i += 1 78 79 # allow a single blank line 80 if i < len(src) and src[i] == '\n': 81 yield '\n' 82 i += 1 83 84 i = skip_whitespace_and_stars(i, src) 85 86 elif src[i] == '*' and src[i + 1] == '/': 87 i = i + 2 88 # If we have just output \n\n, this adds another blank line. 89 # This is the only way a double blank line can occur. 90 yield '\nEND\n' 91 break 92 else: 93 yield src[i] 94 i += 1 95 else: 96 i += 1 97 98def remove_noncomments(src): 99 src = '\n' + src 100 dst = ''.join(comment_contents_generator(src)) 101 dump(dst, 'extracted from comments') 102 103 return dst 104 105# 106# Stage 2 107# 108 109# A command is a single word of at least 3 characters, all uppercase, and alone on a line 110def iscommand(l): 111 if re.match(r'^[A-Z_]{3,}\s*$', l): 112 113 return True 114 return False 115 116def command_block_generator(content): 117 command = 'START' 118 text = '' 119 120 for l in content.splitlines(): 121 if iscommand(l): 122 yield (command, text) 123 command = l.rstrip() 124 text = '' 125 else: 126 text = text + l + '\n' 127 yield (command, text) 128 129# Look for commands, which give instructions how to process the following input 130def process(content): 131 content = content.lstrip() 132 133 dump(content, 'about to process for commands') 134 135 # process into a list of tuples of commands and the associated following text 136 # it is important to maintain the order of the sections the commands generate 137 processed = list(command_block_generator(content)) 138 139 return processed 140 141# 142# Stage 3 143# 144 145# invoke each command on it's text 146def perform(processed): 147 for i in processed: 148 c = i[0].rstrip() 149 t = i[1].strip() + '\n' 150 151 if verbose: 152 print("performing command '%s'" % c, file=sys.stderr) 153 154 if c in command_dispatch_dict: 155 command_dispatch_dict[c](c, t) 156 else: 157 print("command '%s' is not recognized" % c, file=sys.stderr) 158 # the text following an unrecognized command is discarded 159 160# FUNCTION (aka TYPEDEF) 161# 162def function(c, l): 163 global refentry 164 global rootelement 165 166 l = l.strip() 167 if verbose: 168 print('FUNCTION %s' % l, file=sys.stderr) 169 170 separator = '---' 171 172 if ';' in l: 173 # fpclassify has an unusual format we also need to handle 174 spliton = ';' 175 l = l.splitlines()[0] 176 elif len(l.splitlines()) > 1: 177 # a few pages like mktemp have two '---' lines 178 spliton = ';' 179 o = '' 180 for i in l.splitlines(): 181 if separator in i: 182 o += i + ';' 183 else: 184 o += i 185 l = o[:-1] 186 else: 187 spliton = '\n' 188 189 namelist = [] 190 descrlist = [] 191 for a in l.split(spliton): 192 (n, d) = a.split(separator, 1) 193 namelist = namelist + n.split(',') 194 descrlist = descrlist + [d] 195 196 # only copysign and log1p use <[ ]> markup in descr, 197 # only gets() uses << >> markup 198 # but we should handle it correctly 199 descr = line_markup_convert(', '.join(descrlist)) 200 201 # fpclassify includes an 'and' we need to discard 202 namelist = map(lambda v: re.sub(r'^and ', r'', v.strip(), 1), namelist) 203 # strip off << >> surrounding name 204 namelist = map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist) 205 # instantiate list to make it subscriptable 206 namelist = list(namelist) 207 208 if verbose: 209 print(namelist, file=sys.stderr) 210 # additional alternate names may also appear in INDEX commands 211 212 # create the root element if needed 213 if rootelement is None: 214 rootelement = lxml.etree.Element('refentrycontainer') 215 216 # FUNCTION implies starting a new refentry 217 if refentry is not None: 218 sys.exit("multiple FUNCTIONs without NEWPAGE") 219 220 # create the refentry 221 refentry = lxml.etree.SubElement(rootelement, 'refentry') 222 refentry.append(lxml.etree.Comment(' Generated by makedocbook.py ')) 223 refentry.set('id', namelist[0].lstrip('_')) 224 225 refmeta = lxml.etree.SubElement(refentry, 'refmeta') 226 # refentrytitle will be same as refdescriptor, the primary name 227 refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle') 228 refentrytitle.text = namelist[0] 229 manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum') 230 manvolnum.text = '3' 231 232 refnamediv = lxml.etree.SubElement(refentry, 'refnamediv') 233 # refdescriptor is the primary name, assume we should use the one which 234 # appears first in the list 235 refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor') 236 refdescriptor.text = namelist[0] 237 # refname elements exist for all alternate names 238 for n in namelist: 239 refname = lxml.etree.SubElement(refnamediv, 'refname') 240 refname.text = n 241 refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose') 242 refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>')) 243 244 # Only FUNCTION currently exists, which implies that the SYNOPSIS should be 245 # a funcsynopsis. If TYPEDEF was to be added, SYNOPSIS should be processed 246 # in a different way, probably producing a refsynopsis. 247 248# INDEX 249# may occur more than once for each FUNCTION giving alternate names this 250# function should be indexed under 251# 252def index(c, l): 253 l = l.strip() 254 255 if verbose: 256 print('INDEX %s' % l, file=sys.stderr) 257 258 # discard anything after the first word 259 l = l.split()[0] 260 261 # add indexterm 262 # (we could just index under all the refnames, but we control the indexing 263 # separately as that is what makedoc does) 264 indexterm = lxml.etree.SubElement(refentry, 'indexterm') 265 primary = lxml.etree.SubElement(indexterm, 'primary') 266 primary.text = l 267 268 # to validate, it seems we need to maintain refentry elements in a certain order 269 refentry[:] = sorted(refentry, key=lambda x: x.tag if isinstance(x.tag, str) else '') 270 271 # adds another alternate refname 272 refnamediv = refentry.find('refnamediv') 273 274 # as long as it doesn't already exist 275 if not refnamediv.xpath(('refname[.="%s"]') % l): 276 refname = lxml.etree.SubElement(refnamediv, 'refname') 277 refname.text = l 278 if verbose > 1: 279 print('added refname %s' % l, file=sys.stderr) 280 else: 281 if verbose > 1: 282 print('duplicate refname %s discarded' % l, file=sys.stderr) 283 284 # to validate, it seems we need to maintain refnamediv elements in a certain order 285 refnamediv[:] = sorted(refnamediv, key=lambda x: x.tag) 286 287 288# SYNOPSIS aka ANSI_SYNOPSIS 289# ANSI-style synopsis 290# 291# Note that makedoc would also process <<code>> markup here, but there are no 292# such uses. 293# 294def synopsis(c, t): 295 refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv') 296 funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis') 297 298 s = '' 299 for l in t.splitlines(): 300 if re.match(r'\s*(#|\[|struct)', l): 301 # preprocessor # directives, structs, comments in square brackets 302 funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo') 303 funcsynopsisinfo.text = l.strip() + '\n' 304 elif re.match(r'[Ll]ink with', l): 305 pass 306 else: 307 s = s + l 308 309 # a prototype without a terminating ';' is an error 310 if s.endswith(')'): 311 sys.exit("'%s' missing terminating semicolon" % l) 312 s = s + ';' 313 314 if ';' in s: 315 synopsis_for_prototype(funcsynopsis, s) 316 s = '' 317 318 if s.strip(): 319 sys.exit("surplus synopsis '%s'" % s) 320 321def synopsis_for_prototype(funcsynopsis, s): 322 s = s.strip() 323 324 # funcsynopsis has a very detailed content model, so we need to massage the 325 # bare prototype into it. Fortunately, since the parameter names are marked 326 # up, we have enough information to do this. 327 for fp in s.split(';'): 328 fp = fp.strip() 329 if fp: 330 331 if verbose: 332 print("'%s'" % fp, file=sys.stderr) 333 334 match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp) 335 336 if verbose: 337 print(match.groups(), file=sys.stderr) 338 339 funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype') 340 funcdef = lxml.etree.SubElement(funcprototype, 'funcdef') 341 funcdef.text = match.group(1) 342 function = lxml.etree.SubElement(funcdef, 'function') 343 function.text = match.group(2) 344 345 if match.group(3).strip() == 'void': 346 void = lxml.etree.SubElement(funcprototype, 'void') 347 else: 348 # Split parameters on ',' except if it is inside () 349 for p in re.split(r',(?![^()]*\))', match.group(3)): 350 p = p.strip() 351 352 if verbose: 353 print(p, file=sys.stderr) 354 355 if p == '...': 356 varargs = lxml.etree.SubElement(funcprototype, 'varargs') 357 else: 358 paramdef = lxml.etree.SubElement(funcprototype, 'paramdef') 359 parameter = lxml.etree.SubElement(paramdef, 'parameter') 360 361 # <[ ]> enclose the parameter name 362 match2 = re.match(r'(.*)<\[(.*)\]>(.*)', p) 363 364 if verbose: 365 print(match2.groups(), file=sys.stderr) 366 367 paramdef.text = match2.group(1) 368 parameter.text = match2.group(2) 369 parameter.tail = match2.group(3) 370 371 372# DESCRIPTION 373# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES are handled the same) 374# 375# Create a refsect with a title corresponding to the command 376# 377# Nearly all the the existing DESCRIPTION contents could be transformed into 378# DocBook with a few regex substitutions. Unfortunately, pages like sprintf and 379# sscanf, have very complex layout using nested tables and itemized lists, which 380# it is best to parse in order to transform correctly. 381# 382def refsect(t, s): 383 refsect = lxml.etree.SubElement(refentry, 'refsect1') 384 title = lxml.etree.SubElement(refsect, 'title') 385 title.text = t.title() 386 387 if verbose: 388 print('%s has %d paragraphs' % (t, len(s.split('\n\n'))), file=sys.stderr) 389 390 if verbose > 1: 391 dump(s, 'before lexing') 392 393 # dump out lexer token sequence 394 lex.input(s) 395 for tok in lexer: 396 print(tok, file=sys.stderr) 397 398 # parse the section text for makedoc markup and the few pieces of texinfo 399 # markup we understand, and output an XML marked-up string 400 xml = parser.parse(s, tracking=True, debug=(verbose > 2)) 401 402 dump(xml, 'after parsing') 403 404 xml = '<refsect1>' + xml + '</refsect1>' 405 406 refsect.extend(lxml.etree.fromstring(xml)) 407 408def seealso(c, t): 409 refsect('SEE ALSO', t) 410 411# NEWPAGE 412# 413# start a new refentry 414 415def newpage(c, t): 416 global refentry 417 refentry = None 418 419# command dispatch table 420 421def discarded(c, t): 422 return 423 424command_dispatch_dict = { 425 'FUNCTION': function, 426 'TYPEDEF': function, # TYPEDEF is not currently used, but described in doc.str 427 'INDEX': index, 428 'TRAD_SYNOPSIS': discarded, # K&R-style synopsis, obsolete and discarded 429 'ANSI_SYNOPSIS': synopsis, 430 'SYNOPSIS': synopsis, 431 'DESCRIPTION': refsect, 432 'RETURNS': refsect, 433 'ERRORS': refsect, 434 'PORTABILITY': refsect, 435 'BUGS': refsect, 436 'WARNINGS': refsect, 437 'SEEALSO': seealso, 438 'NOTES': refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right 439 'QUICKREF': discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently 440 'MATHREF': discarded, 441 'START': discarded, # a START command is inserted to contain the text before the first command 442 'END': discarded, # an END command is inserted merely to terminate the text for the last command in a comment block 443 'NEWPAGE': newpage, 444} 445 446# 447# Utility functions 448# 449 450# apply transformations which are easy to do in-place 451def line_markup_convert(p): 452 s = p 453 454 # escape characters not allowed in XML 455 s = s.replace('&', '&') 456 s = s.replace('<', '<') 457 s = s.replace('>', '>') 458 459 # convert <<somecode>> to <code>somecode</code> and <[var]> to 460 # <varname>var</varname> 461 # also handle nested << <[ ]> >> correctly 462 s = s.replace('<<', '<code>') 463 s = s.replace('<[', '<varname>') 464 s = s.replace(']>', '</varname>') 465 s = s.replace('>>', '</code>') 466 467 # also convert some simple texinfo markup 468 # convert @emph{foo} to <emphasis>foo</emphasis> 469 s = re.sub(r'@emph{(.*?)}', r'<emphasis>\1</emphasis>', s) 470 # convert @strong{foo} to <emphasis role=strong>foo</emphasis> 471 s = re.sub(r'@strong{(.*?)}', r'<emphasis role="strong">\1</emphasis>', s) 472 # convert @minus{} to U+2212 MINUS SIGN 473 s = s.replace('@minus{}', '−') 474 # convert @dots{} to U+2026 HORIZONTAL ELLIPSIS 475 s = s.replace('@dots{}', '…') 476 477 # convert xref and pxref 478 s = re.sub(r'@xref{(.*?)}', r"See <xref linkend='\1'/>", s) 479 480 # very hacky way of dealing with @* to force a newline 481 s = s.replace('@*', '</para><para>') 482 483 # fail if there are unhandled texinfo commands 484 match = re.search(r'(?<!@)@[^@\s]+', s) 485 if match: 486 sys.exit("texinfo command '%s' remains in output" % match.group(0)) 487 488 # process the texinfo escape for an @ 489 s = s.replace('@@', '@') 490 491 if (verbose > 3) and (s != p): 492 print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr) 493 494 return s 495 496# 497# lexer 498# 499 500texinfo_commands = { 501 'ifnottex': 'IFNOTTEX', 502 'end ifnottex': 'ENDIFNOTTEX', 503 'tex': 'IFTEX', 504 'end tex': 'ENDIFTEX', 505 'comment': 'COMMENT', 506 'c ': 'COMMENT', 507 'multitable': 'MULTICOLUMNTABLE', 508 'end multitable': 'ENDMULTICOLUMNTABLE', 509 'headitem': 'MCT_HEADITEM', 510 'tab': 'MCT_COLUMN_SEPARATOR', 511 'item': 'MCT_ITEM', 512} 513 514# token names 515tokens = [ 516 'BLANKLINE', 517 'BULLETEND', 518 'BULLETSTART', 519 'COURIER', 520 'EOF', 521 'ITEM', 522 'TABLEEND', 523 'TABLESTART', 524 'TEXINFO', 525 'TEXT', 526] + list(set(texinfo_commands.values())) 527 528# regular expression rules for tokens, in priority order 529# (all these expressions should match a whole line) 530def t_TEXINFO(t): 531 # this matches any @command. but not @command{} which just happens to be at 532 # the start of a line 533 r'@\w+[^{]*?\n' 534 535 # if the line starts with a known texinfo command, change t.type to the 536 # token for that command 537 for k in texinfo_commands.keys(): 538 if t.value[1:].startswith(k): 539 t.type = texinfo_commands[k] 540 break 541 542 return t 543 544def t_COURIER(t): 545 r'[.|].*\n' 546 t.value = line_markup_convert(t.value[1:]) 547 return t 548 549def t_BULLETSTART(t): 550 r'O\+\n' 551 return t 552 553def t_BULLETEND(t): 554 r'O-\n' 555 return t 556 557def t_TABLESTART(t): 558 r'o\+\n' 559 return t 560 561def t_TABLEEND(t): 562 r'o-\n' 563 return t 564 565def t_ITEM(t): 566 r'o\s.*\n' 567 t.value = re.sub(r'o\s', r'', lexer.lexmatch.group(0), 1) 568 t.value = line_markup_convert(t.value) 569 return t 570 571def t_TEXT(t): 572 r'.+\n' 573 t.value = line_markup_convert(t.value) 574 t.lexer.lineno += 1 575 return t 576 577def t_BLANKLINE(t): 578 r'\n' 579 t.lexer.lineno += 1 580 return t 581 582def t_eof(t): 583 if hasattr(t.lexer, 'at_eof'): 584 # remove eof flag ready for lexing next input 585 delattr(t.lexer, 'at_eof') 586 t.lexer.lineno = 0 587 return None 588 589 t.type = 'EOF' 590 t.lexer.at_eof = True 591 592 return t 593 594# Error handling rule 595def t_error(t): 596 sys.exit("tokenization error, remaining text '%s'" % t.value) 597 598lexer = lex.lex() 599 600# 601# parser 602# 603 604def parser_verbose(p): 605 if verbose > 2: 606 print(p[0], file=sys.stderr) 607 608def p_input(p): 609 '''input : paragraph 610 | input paragraph''' 611 if len(p) == 3: 612 p[0] = p[1] + '\n' + p[2] 613 else: 614 p[0] = p[1] 615 parser_verbose(p) 616 617# Strictly, text at top level should be paragraphs (i.e terminated by a 618# BLANKLINE), while text contained in rows or bullets may not be, but this 619# grammar doesn't enforce that for simplicity's sake. 620def p_paragraph(p): 621 '''paragraph : paragraph_content maybe_eof_or_blankline''' 622 p[0] = '<para>\n' + p[1] + '</para>' 623 parser_verbose(p) 624 625def p_paragraph_content(p): 626 '''paragraph_content : paragraph_line 627 | paragraph_line paragraph_content''' 628 if len(p) == 3: 629 p[0] = p[1] + p[2] 630 else: 631 p[0] = p[1] 632 parser_verbose(p) 633 634def p_paragraph_line(p): 635 '''paragraph_line : TEXT 636 | texinfocmd 637 | courierblock 638 | table 639 | bulletlist''' 640 p[0] = p[1] 641 642def p_empty(p): 643 'empty :' 644 p[0] = '' 645 646def p_maybe_eof_or_blankline(p): 647 '''maybe_eof_or_blankline : empty 648 | EOF 649 | BLANKLINE 650 | BLANKLINE EOF''' 651 p[0] = '' 652 653def p_maybe_lines(p): 654 '''maybe_lines : empty 655 | paragraph maybe_lines''' 656 if len(p) == 3: 657 p[0] = p[1] + p[2] 658 else: 659 p[0] = p[1] 660 parser_verbose(p) 661 662def p_maybe_blankline(p): 663 '''maybe_blankline : empty 664 | BLANKLINE''' 665 p[0] = '' 666 667def p_courierblock(p): 668 '''courierblock : courier''' 669 p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>' 670 parser_verbose(p) 671 672def p_courier(p): 673 '''courier : COURIER 674 | COURIER courier''' 675 if len(p) == 3: 676 p[0] = p[1] + p[2] 677 else: 678 p[0] = p[1] 679 parser_verbose(p) 680 681def p_bullet(p): 682 '''bullet : ITEM maybe_lines 683 | ITEM BLANKLINE maybe_lines''' 684 if len(p) == 3: 685 # Glue any text in ITEM into the first para of maybe_lines 686 # (This is an unfortunate consequence of the line-based tokenization we do) 687 if p[2].startswith('<para>'): 688 p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>' 689 else: 690 p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>' 691 else: 692 p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>' 693 parser_verbose(p) 694 695def p_bullets(p): 696 '''bullets : bullet 697 | bullet bullets''' 698 if len(p) == 3: 699 p[0] = p[1] + '\n' + p[2] 700 else: 701 p[0] = p[1] 702 parser_verbose(p) 703 704def p_bulletlist(p): 705 '''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline''' 706 p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>' 707 parser_verbose(p) 708 709def p_row(p): 710 '''row : ITEM maybe_lines 711 | ITEM BLANKLINE maybe_lines''' 712 if len(p) == 3: 713 p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>' 714 else: 715 p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>' 716 parser_verbose(p) 717 718def p_rows(p): 719 '''rows : row 720 | row rows''' 721 if len(p) == 3: 722 p[0] = p[1] + '\n' + p[2] 723 else: 724 p[0] = p[1] 725 parser_verbose(p) 726 727def p_table(p): 728 '''table : TABLESTART rows TABLEEND maybe_blankline''' 729 p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>' 730 parser_verbose(p) 731 732def p_texinfocmd(p): 733 '''texinfocmd : unknown_texinfocmd 734 | comment 735 | multitable 736 | nottex 737 | tex''' 738 p[0] = p[1] 739 740def p_unknown_texinfocmd(p): 741 '''unknown_texinfocmd : TEXINFO''' 742 print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr) 743 p[0] = p[1] 744 parser_verbose(p) 745 746def p_nottex(p): 747 '''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX''' 748 p[0] = p[2] 749 750def p_tex(p): 751 '''tex : IFTEX paragraph_content ENDIFTEX''' 752 # text for TeX formatter inside @iftex is discarded 753 p[0] = '' 754 755def p_comment(p): 756 '''comment : COMMENT''' 757 # comment text is discarded 758 p[0] = '' 759 760def p_mct_columns(p): 761 '''mct_columns : maybe_lines 762 | maybe_lines MCT_COLUMN_SEPARATOR mct_columns''' 763 if len(p) == 4: 764 p[0] = '<entry>' + p[1] + '</entry>' + p[3] 765 else: 766 p[0] = '<entry>' + p[1] + '</entry>' 767 parser_verbose(p) 768 769def p_mct_row(p): 770 '''mct_row : MCT_ITEM mct_columns''' 771 p[0] = '<row>' + p[2] + '</row>' 772 parser_verbose(p) 773 774def p_mct_rows(p): 775 '''mct_rows : mct_row 776 | mct_row mct_rows''' 777 if len(p) == 3: 778 p[0] = p[1] + '\n' + p[2] 779 else: 780 p[0] = p[1] 781 parser_verbose(p) 782 783def p_mct_header(p): 784 '''mct_header : MCT_HEADITEM mct_columns''' 785 p[0] = '<row>' + p[2] + '</row>' 786 parser_verbose(p) 787 788def p_multitable(p): 789 '''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE''' 790 # this doesn't handle the prototype row form of @multitable, only the @columnfractions form 791 colfrac = p[1].replace('@multitable @columnfractions', '').split() 792 colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac]) 793 header = '<thead>' + p[2] + '</thead>\n' 794 body = '<tbody>' + p[3] + '</tbody>\n' 795 p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) + '">' + colspec + header + body + '</tgroup></informaltable>' 796 parser_verbose(p) 797 798 799def p_error(t): 800 sys.exit('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token())) 801 802 803# protect creating the parser with a lockfile, so that when multiple processes 804# are running this script simultaneously, we don't get one of them generating a 805# parsetab.py file, while another one attempts to read it... 806# 807# see also https://github.com/dabeaz/ply/pull/184 808with open(os.path.join(os.path.dirname(__file__), 'parsetab.lock'), 'w+') as lockfile: 809 fcntl.flock(lockfile.fileno(), fcntl.LOCK_EX) 810 parser = yacc.yacc(start='input') 811 fcntl.flock(lockfile.fileno(), fcntl.LOCK_UN) 812 813# 814# 815# 816 817def main(file): 818 content = file.read() 819 content = remove_noncomments(content) 820 processed = process(content) 821 perform(processed) 822 823 # output the XML tree 824 s = lxml.etree.tostring(rootelement, pretty_print=True, encoding='unicode') 825 826 if not s: 827 print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr) 828 exit(1) 829 830 print(s) 831 832 833# 834# 835# 836if __name__ == '__main__': 837 options = OptionParser() 838 options.add_option('-v', '--verbose', action='count', dest='verbose', default=0) 839 (opts, args) = options.parse_args() 840 841 verbose = opts.verbose 842 843 if len(args) > 0: 844 main(open(args[0], 'rb')) 845 else: 846 main(sys.stdin) 847