1#!/usr/bin/env python3
2# Copyright (c) 2016 Jon Turney <jon.turney@dronecode.org.uk>
3#
4# python script to process makedoc instructions in a source file and produce
5# DocBook XML output
6#
7
8#
# This performs 3 stages of processing on its input, in a similar fashion
10# to makedoc:
11#
12# 1. Discard everything outside of /*  */ comments
13# 2. Identify lines which contains commands (a single uppercase word)
14# 3. Apply each command to the text of the following lines (up to the next
15#    command or the end of the comment block), to produce some output
16#
17# The resulting output contains one or more DocBook XML refentry elements.
18#
19# To make the output a valid XML document which can be xincluded, those refentry
20# elements are contained by a refcontainer element.  refcontainer is not part of
21# the DocBook DTD and should be removed by a suitable XSLT.
22#
23
24from __future__ import print_function
25
26import fcntl
27import sys
28import os
29import re
30from optparse import OptionParser
31import lxml.etree
32import ply.lex as lex
33import ply.yacc as yacc
34
35rootelement = None  # root element of the XML tree
36refentry = None     # the current refentry
37verbose = 0
38
def dump(s, stage, threshold=1):
    """Debugging aid: print a banner-delimited snapshot of the text at a
    given processing stage, when verbosity exceeds the threshold."""
    if verbose <= threshold:
        return
    banner = '*' * 40
    for line in (banner, stage, banner, '%s' % s, banner):
        print(line, file=sys.stderr)
46
47#
48# Stage 1
49#
50
def skip_whitespace_and_stars(i, src):
    """Return the index of the first character at or after i in src that is
    neither whitespace nor a decorative '*' (a '*' that begins the '*/'
    comment terminator is not skipped)."""

    # Bounds-check the lookahead: the original unconditionally read
    # src[i + 1], raising IndexError when a trailing '*' was the last
    # character of src.  A final '*' with no following character cannot
    # start '*/', so it is skipped like any other decorative star.
    while i < len(src) and (src[i].isspace() or
                            (src[i] == '*' and (i + 1 >= len(src) or src[i + 1] != '/'))):
        i += 1

    return i
57
58# Discard everything not inside '/*  */' style-comments which start at column 0
59# Discard any leading blank space or '*'
60# Discard a single leading '.'
61# Discard blank lines after a blank line
def comment_contents_generator(src):
    """Yield, character by character, the contents of '/*  */' comments in
    src, per the rules in the comment above, emitting an END command line
    at the close of each comment block."""
    i = 0

    # scan for a '/*' opener at the start of a line (the caller prefixes
    # src with '\n' so a comment on the very first line also matches)
    while i < len(src) - 2:
        if src[i] == '\n' and src[i + 1] == '/' and src[i + 2] == '*':
            i = i + 3

            i = skip_whitespace_and_stars(i, src)

            # discard a single leading '.'
            # NOTE(review): src[i] can index past the end if a comment
            # opener sits at the very end of src — assumes well-formed input
            if src[i] == '.':
                i += 1

            while i < len(src):
                if src[i] == '\n':
                    yield '\n'
                    i += 1

                    # allow a single blank line
                    if i < len(src) and src[i] == '\n':
                        yield '\n'
                        i += 1

                    i = skip_whitespace_and_stars(i, src)

                elif src[i] == '*' and src[i + 1] == '/':
                    i = i + 2
                    # If we have just output \n\n, this adds another blank line.
                    # This is the only way a double blank line can occur.
                    yield '\nEND\n'
                    break
                else:
                    yield src[i]
                    i += 1
        else:
            i += 1
97
def remove_noncomments(src):
    """Return only the comment contents of src (stage 1 of processing)."""
    # Prefix a newline so a comment opening on the very first line is
    # still recognized by the '\n/*' pattern in the generator.
    dst = ''.join(comment_contents_generator('\n' + src))
    dump(dst, 'extracted from comments')
    return dst
104
105#
106# Stage 2
107#
108
109# A command is a single word of at least 3 characters, all uppercase, and alone on a line
def iscommand(l):
    """Return True if l is a makedoc command: a single word of at least 3
    uppercase letters/underscores, alone on the line."""
    return re.match(r'^[A-Z_]{3,}\s*$', l) is not None
115
def command_block_generator(content):
    """Yield (command, text) tuples: each command line paired with the text
    that follows it, up to the next command.  Text preceding the first
    command is attributed to a synthetic START command."""
    current = 'START'
    accumulated = []

    for line in content.splitlines():
        if iscommand(line):
            yield (current, ''.join(accumulated))
            current = line.rstrip()
            accumulated = []
        else:
            accumulated.append(line + '\n')
    yield (current, ''.join(accumulated))
128
129# Look for commands, which give instructions how to process the following input
def process(content):
    """Stage 2: split content into an ordered list of (command, text) tuples."""
    content = content.lstrip()

    dump(content, 'about to process for commands')

    # the order in which commands appear determines the order of the
    # generated sections, so materialize the generator into a list
    return list(command_block_generator(content))
140
141#
142# Stage 3
143#
144
#  invoke each command on its text
def perform(processed):
    """Stage 3: invoke the handler for each (command, text) tuple in order."""
    for command, text in processed:
        c = command.rstrip()
        t = text.strip() + '\n'

        if verbose:
            print("performing command '%s'" % c, file=sys.stderr)

        handler = command_dispatch_dict.get(c)
        if handler is not None:
            handler(c, t)
        else:
            print("command '%s' is not recognized" % c, file=sys.stderr)
            # the text following an unrecognized command is discarded
159
160# FUNCTION (aka TYPEDEF)
161#
def function(c, l):
    """Handle the FUNCTION (or TYPEDEF) command.

    l contains one or more 'names --- description' entries.  Creates the
    XML root element (if needed) and a new refentry populated with
    refmeta and refnamediv built from the names and description.
    """
    global refentry
    global rootelement

    l = l.strip()
    if verbose:
        print('FUNCTION %s' % l, file=sys.stderr)

    separator = '---'

    if ';' in l:
        # fpclassify has an unusual format we also need to handle
        spliton = ';'
        l = l.splitlines()[0]
    elif len(l.splitlines()) > 1:
        # a few pages like mktemp have two '---' lines
        # join them into one ';'-separated string so one code path below
        # handles both layouts
        spliton = ';'
        o = ''
        for i in l.splitlines():
            if separator in i:
                o += i + ';'
            else:
                o += i
        l = o[:-1]
    else:
        spliton = '\n'

    namelist = []
    descrlist = []
    for a in l.split(spliton):
        (n, d) = a.split(separator, 1)
        namelist = namelist + n.split(',')
        descrlist = descrlist + [d]

    # only copysign and log1p use <[ ]> markup in descr,
    # only gets() uses << >> markup
    # but we should handle it correctly
    descr = line_markup_convert(', '.join(descrlist))

    # fpclassify includes an 'and' we need to discard
    # NOTE(review): positional count argument to re.sub is deprecated from
    # Python 3.13 — consider count=1
    namelist = map(lambda v: re.sub(r'^and ', r'', v.strip(), 1), namelist)
    # strip off << >> surrounding name
    namelist = map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist)
    # instantiate list to make it subscriptable
    namelist = list(namelist)

    if verbose:
        print(namelist, file=sys.stderr)
    # additional alternate names may also appear in INDEX commands

    # create the root element if needed
    if rootelement is None:
        rootelement = lxml.etree.Element('refentrycontainer')

    # FUNCTION implies starting a new refentry
    if refentry is not None:
        sys.exit("multiple FUNCTIONs without NEWPAGE")

    # create the refentry
    refentry = lxml.etree.SubElement(rootelement, 'refentry')
    refentry.append(lxml.etree.Comment(' Generated by makedocbook.py '))
    refentry.set('id', namelist[0].lstrip('_'))

    refmeta = lxml.etree.SubElement(refentry, 'refmeta')
    # refentrytitle will be same as refdescriptor, the primary name
    refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle')
    refentrytitle.text = namelist[0]
    manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum')
    manvolnum.text = '3'

    refnamediv = lxml.etree.SubElement(refentry, 'refnamediv')
    # refdescriptor is the primary name, assume we should use the one which
    # appears first in the list
    refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor')
    refdescriptor.text = namelist[0]
    # refname elements exist for all alternate names
    for n in namelist:
        refname = lxml.etree.SubElement(refnamediv, 'refname')
        refname.text = n
    refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
    # parse descr as XML so its inline markup becomes child elements
    refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))

    # Only FUNCTION currently exists, which implies that the SYNOPSIS should be
    # a funcsynopsis.  If TYPEDEF was to be added, SYNOPSIS should be processed
    # in a different way, probably producing a refsynopsis.
247
248# INDEX
249# may occur more than once for each FUNCTION giving alternate names this
250# function should be indexed under
251#
def index(c, l):
    """Handle an INDEX command: add an indexterm for the given name, and
    record it as an additional refname if not already present."""
    l = l.strip()

    if verbose:
        print('INDEX %s' % l, file=sys.stderr)

    # discard anything after the first word
    l = l.split()[0]

    # add indexterm
    # (we could just index under all the refnames, but we control the indexing
    # separately as that is what makedoc does)
    indexterm = lxml.etree.SubElement(refentry, 'indexterm')
    primary = lxml.etree.SubElement(indexterm, 'primary')
    primary.text = l

    # to validate, it seems we need to maintain refentry elements in a certain order
    # (non-element nodes such as comments have a non-str tag and sort first)
    refentry[:] = sorted(refentry, key=lambda x: x.tag if isinstance(x.tag, str) else '')

    # adds another alternate refname
    refnamediv = refentry.find('refnamediv')

    # as long as it doesn't already exist
    if not refnamediv.xpath(('refname[.="%s"]') % l):
        refname = lxml.etree.SubElement(refnamediv, 'refname')
        refname.text = l
        if verbose > 1:
            print('added refname %s' % l, file=sys.stderr)
    else:
        if verbose > 1:
            print('duplicate refname %s discarded' % l, file=sys.stderr)

    # to validate, it seems we need to maintain refnamediv elements in a certain order
    refnamediv[:] = sorted(refnamediv, key=lambda x: x.tag)
286
287
288# SYNOPSIS aka ANSI_SYNOPSIS
289# ANSI-style synopsis
290#
291# Note that makedoc would also process <<code>> markup here, but there are no
292# such uses.
293#
def synopsis(c, t):
    """Handle SYNOPSIS/ANSI_SYNOPSIS: build a funcsynopsis element from
    the prototype lines in t.

    Non-prototype lines (preprocessor directives, structs, bracketed
    comments) become funcsynopsisinfo; 'Link with ...' lines are dropped;
    everything else is accumulated until a ';' completes a prototype.
    Exits with an error on a malformed synopsis.
    """
    refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv')
    funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis')

    s = ''
    for l in t.splitlines():
        if re.match(r'\s*(#|\[|struct)', l):
            # preprocessor # directives, structs, comments in square brackets
            funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
            funcsynopsisinfo.text = l.strip() + '\n'
        elif re.match(r'[Ll]ink with', l):
            pass
        else:
            # accumulate continuation lines of a (possibly multi-line) prototype
            s = s + l

            # a prototype without a terminating ';' is an error
            # (the original had an unreachable "s = s + ';'" after this
            # exit; it has been removed as dead code)
            if s.endswith(')'):
                sys.exit("'%s' missing terminating semicolon" % l)

            if ';' in s:
                synopsis_for_prototype(funcsynopsis, s)
                s = ''

    if s.strip():
        sys.exit("surplus synopsis '%s'" % s)
320
def synopsis_for_prototype(funcsynopsis, s):
    """Convert the ';'-separated prototypes in s into funcprototype
    elements under funcsynopsis."""
    s = s.strip()

    # funcsynopsis has a very detailed content model, so we need to massage the
    # bare prototype into it.  Fortunately, since the parameter names are marked
    # up, we have enough information to do this.
    for fp in s.split(';'):
        fp = fp.strip()
        if fp:

            if verbose:
                print("'%s'" % fp, file=sys.stderr)

            # split into return type (group 1), function name (group 2)
            # and parameter list (group 3)
            match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)

            if verbose:
                print(match.groups(), file=sys.stderr)

            funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
            funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
            funcdef.text = match.group(1)
            function = lxml.etree.SubElement(funcdef, 'function')
            function.text = match.group(2)

            if match.group(3).strip() == 'void':
                void = lxml.etree.SubElement(funcprototype, 'void')
            else:
                # Split parameters on ',' except if it is inside ()
                for p in re.split(r',(?![^()]*\))', match.group(3)):
                    p = p.strip()

                    if verbose:
                        print(p, file=sys.stderr)

                    if p == '...':
                        varargs = lxml.etree.SubElement(funcprototype, 'varargs')
                    else:
                        paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
                        parameter = lxml.etree.SubElement(paramdef, 'parameter')

                        # <[ ]> enclose the parameter name
                        match2 = re.match(r'(.*)<\[(.*)\]>(.*)', p)

                        if verbose:
                            print(match2.groups(), file=sys.stderr)

                        # text before the name is the type; text after
                        # (e.g. array brackets) becomes the tail
                        paramdef.text = match2.group(1)
                        parameter.text = match2.group(2)
                        parameter.tail = match2.group(3)
370
371
372# DESCRIPTION
373# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES  are handled the same)
374#
375# Create a refsect with a title corresponding to the command
376#
# Nearly all the existing DESCRIPTION contents could be transformed into
378# DocBook with a few regex substitutions.  Unfortunately, pages like sprintf and
379# sscanf, have very complex layout using nested tables and itemized lists, which
380# it is best to parse in order to transform correctly.
381#
def refsect(t, s):
    """Create a refsect1 titled after the command name t, with contents
    produced by lexing/parsing the section text s."""
    # (local name deliberately shadows this function's own name; harmless)
    refsect = lxml.etree.SubElement(refentry, 'refsect1')
    title = lxml.etree.SubElement(refsect, 'title')
    title.text = t.title()

    if verbose:
        print('%s has %d paragraphs' % (t, len(s.split('\n\n'))), file=sys.stderr)

    if verbose > 1:
        dump(s, 'before lexing')

        # dump out lexer token sequence
        lex.input(s)
        for tok in lexer:
            print(tok, file=sys.stderr)

    # parse the section text for makedoc markup and the few pieces of texinfo
    # markup we understand, and output an XML marked-up string
    xml = parser.parse(s, tracking=True, debug=(verbose > 2))

    dump(xml, 'after parsing')

    # wrap so the fragment has a single root, then graft its children
    # into the refsect1 element created above
    xml = '<refsect1>' + xml + '</refsect1>'

    refsect.extend(lxml.etree.fromstring(xml))
407
def seealso(c, t):
    # SEEALSO contents are emitted as an ordinary refsect1 titled 'See Also'
    refsect('SEE ALSO', t)
410
411# NEWPAGE
412#
413# start a new refentry
414
def newpage(c, t):
    """NEWPAGE command: close the current refentry so that the next
    FUNCTION command starts a fresh one."""
    global refentry
    refentry = None
418
419# command dispatch table
420
def discarded(c, t):
    """Handler for commands whose following text is intentionally ignored."""
    return None
423
# maps each recognized command name to its handler function
command_dispatch_dict = {
    'FUNCTION':      function,
    'TYPEDEF':       function,     # TYPEDEF is not currently used, but described in doc.str
    'INDEX':         index,
    'TRAD_SYNOPSIS': discarded,    # K&R-style synopsis, obsolete and discarded
    'ANSI_SYNOPSIS': synopsis,
    'SYNOPSIS':      synopsis,
    'DESCRIPTION':   refsect,
    'RETURNS':       refsect,
    'ERRORS':        refsect,
    'PORTABILITY':   refsect,
    'BUGS':          refsect,
    'WARNINGS':      refsect,
    'SEEALSO':       seealso,
    'NOTES':         refsect,      # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
    'QUICKREF':      discarded,    # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
    'MATHREF':       discarded,
    'START':         discarded,    # a START command is inserted to contain the text before the first command
    'END':           discarded,    # an END command is inserted merely to terminate the text for the last command in a comment block
    'NEWPAGE':       newpage,
}
445
446#
447# Utility functions
448#
449
450# apply transformations which are easy to do in-place
def line_markup_convert(p):
    """Convert makedoc inline markup and a small amount of texinfo markup
    in p to DocBook XML, exiting on any unhandled texinfo command."""
    s = p

    # XML-escape first; every pattern after this matches the escaped
    # forms, so the order of this table is significant:
    #   <<somecode>> -> <code>somecode</code>
    #   <[var]>      -> <varname>var</varname>
    # (also handles nested << <[ ]> >> correctly)
    for old, new in (('&', '&amp;'),
                     ('<', '&lt;'),
                     ('>', '&gt;'),
                     ('&lt;&lt;', '<code>'),
                     ('&lt;[', '<varname>'),
                     (']&gt;', '</varname>'),
                     ('&gt;&gt;', '</code>')):
        s = s.replace(old, new)

    # simple texinfo markup
    # @emph{foo} -> <emphasis>foo</emphasis>
    s = re.sub(r'@emph{(.*?)}', r'<emphasis>\1</emphasis>', s)
    # @strong{foo} -> <emphasis role=strong>foo</emphasis>
    s = re.sub(r'@strong{(.*?)}', r'<emphasis role="strong">\1</emphasis>', s)
    s = s.replace('@minus{}', '&#x2212;')  # U+2212 MINUS SIGN
    s = s.replace('@dots{}', '&#x2026;')   # U+2026 HORIZONTAL ELLIPSIS

    # convert xref and pxref
    s = re.sub(r'@xref{(.*?)}', r"See <xref linkend='\1'/>", s)

    # very hacky way of dealing with @* to force a newline
    s = s.replace('@*', '</para><para>')

    # fail if there are unhandled texinfo commands
    match = re.search(r'(?<!@)@[^@\s]+', s)
    if match:
        sys.exit("texinfo command '%s' remains in output" % match.group(0))

    # process the texinfo escape for an @
    s = s.replace('@@', '@')

    if (verbose > 3) and (s != p):
        print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)

    return s
495
496#
497# lexer
498#
499
# maps a texinfo command name (matched at the start of an '@' line) to the
# lexer token type it produces; see t_TEXINFO
texinfo_commands = {
    'ifnottex': 'IFNOTTEX',
    'end ifnottex': 'ENDIFNOTTEX',
    'tex': 'IFTEX',
    'end tex': 'ENDIFTEX',
    'comment': 'COMMENT',
    'c ': 'COMMENT',
    'multitable': 'MULTICOLUMNTABLE',
    'end multitable': 'ENDMULTICOLUMNTABLE',
    'headitem': 'MCT_HEADITEM',
    'tab': 'MCT_COLUMN_SEPARATOR',
    'item': 'MCT_ITEM',
}
513
514# token names
# (ply requires this list; the texinfo command tokens are appended from
# the values of texinfo_commands)
tokens = [
    'BLANKLINE',
    'BULLETEND',
    'BULLETSTART',
    'COURIER',
    'EOF',
    'ITEM',
    'TABLEEND',
    'TABLESTART',
    'TEXINFO',
    'TEXT',
] + list(set(texinfo_commands.values()))
527
528# regular expression rules for tokens, in priority order
529# (all these expressions should match a whole line)
def t_TEXINFO(t):
    # this matches any @command. but not @command{} which just happens to be at
    # the start of a line
    # (the raw string below is the ply token regex, read via __doc__)
    r'@\w+[^{]*?\n'

    # if the line starts with a known texinfo command, change t.type to the
    # token for that command
    for k in texinfo_commands.keys():
        if t.value[1:].startswith(k):
            t.type = texinfo_commands[k]
            break

    return t
543
def t_COURIER(t):
    r'[.|].*\n'
    # lines beginning with '.' or '|' are monospaced text; strip the
    # marker character and convert inline markup
    t.value = line_markup_convert(t.value[1:])
    return t
548
def t_BULLETSTART(t):
    r'O\+\n'
    # 'O+' alone on a line opens a bullet list
    return t
552
def t_BULLETEND(t):
    r'O-\n'
    # 'O-' alone on a line closes a bullet list
    return t
556
def t_TABLESTART(t):
    r'o\+\n'
    # 'o+' alone on a line opens a two-column table
    return t
560
def t_TABLEEND(t):
    r'o-\n'
    # 'o-' alone on a line closes a two-column table
    return t
564
def t_ITEM(t):
    r'o\s.*\n'
    # strip the leading 'o ' item marker, then convert inline markup
    t.value = re.sub(r'o\s', r'', lexer.lexmatch.group(0), 1)
    t.value = line_markup_convert(t.value)
    return t
570
def t_TEXT(t):
    r'.+\n'
    # any other non-empty line is plain text; convert inline markup
    t.value = line_markup_convert(t.value)
    # NOTE(review): only TEXT and BLANKLINE advance lineno, so reported
    # line numbers drift past other token types — confirm if that matters
    t.lexer.lineno += 1
    return t
576
def t_BLANKLINE(t):
    r'\n'
    # an empty line separates paragraphs
    t.lexer.lineno += 1
    return t
581
def t_eof(t):
    # emit an explicit EOF token exactly once so the grammar can use it as
    # a terminator; the second call (flag already set) really ends the
    # token stream by returning None
    if hasattr(t.lexer, 'at_eof'):
        # remove eof flag ready for lexing next input
        delattr(t.lexer, 'at_eof')
        t.lexer.lineno = 0
        return None

    t.type = 'EOF'
    t.lexer.at_eof = True

    return t
593
594# Error handling rule
def t_error(t):
    # no token rule matched: abort, showing the text that failed to tokenize
    sys.exit("tokenization error, remaining text '%s'" % t.value)
597
# build the lexer from the t_* rules above
lexer = lex.lex()
599
600#
601# parser
602#
603
def parser_verbose(p):
    # debugging aid: show the value just computed for a grammar production
    if verbose > 2:
        print(p[0], file=sys.stderr)
607
# a document is a sequence of one or more paragraphs
def p_input(p):
    '''input : paragraph
             | input paragraph'''
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
616
617# Strictly, text at top level should be paragraphs (i.e terminated by a
618# BLANKLINE), while text contained in rows or bullets may not be, but this
619# grammar doesn't enforce that for simplicity's sake.
def p_paragraph(p):
    '''paragraph : paragraph_content maybe_eof_or_blankline'''
    # wrap the accumulated content in a DocBook para element
    p[0] = '<para>\n' + p[1] + '</para>'
    parser_verbose(p)
624
def p_paragraph_content(p):
    '''paragraph_content : paragraph_line
                         | paragraph_line paragraph_content'''
    # concatenate successive lines of a paragraph
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
633
def p_paragraph_line(p):
    '''paragraph_line : TEXT
                      | texinfocmd
                      | courierblock
                      | table
                      | bulletlist'''
    # a line already converted to XML by the lexer, or a structured construct
    p[0] = p[1]
641
def p_empty(p):
    'empty :'
    # epsilon production; contributes no output
    p[0] = ''
645
def p_maybe_eof_or_blankline(p):
    '''maybe_eof_or_blankline : empty
                              | EOF
                              | BLANKLINE
                              | BLANKLINE EOF'''
    # optional paragraph terminator; contributes no output
    p[0] = ''
652
def p_maybe_lines(p):
    '''maybe_lines : empty
                   | paragraph maybe_lines'''
    # zero or more paragraphs (used inside list items and table cells)
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
661
def p_maybe_blankline(p):
    '''maybe_blankline : empty
                       | BLANKLINE'''
    # optional blank line; contributes no output
    p[0] = ''
666
def p_courierblock(p):
    '''courierblock : courier'''
    # monospaced lines become a literallayout block
    p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>'
    parser_verbose(p)
671
def p_courier(p):
    '''courier : COURIER
               | COURIER courier'''
    # concatenate consecutive monospaced lines
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
680
def p_bullet(p):
    '''bullet : ITEM maybe_lines
              | ITEM BLANKLINE maybe_lines'''
    if len(p) == 3:
        # Glue any text in ITEM into the first para of maybe_lines
        # (This is an unfortunate consequence of the line-based tokenization we do)
        if p[2].startswith('<para>'):
            p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
        else:
            p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
    else:
        # ITEM followed by a blank line: item text is its own para
        p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
    parser_verbose(p)
694
def p_bullets(p):
    '''bullets : bullet
               | bullet bullets'''
    # one or more listitems
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
703
def p_bulletlist(p):
    '''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline'''
    # 'O+' ... 'O-' becomes an itemizedlist
    p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>'
    parser_verbose(p)
708
def p_row(p):
    '''row : ITEM maybe_lines
           | ITEM BLANKLINE maybe_lines'''
    # table row: item text is the first (monospaced) column, following
    # lines are the second column
    if len(p) == 3:
        p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
    else:
        p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
    parser_verbose(p)
717
def p_rows(p):
    '''rows : row
            | row rows'''
    # one or more table rows
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
726
def p_table(p):
    '''table : TABLESTART rows TABLEEND maybe_blankline'''
    # 'o+' ... 'o-' becomes a two-column informaltable
    p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>'
    parser_verbose(p)
731
def p_texinfocmd(p):
    '''texinfocmd : unknown_texinfocmd
                  | comment
                  | multitable
                  | nottex
                  | tex'''
    # the handful of texinfo constructs we understand
    p[0] = p[1]
739
def p_unknown_texinfocmd(p):
    '''unknown_texinfocmd : TEXINFO'''
    # warn, but pass the unrecognized @-line through unchanged
    print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr)
    p[0] = p[1]
    parser_verbose(p)
745
def p_nottex(p):
    '''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX'''
    # @ifnottex content is kept (we are not the TeX formatter)
    p[0] = p[2]
749
def p_tex(p):
    '''tex : IFTEX paragraph_content ENDIFTEX'''
    # text for TeX formatter inside @iftex is discarded
    p[0] = ''
754
def p_comment(p):
    '''comment : COMMENT'''
    # comment text is discarded
    p[0] = ''
759
def p_mct_columns(p):
    '''mct_columns : maybe_lines
                   | maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
    # @tab separates the entries of a @multitable row
    if len(p) == 4:
        p[0] = '<entry>' + p[1] + '</entry>' + p[3]
    else:
        p[0] = '<entry>' + p[1] + '</entry>'
    parser_verbose(p)
768
def p_mct_row(p):
    '''mct_row : MCT_ITEM mct_columns'''
    # @item starts a body row of a @multitable
    p[0] = '<row>' + p[2] + '</row>'
    parser_verbose(p)
773
def p_mct_rows(p):
    '''mct_rows : mct_row
                | mct_row mct_rows'''
    # one or more @multitable body rows
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
782
def p_mct_header(p):
    '''mct_header : MCT_HEADITEM mct_columns'''
    # @headitem starts the header row of a @multitable
    p[0] = '<row>' + p[2] + '</row>'
    parser_verbose(p)
787
def p_multitable(p):
    '''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE'''
    # this doesn't handle the prototype row form of @multitable, only the @columnfractions form
    colfrac = p[1].replace('@multitable @columnfractions', '').split()
    # each column fraction becomes a proportional colspec width
    colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac])
    header = '<thead>' + p[2] + '</thead>\n'
    body = '<tbody>' + p[3] + '</tbody>\n'
    p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) + '">' + colspec + header + body + '</tgroup></informaltable>'
    parser_verbose(p)
797
798
def p_error(t):
    # any syntax error in the section text is fatal
    sys.exit('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token()))
801
802
803# protect creating the parser with a lockfile, so that when multiple processes
804# are running this script simultaneously, we don't get one of them generating a
805# parsetab.py file, while another one attempts to read it...
806#
807# see also https://github.com/dabeaz/ply/pull/184
# NOTE: fcntl is POSIX-only, so this script requires a Unix-like host
with open(os.path.join(os.path.dirname(__file__), 'parsetab.lock'), 'w+') as lockfile:
    # hold an exclusive lock while ply generates or reads parsetab.py
    fcntl.flock(lockfile.fileno(), fcntl.LOCK_EX)
    parser = yacc.yacc(start='input')
    fcntl.flock(lockfile.fileno(), fcntl.LOCK_UN)
812
813#
814#
815#
816
def main(file):
    """Read makedoc-annotated source from the open file object, run the
    three processing stages, and print the resulting DocBook XML."""
    content = file.read()
    # __main__ opens the input in binary mode; decode so the string-based
    # stages below work under Python 3 (a str from stdin passes through)
    if isinstance(content, bytes):
        content = content.decode('utf-8')

    content = remove_noncomments(content)
    processed = process(content)
    perform(processed)

    # If no FUNCTION command was ever seen, no XML tree was built.
    # Previously lxml.etree.tostring(None) raised before the diagnostic
    # below could ever print, so check for that case explicitly first.
    if rootelement is None:
        print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
        exit(1)

    # output the XML tree
    s = lxml.etree.tostring(rootelement, pretty_print=True, encoding='unicode')

    if not s:
        print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
        exit(1)

    print(s)
831
832
833#
834#
835#
if __name__ == '__main__':
    # -v may be repeated to increase verbosity
    options = OptionParser()
    options.add_option('-v', '--verbose', action='count', dest='verbose', default=0)
    (opts, args) = options.parse_args()

    verbose = opts.verbose

    if len(args) > 0:
        # NOTE(review): opened in binary mode, so file.read() yields bytes
        # under Python 3 — confirm main() copes with bytes input
        main(open(args[0], 'rb'))
    else:
        main(sys.stdin)
847