[talweg.git] / reports / ipynb_generator.py

#!/usr/bin/env python

import sys, os, re, logging

# Short language tags, as written right after a ``-----`` cell delimiter,
# mapped to the language names used by markdown/pandoc
shortname2language = {
    'c': 'C',
    'cpp': 'Cpp',
    'f': 'Fortran',
    'html': 'HTML',
    'js': 'JavaScript',
    'r': 'R',
    'rb': 'Ruby',
    'pl': 'Perl',
    'py': 'Python',
    'sh': 'Bash',
    'tex': 'Tex',
}

# Matches a cell delimiter line and captures the optional language tag
_CELL_DELIMITER = re.compile(r'-----([a-z0-9-]+)?')


def _expand_includes(text):
    """Replace every line starting with ``#include "`` by that file's text."""
    lines = text.splitlines()
    for i, line in enumerate(lines):
        if line.startswith('#include "'):
            filename = line.split('"')[1]
            with open(filename, 'r') as f:
                lines[i] = f.read()
    return '\n'.join(lines)


def _parse_mako_args(argv):
    """Turn ``key=value`` strings into a dict of Mako variables."""
    mako_kwargs = {}
    for arg in argv:
        # Split on the first '=' only, so values may themselves contain '='
        key, sep, value = arg.partition('=')
        if not sep:
            raise ValueError(
                'Expected key=value argument, got {!r}'.format(arg))
        mako_kwargs[key] = value
    return mako_kwargs


def _run_mako(text, mako_kwargs):
    """Render text as a Mako template; return it unchanged without mako."""
    try:
        import mako
    except ImportError:
        print('Cannot import mako - mako is not run')
        return text

    from mako.template import Template
    from mako.lookup import TemplateLookup
    lookup = TemplateLookup(directories=[os.curdir])
    temp = Template(text=text, lookup=lookup, strict_undefined=True)
    logging.info('******* mako_kwargs: %s', mako_kwargs)
    return temp.render(**mako_kwargs)


def _parse_cells(text):
    """Split delimiter-separated text into ``[type, language, source]`` cells."""
    cells = []
    for line in text.splitlines():
        if not line.startswith('-----'):
            # Ordinary line: it belongs to the most recently opened cell
            if not cells:
                raise SyntaxError('line\n {}\nhas no beginning cell delimiter'
                    .format(line))
            cells[-1][2].append(line)
            continue

        # New cell, what type?  The pattern always matches here because
        # the line is known to start with '-----'.
        shortname = _CELL_DELIMITER.match(line).group(1)
        if not shortname:
            logging.info('******* cell: markdown')
            cells.append(['markdown', 'text', ['\n']])
            continue

        # A trailing "-t" (e.g. py-t) asks for the code to be typeset as
        # static Markdown code instead of an executable cell
        logging.info('******* found shortname %s', shortname)
        astext = shortname[-2:] == '-t'
        logging.info('******* cell: astext=%s shortname=%s',
                     astext, shortname)
        if astext:
            shortname = shortname[:-2]
        # Resolve the language for THIS cell; unknown tags are used as-is
        # rather than inheriting the language of an earlier cell
        fullname = shortname2language.get(shortname, shortname)
        if astext:
            cells.append(['markdown', 'code', ['\n']])
            cells[-1][2].append('```%s\n' % fullname)
        else:
            cells.append(['codecell', fullname, []])

    # Merge the lines in each cell to a string
    for cell in cells:
        if cell[0] == 'markdown' and cell[1] == 'code':
            # Add an ending ``` of code
            cell[2].append('```\n')
        cell[2] = '\n'.join(cell[2])
    return cells


def read(text, argv=None):
    """Parse cell-delimited source text into a list of notebook cells.

    The text is processed in three steps:

    1. Each line starting with ``#include "`` is replaced by the contents
       of the file named between the double quotes.
    2. If mako can be imported, the text is rendered as a Mako template
       (with ``strict_undefined=True``) using the ``key=value`` pairs in
       *argv* as template variables; otherwise a notice is printed and the
       text is left as it is.
    3. The text is split into cells at lines starting with ``-----``.
       A bare delimiter opens a Markdown cell, ``-----py`` opens a code
       cell, and ``-----py-t`` opens a Markdown cell holding a fenced
       code listing.

    Parameters:
        text: the complete source document as one string.
        argv: list of ``key=value`` strings; defaults to ``sys.argv[2:]``,
            looked up when the function is called.

    Returns:
        A list of ``[cell_type, language, source]`` lists.  ``cell_type``
        is ``'markdown'`` or ``'codecell'``.  For Markdown cells the second
        item is ``'text'`` or ``'code'``; for code cells it is the language
        name from ``shortname2language`` (or the tag itself if unknown).

    Raises:
        ValueError: an *argv* entry contains no ``=``.
        SyntaxError: a line appears before the first cell delimiter.
    """
    if argv is None:
        argv = sys.argv[2:]

    # First read all include statements
    text = _expand_includes(text)
    logging.info('******* text after include:\n%s', text)

    # Run Mako
    text = _run_mako(text, _parse_mako_args(argv))
    logging.info('******* text after mako:\n%s', text)

    # Parse the cells
    cells = _parse_cells(text)
    # TODO: optional logging
    import pprint
    logging.info('******* cell data structure:\n%s', pprint.pformat(cells))
    return cells

def write(cells):
    """Serialize the parsed cells as a Jupyter notebook string.

    Each item of *cells* is a ``(cell_type, language, source)`` triple.
    ``'markdown'`` items become Markdown cells and ``'codecell'`` items
    become code cells; items of any other type are left out.  The
    language entry is not used.  Returns the string produced by the
    nbformat v4 ``writes`` function.
    """
    # Jupyter's own nbformat package does the actual notebook construction
    from nbformat.v4 import (
        new_code_cell, new_markdown_cell, new_notebook, writes)

    builders = {
        'markdown': new_markdown_cell,
        'codecell': new_code_cell,
    }
    notebook_cells = [
        builders[kind](source=source)
        for kind, _language, source in cells
        if kind in builders
    ]
    return writes(new_notebook(cells=notebook_cells))

def driver():
    """Compile the document named on the command line into a notebook.

    ``sys.argv[1]`` is the input file; any further arguments are handed
    to ``read`` as ``key=value`` template variables.  The notebook is
    written next to the input, with the input's extension replaced by
    ``.ipynb``.  If the file name is missing or the file cannot be read,
    a usage message and the error are printed and the program exits
    with status 1.
    """
    try:
        filename = sys.argv[1]
        with open(filename, 'r') as f:
            text = f.read()
    except (IndexError, IOError) as e:
        print('Usage: %s filename' % (sys.argv[0]))
        print(e)
        sys.exit(1)
    cells = read(text, argv=sys.argv[2:])
    filestr = write(cells)
    # Replace whatever extension the input has (conventionally .gj, for
    # "generate Jupyter") instead of blindly chopping three characters
    filename = os.path.splitext(filename)[0] + '.ipynb'
    with open(filename, 'w') as f:
        f.write(filestr)

if __name__ == '__main__':
    # Start each run with a fresh log by deleting the one left behind
    # by a previous run
    log_path = 'tmp.log'
    if os.path.isfile(log_path):
        os.remove(log_path)
    # Log bare messages (no level or timestamp prefix), DEBUG and above
    logging.basicConfig(filename=log_path, level=logging.DEBUG,
                        format='%(message)s')
    driver()
Commit	Line	Data
	1	#!/usr/bin/env python
	2
	3	import sys, os, re, logging
	4
	5	# Languages mapping as used by markdown/pandoc
	6	shortname2language = dict(
	7	c='C',
	8	cpp='Cpp',
	9	f='Fortran',
	10	html='HTML',
	11	js='JavaScript',
	12	r='R',
	13	rb='Ruby',
	14	pl='Perl',
	15	py='Python',
	16	sh='Bash',
	17	tex='Tex',
	18	)
	19
	20	def read(text, argv=sys.argv[2:]):
	21	lines = text.splitlines()
	22	# First read all include statements
	23	for i in range(len(lines)):
	24	if lines[i].startswith('#include "'):
	25	filename = lines[i].split('"')[1]
	26	with open(filename, 'r') as f:
	27	include_text = f.read()
	28	lines[i] = include_text
	29	text = '\n'.join(lines)
	30	logging.info('******* text after include:\n{}'.format(text))
	31
	32	# Run Mako
	33	mako_kwargs = {}
	34	for arg in argv:
	35	key, value = arg.split('=')
	36	mako_kwargs[key] = value
	37
	38	try:
	39	import mako
	40	has_mako = True
	41	except ImportError:
	42	print('Cannot import mako - mako is not run')
	43	has_mako = False
	44
	45	if has_mako:
	46	from mako.template import Template
	47	from mako.lookup import TemplateLookup
	48	lookup = TemplateLookup(directories=[os.curdir])
	49	# text = text.encode('utf-8')
	50	temp = Template(text=text, lookup=lookup, strict_undefined=True)
	51	logging.info('******* mako_kwargs: {}'.format(str(mako_kwargs)))
	52	text = temp.render(**mako_kwargs)
	53
	54	logging.info('******* text after mako:\n{}'.format(text))
	55
	56	# Parse the cells
	57	lines = text.splitlines()
	58	cells = []
	59	inside = None # indicates which type of cell we are inside
	60	fullname = None # full language name in code cells
	61	for line in lines:
	62	if line.startswith('-----'):
	63	# New cell, what type?
	64	m = re.search(r'-----([a-z0-9-]+)?', line)
	65	if m:
	66	shortname = m.group(1)
	67	if shortname:
	68	# Check if code is to be typeset as static
	69	# Markdown code (e.g., shortname=py-t)
	70	logging.info('******* found shortname {}'
	71	.format(shortname))
	72	astext = shortname[-2:] == '-t'
	73	logging.info('******* cell: astext={} shortname={}'
	74	.format(astext, shortname))
	75	if astext:
	76	# Markdown
	77	shortname = shortname[:-2]
	78	inside = 'markdown'
	79	cells.append(['markdown', 'code', ['\n']])
	80	cells[-1][2].append('```%s\n' % fullname)
	81	else:
	82	# Code cell
	83	if shortname in shortname2language:
	84	fullname = shortname2language[shortname]
	85	inside = 'codecell'
	86	cells.append(['codecell', fullname, []])
	87	else:
	88	logging.info('******* cell: markdown')
	89	# Markdown cell
	90	inside = 'markdown'
	91	cells.append(['markdown', 'text', ['\n']])
	92	else:
	93	raise SyntaxError('Wrong syntax of cell delimiter:\n{}'
	94	.format(repr(line)))
	95	else:
	96	# Ordinary line in a cell
	97	if inside in ('markdown', 'codecell'):
	98	cells[-1][2].append(line)
	99	else:
	100	raise SyntaxError('line\n {}\nhas no beginning cell delimiter'
	101	.format(line))
	102	# Merge the lines in each cell to a string
	103	for i in range(len(cells)):
	104	if cells[i][0] == 'markdown' and cells[i][1] == 'code':
	105	# Add an ending ``` of code
	106	cells[i][2].append('```\n')
	107	cells[i][2] = '\n'.join(cells[i][2])
	108	# TODO: optional logging
	109	import pprint
	110	logging.info('******* cell data structure:\b%s' % pprint.pformat(cells))
	111	return cells
	112
	113	def write(cells):
	114	"""Turn cells list into valid IPython notebook code."""
	115	# Use Jupyter nbformat functionality for writing the notebook
	116
	117	from nbformat.v4 import (
	118	new_code_cell, new_markdown_cell, new_notebook, writes)
	119	nb_cells = []
	120
	121	for cell_tp, language, block in cells:
	122	if cell_tp == 'markdown':
	123	nb_cells.append(
	124	new_markdown_cell(source=block))
	125	elif cell_tp == 'codecell':
	126	nb_cells.append(new_code_cell(source=block))
	127
	128	nb = new_notebook(cells=nb_cells)
	129	filestr = writes(nb)
	130	return filestr
	131
	132	def driver():
	133	"""Compile a document and its variables."""
	134	try:
	135	filename = sys.argv[1]
	136	with open(filename, 'r') as f:
	137	text = f.read()
	138	except (IndexError, IOError) as e:
	139	print('Usage: %s filename' % (sys.argv[0]))
	140	print(e)
	141	sys.exit(1)
	142	cells = read(text, argv=sys.argv[2:])
	143	filestr = write(cells)
	144	# Assuming file extension .gj (generate Jupyter); TODO: less strict
	145	filename = filename[:-3] + '.ipynb'
	146	with open(filename, 'w') as f:
	147	f.write(filestr)
	148
	149	if __name__ == '__main__':
	150	logfile = 'tmp.log'
	151	if os.path.isfile(logfile):
	152	os.remove(logfile)
	153	logging.basicConfig(format='%(message)s', level=logging.DEBUG,
	154	filename=logfile)
	155	driver()