ce546ad471c192f1f9934ced962ca4c3d5596d70
[talweg.git] / reports / ipynb_generator.py
1 #!/usr/bin/env python
2
3 import sys, os, re
4
# Short language codes accepted after a ``-----`` cell delimiter, mapped to
# the language names used by markdown/pandoc for fenced code blocks.
shortname2language = {
    'c': 'C',
    'cpp': 'Cpp',
    'f': 'Fortran',
    'html': 'HTML',
    'js': 'JavaScript',
    'r': 'R',
    'rb': 'Ruby',
    'pl': 'Perl',
    'py': 'Python',
    'sh': 'Bash',
    'tex': 'Tex',
}
19
def read(text, argv=sys.argv[3:]):
    """Parse notebook source text into a list of cells.

    The text is processed in three steps:

    1. Every line starting with ``#include "`` is replaced by the content
       of the file named between the double quotes.
    2. If Mako can be imported, the text is rendered as a Mako template,
       with the ``key=value`` entries of *argv* passed as template
       variables (always as strings). Without Mako a notice is printed
       and the text is used as is.
    3. The text is split into cells at lines starting with ``-----``:
       ``-----`` alone opens a Markdown text cell, ``-----py`` opens a
       code cell, and ``-----py-t`` opens a Markdown cell in which the
       code is typeset as a static fenced code block.

    Parameters:
        text: the complete source document as a string.
        argv: list of ``key=value`` strings for Mako. Only the first
            ``=`` separates key from value, so values may contain ``=``.

    Returns:
        A list of ``[cell_type, language, source]`` lists. ``cell_type``
        is ``'markdown'`` or ``'codecell'``. For Markdown cells
        ``language`` is ``'text'`` or ``'code'``; for code cells it is the
        full language name from ``shortname2language``, or the short name
        itself when it is not in that mapping. ``source`` is the cell
        content as a single string.

    Raises:
        SyntaxError: if a content line appears before any cell delimiter.
        ValueError: if an entry of *argv* contains no ``=``.
    """
    # First resolve all include statements
    lines = text.splitlines()
    for i, line in enumerate(lines):
        if line.startswith('#include "'):
            filename = line.split('"')[1]
            with open(filename, 'r') as f:
                lines[i] = f.read()
    text = '\n'.join(lines)

    # Collect the Mako variables; split on the first '=' only so that
    # values such as "expr=a=b" are accepted.
    mako_kwargs = {}
    for arg in argv:
        key, value = arg.split('=', 1)
        mako_kwargs[key] = value

    # Run Mako (optional dependency)
    try:
        import mako
        has_mako = True
    except ImportError:
        print('Cannot import mako - mako is not run')
        has_mako = False

    if has_mako:
        from mako.template import Template
        from mako.lookup import TemplateLookup
        lookup = TemplateLookup(directories=[os.curdir])
        temp = Template(text=text, lookup=lookup, strict_undefined=True)
        text = temp.render(**mako_kwargs)

    # Parse the cells
    cells = []
    inside = None  # indicates which type of cell we are inside
    for line in text.splitlines():
        if line.startswith('-----'):
            # New cell, what type? The pattern always matches here because
            # the line is known to start with the five dashes.
            shortname = re.match(r'-----([a-z0-9-]+)?', line).group(1)
            if not shortname:
                # Markdown cell
                inside = 'markdown'
                cells.append(['markdown', 'text', ['\n']])
                continue

            # Code that is to be typeset as static Markdown carries a
            # '-t' suffix (e.g., py-t); strip it before the lookup.
            astext = shortname.endswith('-t')
            if astext:
                shortname = shortname[:-2]
            # Resolve the language for *this* delimiter. Reusing the name
            # from an earlier cell would label the block with the wrong
            # language (or with None for the very first cell).
            fullname = shortname2language.get(shortname, shortname)
            if astext:
                inside = 'markdown'
                cells.append(
                    ['markdown', 'code', ['\n', '```%s\n' % fullname]])
            else:
                inside = 'codecell'
                cells.append(['codecell', fullname, []])
        elif inside in ('markdown', 'codecell'):
            # Ordinary line in a cell
            cells[-1][2].append(line)
        else:
            raise SyntaxError('line\n {}\nhas no beginning cell delimiter'
                              .format(line))

    # Merge the lines in each cell to a string
    for cell in cells:
        if cell[0] == 'markdown' and cell[1] == 'code':
            # Add an ending ``` of code
            cell[2].append('```\n')
        cell[2] = '\n'.join(cell[2])
    return cells
100
def write(cells):
    """Serialize a list of parsed cells to Jupyter notebook text.

    Each entry of *cells* is a ``[cell_type, language, source]`` triple.
    Entries of type ``'markdown'`` become Markdown cells and entries of
    type ``'codecell'`` become code cells; entries of any other type are
    left out. The language element is not used here.

    Returns the notebook as a string produced by ``nbformat.v4.writes``.
    """
    # Jupyter's own nbformat package does the actual notebook construction
    from nbformat.v4 import (
        new_code_cell, new_markdown_cell, new_notebook, writes)

    notebook_cells = [
        (new_markdown_cell if kind == 'markdown' else new_code_cell)(
            source=source)
        for kind, _language, source in cells
        if kind in ('markdown', 'codecell')
    ]
    return writes(new_notebook(cells=notebook_cells))
119
def driver():
    """Compile the document named on the command line into a notebook.

    Command line: ``inputfile [outputfile|- [Mako args]]``.

    The input file is read, parsed with ``read`` (the arguments from the
    fourth one on are forwarded as Mako ``key=value`` variables) and
    serialized with ``write``. When the output file is omitted or given
    as ``-``, the result is written next to the input file, with the
    input's extension (whatever its length, or none at all) replaced by
    ``.ipynb``.

    If the input file argument is missing or the file cannot be read, a
    usage message and the error are printed and the process exits with
    status 1.
    """
    try:
        inputfile = sys.argv[1]
        with open(inputfile, 'r') as f:
            text = f.read()
        outputfile = '-' if len(sys.argv) <= 2 else sys.argv[2]
    except (IndexError, IOError) as e:
        print('Usage: %s inputfile [outputfile|- [Mako args]]' % (sys.argv[0]))
        print(e)
        sys.exit(1)
    cells = read(text, argv=sys.argv[3:])
    filestr = write(cells)
    if outputfile == '-':
        # Swap the real extension for .ipynb instead of blindly cutting
        # three characters, which was only right for ".gj" inputs.
        outputfile = os.path.splitext(inputfile)[0] + '.ipynb'
    with open(outputfile, 'w') as f:
        f.write(filestr)
137
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    driver()