From 0872a96a88dbf3d7647e6e78612cb9b7ed300428 Mon Sep 17 00:00:00 2001
From: Matt Bauman <mbauman@gmail.com>
Date: Fri, 19 Jul 2019 19:16:12 -0400
Subject: [PATCH] RFC: Add support for Jupyter cell metadata via %% syntax
 (#43)

Admittedly this is a little strange: it adds support for a special first-line
syntax that only applies to Jupyter notebook outputs. But this %% format is
somewhat standard across notebook-generating tools, and the ability to protect
it with a `#nb` leader makes it possible to specifically target this line to a
notebook output. We aim to use something like this for generation of notebooks
with support for nbgrader's metadata extensions, but it could also be used to
specify cell types for Jupyter notebook presentations (with presentation
extensions) or other such fun. Fixes #67.

Co-authored-by: Matt Bauman <mbauman@gmail.com>
Co-authored-by: Fredrik Ekre <ekrefredrik@gmail.com>
---
 docs/src/outputformats.md | 16 +++++++++++++++
 src/Literate.jl           | 43 +++++++++++++++++++++++++++------------
 test/runtests.jl          | 19 +++++++++++++++++
 3 files changed, 65 insertions(+), 13 deletions(-)

diff --git a/docs/src/outputformats.md b/docs/src/outputformats.md
index d300da3..012dfdd 100644
--- a/docs/src/outputformats.md
+++ b/docs/src/outputformats.md
@@ -51,6 +51,22 @@ arguments to [`Literate.notebook`](@ref):
 Literate.notebook
 ```
 
+### Notebook metadata
+
+Jupyter notebook cells (both code cells and markdown cells) can contain metadata. This is enabled
+in Literate by the `%%` token, similar to
+[Jupytext](https://jupytext.readthedocs.io/en/latest/formats.html#the-percent-format).
+The format is as follows (the optional leading text is stored as `name` in the cell metadata):
+
+```
+%% optional name text [type] {optional metadata JSON}
+```
+
+Cell metadata can, for example, be used for
+[nbgrader](https://nbgrader.readthedocs.io/en/stable/contributor_guide/metadata.html)
+and the [reveal.js](https://github.com/hakimel/reveal.js) notebook extension
+[RISE](https://github.com/damianavila/RISE).
+
 
 ## [**4.3.** Script Output](@id Script-Output)
 
diff --git a/src/Literate.jl b/src/Literate.jl
index 3f1d0aa..b95a013 100644
--- a/src/Literate.jl
+++ b/src/Literate.jl
@@ -398,6 +398,19 @@ end
 
 const JUPYTER_VERSION = v"4.3.0"
 
+parse_nbmeta(line::Pair) = parse_nbmeta(line.second) # Pair lines carry their text in `.second`
+function parse_nbmeta(line)
+    # Format: %% optional name text [type] {optional metadata JSON}; returns (type-or-nothing, metadata).
+    # Cf. https://jupytext.readthedocs.io/en/latest/formats.html#the-percent-format
+    m = match(r"^%% ([^[{]+)?\s*(?:\[(\w+)\])?\s*(\{.*)?$", line)
+    m === nothing && error("invalid notebook cell metadata line: ", repr(line)) # e.g. text after [type]
+    name = m.captures[1] === nothing ? Dict{String, String}() : Dict("name" => m.captures[1])
+    meta = m.captures[3] === nothing ? Dict{String, Any}() : JSON.parse(m.captures[3])
+    return m.captures[2], merge(name, meta) # keys from the JSON override the "name" entry
+end
+line_is_nbmeta(line::Pair) = line_is_nbmeta(last(line)) # test the text half of the Pair
+line_is_nbmeta(line) = startswith(line, "%% ") # metadata marker: two percent signs and a space
+
 """
     Literate.notebook(inputfile, outputdir; kwargs...)
 
@@ -451,20 +464,24 @@ function notebook(inputfile, outputdir; preprocess = identity, postprocess = ide
     cells = []
     for chunk in chunks
         cell = Dict()
-        if isa(chunk, MDChunk)
-            cell["cell_type"] = "markdown"
-            cell["metadata"] = Dict()
-            lines = String[x.second for x in chunk.lines] # skip indent
-            @views map!(x -> x * '\n', lines[1:end-1], lines[1:end-1])
-            cell["source"] = lines
-            cell["outputs"] = []
-        else # isa(chunk, CodeChunk)
-            cell["cell_type"] = "code"
-            cell["metadata"] = Dict()
-            @views map!(x -> x * '\n', chunk.lines[1:end-1], chunk.lines[1:end-1])
-            cell["source"] = chunk.lines
+        chunktype = isa(chunk, MDChunk) ? "markdown" : "code"
+        if !isempty(chunk.lines) && line_is_nbmeta(chunk.lines[1])
+            metatype, metadata = parse_nbmeta(chunk.lines[1])
+            metatype !== nothing && metatype != chunktype && error("specifying a different cell type is not supported")
+            popfirst!(chunk.lines)
+        else
+            metadata = Dict{String, Any}()
+        end
+        lines = isa(chunk, MDChunk) ?
+                    String[x.second for x in chunk.lines] : # skip indent
+                    chunk.lines
+        @views map!(x -> x * '\n', lines[1:end-1], lines[1:end-1])
+        cell["cell_type"] = chunktype
+        cell["metadata"] = metadata
+        cell["source"] = lines
+        cell["outputs"] = []
+        if chunktype == "code"
             cell["execution_count"] = nothing
-            cell["outputs"] = []
         end
         push!(cells, cell)
     end
diff --git a/test/runtests.jl b/test/runtests.jl
index 8aff492..f735fbe 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -208,6 +208,18 @@ content = """
     #+
         ## Indented comment
     end
+
+    #nb # A notebook cell with special metadata
+    #nb %% Meta1 {"meta": "data"}
+    #nb 1+1
+    #nb #-
+    #nb # A explicit code notebook cell
+    #nb #-
+    #nb %% [code]
+    #nb 1+2
+    #nb #-
+    #nb # %% [markdown] {"meta": "data"}
+    #nb # # Explicit markdown cell with metadata
     """
 
 @testset "Literate.script" begin
@@ -584,6 +596,12 @@ end
                ]
             """,
 
+            """
+               "metadata": {
+                "meta": "data"
+               }
+            """,
+
             """
                "source": [
                 "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
@@ -691,6 +709,7 @@ end
             r = try
                 Literate.notebook(inputfile, outdir)
             catch err
+                @info "^^ the above error log message is expected ^^"
                 err
             end
             @test isa(r, ErrorException)