From 664de68d9d92301e78ed7cb2b1164ae9f77122ae Mon Sep 17 00:00:00 2001 From: "Zed A. Shaw" Date: Sat, 16 Aug 2025 14:30:01 -0400 Subject: [PATCH] Started a new parser using ragel. It can parse the slides but without json. Need to re-read the ragel manual to get it fully working. --- Makefile | 8 +- backup_parser.cpp | 62 +++++++++ md_parser.cpp | 222 ++++++++++++++++++++++++++++++ md_parser.rl | 65 +++++++++ meson.build | 8 +- parser.cpp | 66 ++------- parser.hpp | 10 ++ sample/01-a-good-first-program.md | 26 ++-- sample/with_json.md | 34 +++++ tests/parsing.cpp | 19 +++ 10 files changed, 448 insertions(+), 72 deletions(-) create mode 100644 backup_parser.cpp create mode 100644 md_parser.cpp create mode 100644 md_parser.rl create mode 100644 sample/with_json.md create mode 100644 tests/parsing.cpp diff --git a/Makefile b/Makefile index 46352a5..c095294 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,10 @@ else sh -x ./scripts/reset_build.sh endif -build: +%.cpp : %.rl + ragel -o $@ $< + +build: md_parser.cpp meson compile -j 10 -C builddir release_build: @@ -41,6 +44,9 @@ debug_run: build clean: meson compile --clean -C builddir +test: build + ./builddir/runtests -e + debug_test: build gdb --nx -x .gdbinit --ex run --args builddir/runtests -e diff --git a/backup_parser.cpp b/backup_parser.cpp new file mode 100644 index 0000000..68b2966 --- /dev/null +++ b/backup_parser.cpp @@ -0,0 +1,62 @@ +#include "parser.hpp" +#include "dbc.hpp" +#include +#include +#include +#include +#include +#include +#include + +using std::shared_ptr, std::string, nlohmann::json, std::string_view; +namespace fs = std::filesystem; + +shared_ptr parse_slides(const string& md_file) { + shared_ptr deck = std::make_shared(); + + dbc::check(fs::exists(md_file), "md file missing"); + + auto size = fs::file_size(md_file); + string line(size, '\0'); + bool started = false; + json config; + + if(std::ifstream in_file{md_file, std::ios::binary}) { + while(std::getline(in_file, line)) { + if(line == "{") { + string json_data; + + do { + json_data += line; + } while (std::getline(in_file, line) && line != "}"); + + json_data += "}"; + + config = json::parse(json_data); + std::cout << "JSON: " << config << '\n'; + } else if(line == "===") { + deck->config = config; + fmt::println("START"); + started = true; + } else if(line == "---") { + fmt::println("START SLIDE"); + } else { + if(started) { + string title = line; + string content; + + while(std::getline(in_file, line) && line != "---") { + content += line + "\n"; + } + + deck->slides.emplace_back(title, content, config); + config = json::parse("{}"); + } else { + fmt::println("JUNK: {}", line); + } + } + } + } + + return deck; +} diff --git a/md_parser.cpp b/md_parser.cpp new file mode 100644 index 0000000..3325da5 --- /dev/null +++ b/md_parser.cpp @@ -0,0 +1,222 @@ + +#line 1 "md_parser.rl" +#include +#include + + +#line 38 "md_parser.rl" + + + +#line 7 "md_parser.cpp" +static const char _Parser_actions[] = { + 0, 1, 0, 1, 3, 1, 4, 2, + 0, 1, 2, 0, 2, 2, 1, 0, + 2, 2, 0, 2, 3, 5 +}; + +static const char _Parser_key_offsets[] = { + 0, 0, 1, 2, 3, 4, 10, 11, + 18, 22, 29, 31, 33, 34 +}; + +static const char _Parser_trans_keys[] = { + 61, 61, 61, 10, 10, 32, 35, 42, + 9, 13, 10, 10, 32, 35, 42, 45, + 9, 13, 10, 32, 9, 13, 10, 32, + 35, 42, 45, 9, 13, 10, 45, 10, + 45, 10, 10, 32, 35, 42, 45, 9, + 13, 0 +}; + +static const char _Parser_single_lengths[] = { + 0, 1, 1, 1, 1, 4, 1, 5, + 2, 5, 2, 2, 1, 5 +}; + +static const char _Parser_range_lengths[] = { + 0, 0, 0, 0, 0, 1, 0, 1, + 1, 1, 0, 0, 0, 1 +}; + +static const char _Parser_index_offsets[] = { + 0, 0, 2, 4, 6, 8, 14, 16, + 23, 27, 34, 37, 40, 42 +}; + +static const char _Parser_indicies[] = { + 0, 1, 2, 1, 3, 1, 4, 1, + 7, 6, 8, 9, 6, 5, 11, 10, + 7, 6, 8, 9, 12, 6, 5, 13, + 6, 6, 5, 7, 6, 14, 15, 12, + 6, 5, 11, 16, 10, 11, 17, 10, + 18, 10, 7, 6, 8, 9, 12, 6, + 5, 0 +}; + +static const char _Parser_trans_targs[] = { + 2, 0, 3, 4, 5, 6, 8, 9, + 8, 8, 6, 7, 10, 9, 8, 8, + 11, 12, 13 +}; + +static const char _Parser_trans_actions[] = { + 0, 0, 0, 0, 5, 1, 1, 0, + 13, 16, 0, 3, 1, 3, 7, 10, + 0, 0, 19 +}; + +static const int Parser_start = 1; +static const int Parser_first_final = 13; +static const int Parser_error = 0; + +static const int Parser_en_main = 1; + + +#line 41 "md_parser.rl" + + +bool Parser::parse(const std::string& input) { + int cs = 0; + const char *start = nullptr; + const char *begin = input.data(); + const char *p = input.data(); + const char *pe = p + input.size(); + const char *eof = p + input.size(); (void)eof; + std::string tk; + + +#line 81 "md_parser.cpp" + { + cs = Parser_start; + } + +#line 53 "md_parser.rl" + +#line 84 "md_parser.cpp" + { + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _Parser_trans_keys + _Parser_key_offsets[cs]; + _trans = _Parser_index_offsets[cs]; + + _klen = _Parser_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = _Parser_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + _trans = _Parser_indicies[_trans]; + cs = _Parser_trans_targs[_trans]; + + if ( _Parser_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _Parser_actions + _Parser_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: +#line 12 "md_parser.rl" + { start = p; } + break; + case 1: +#line 13 "md_parser.rl" + { fmt::println("TITLE"); } + break; + case 2: +#line 14 "md_parser.rl" + { fmt::println("ENUM"); } + break; + case 3: +#line 15 "md_parser.rl" + { + tk = input.substr(start - begin, p - start); + std::cout << "CONTENT:" << std::quoted(tk) << '\n'; + } + break; + case 4: +#line 19 "md_parser.rl" + { fmt::println("----- START"); } + break; + case 5: +#line 20 "md_parser.rl" + { fmt::println("END"); } + break; +#line 178 "md_parser.cpp" + } + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} + } + +#line 54 "md_parser.rl" + + bool good = pe - p == 0; + + if(good) { + finalize(); + } else { + error = true; + std::cout << "!!!!!!!!!!!!!!!!!!!!!! error at:"; + std::cout << p; + } + return good; +} diff --git a/md_parser.rl b/md_parser.rl new file mode 100644 index 0000000..774aa4d --- /dev/null +++ b/md_parser.rl @@ -0,0 +1,65 @@ +#include +#include + +%%{ + machine Parser; + alphtype char; + + action token { + tk = input.substr(start - begin, fpc - start); + std::cout << "TOKEN: " << std::quoted(tk) << '\n'; + } + action mark { start = fpc; } + action title { fmt::println("TITLE"); } + action enum { fmt::println("ENUM"); } + action content { + tk = input.substr(start - begin, fpc - start); + std::cout << "CONTENT:" << std::quoted(tk) << '\n'; + } + action start { fmt::println("----- START"); } + action end { fmt::println("END"); } + action plain { fmt::println("PLAIN"); } + + eol = "\n"; + start = "===" eol $start; + end = "---" eol $end; + pound = "#" $title; + asterisk = "*" $enum; + content = (any+ -- (eol|end)) >mark %content; + + title = pound space* content eol; + enum = asterisk space* content eol; + line = space* content eol; + blank = space* eol; + + slide = (title | enum | line | blank)+ end; + + main := start (slide)+; +}%% + +%% write data; + + +bool Parser::parse(const std::string& input) { + int cs = 0; + const char *start = nullptr; + const char *begin = input.data(); + const char *p = input.data(); + const char *pe = p + input.size(); + const char *eof = p + input.size(); (void)eof; + std::string tk; + + %% write init; + %% write exec; + + bool good = pe - p == 0; + + if(good) { + finalize(); + } else { + error = true; + std::cout << "!!!!!!!!!!!!!!!!!!!!!! error at:"; + std::cout << p; + } + return good; +} diff --git a/meson.build b/meson.build index 2e74139..db3f3d7 100644 --- a/meson.build +++ b/meson.build @@ -82,17 +82,17 @@ dependencies += [ sources = [ 'dbc.cpp', 'backend.cpp', - 'main.cpp', 'control_ui.cpp', 'slides_ui.cpp', - 'parser.cpp' + 'parser.cpp', + 'backup_parser.cpp', ] tests = [ - 'tests/sample.cpp' + 'tests/parsing.cpp' ] -executable('besos', sources, +executable('besos', sources + ['main.cpp'], cpp_args: cpp_args, link_args: link_args, override_options: exe_defaults, diff --git a/parser.cpp b/parser.cpp index d526105..cc73a4e 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1,60 +1,24 @@ -#include "parser.hpp" -#include "dbc.hpp" -#include +#include +#include +#include #include #include -#include -#include - -using std::shared_ptr, std::string, nlohmann::json; -namespace fs = std::filesystem; - -shared_ptr parse_slides(const string& md_file) { - shared_ptr deck = std::make_shared(); - - dbc::check(fs::exists(md_file), "md file missing"); - - auto size = fs::file_size(md_file); - string line(size, '\0'); - bool started = false; - json config; - - if(std::ifstream in_file{md_file, std::ios::binary}) { - while(std::getline(in_file, line)) { - if(line == "{") { - string json_data; +#include "parser.hpp" +#include "dbc.hpp" - do { - json_data += line; - } while (std::getline(in_file, line) && line != "}"); +#include "./md_parser.cpp" - json_data += "}"; +std::string load_file(const std::string& md_file) { + std::ifstream in_file{md_file, std::ios::binary}; + dbc::check(bool(in_file), fmt::format("failed to load {}", md_file)); - config = json::parse(json_data); - std::cout << "JSON: " << config << '\n'; - } else if(line == "===") { - deck->config = config; - fmt::println("START"); - started = true; - } else if(line == "---") { - fmt::println("START SLIDE"); - } else { - if(started) { - string title = line; - string content; + return std::string(std::istreambuf_iterator(in_file), std::istreambuf_iterator()); +} - while(std::getline(in_file, line) && line != "---") { - content += line + "\n"; - } +Parser::Parser() +{ - deck->slides.emplace_back(title, content, config); - config = json::parse("{}"); - } else { - fmt::println("JUNK: {}", line); - } - } - } - } +} - return deck; +void Parser::finalize() { } diff --git a/parser.hpp b/parser.hpp index 7aec05c..30ad7e0 100644 --- a/parser.hpp +++ b/parser.hpp @@ -4,3 +4,13 @@ #include "slides_ui.hpp" std::shared_ptr parse_slides(const std::string& md_file); + +std::string load_file(const std::string& md_file); + +struct Parser { + bool error = false; + + Parser(); + bool parse(const std::string& input); + void finalize(); +}; diff --git a/sample/01-a-good-first-program.md b/sample/01-a-good-first-program.md index 887b0db..c26754c 100644 --- a/sample/01-a-good-first-program.md +++ b/sample/01-a-good-first-program.md @@ -1,33 +1,27 @@ -{ - "title": "1: A Good First Program", - "summary": "If you skipped Exercise 0, then you are not doing this book right." -} === -1: A Good First Program +# 1: A Good First Program * The key is this * That's what we want --- -What You Should See +# What You Should See Yes, but with more feeling. --- -{ -"bg_color": [255, 0, 0, 255] -} -Study Drills +# Study Drills Besos would be proud. --- -Common Student Questions +# Common Student Questions -- This again. -- That too. +* This again. +* That too. --- -The Blue Plus +# The Blue Plus _You must find it_. --- -The End +# The End -# See You Soon! +See You Soon! +--- diff --git a/sample/with_json.md b/sample/with_json.md new file mode 100644 index 0000000..6a7542b --- /dev/null +++ b/sample/with_json.md @@ -0,0 +1,34 @@ +{ + "title": "1: A Good First Program", + "summary": "If you skipped Exercise 0, then you are not doing this book right." +} +=== +# 1: A Good First Program + +* The key is this +* That's what we want +--- +# What You Should See + +Yes, but with more feeling. +--- +{ +"bg_color": [255, 0, 0, 255] +} +# Study Drills + +Besos would be proud. +--- +# Common Student Questions + +- This again. +- That too. +--- +# The Blue Plus + +_You must find it_. +--- +# The End + +See You Soon! +--- diff --git a/tests/parsing.cpp b/tests/parsing.cpp new file mode 100644 index 0000000..4169f92 --- /dev/null +++ b/tests/parsing.cpp @@ -0,0 +1,19 @@ +#include +#include "guecs/sfml/backend.hpp" +#include +#include +#include "parser.hpp" + +TEST_CASE("parse a simple example", "[parsing]") { + sfml::Backend backend; + guecs::init(&backend); + + auto data = parse_slides("./sample/01-a-good-first-program.md"); + REQUIRE(data->slides.size() > 0); + + auto contents = load_file("./sample/01-a-good-first-program.md"); + Parser rp; + rp.parse(contents); + + REQUIRE(!rp.error); +}