From 74310304bd6736066746d9fae004602419f2814a Mon Sep 17 00:00:00 2001 From: "Zed A. Shaw" Date: Thu, 31 Oct 2024 02:53:38 -0400 Subject: [PATCH] Start of a ragel parser that can do the ansi code parsing for me. --- meson.build | 1 + scratchpad/testragel.cpp | 215 ++++++++++++++++++++++++++++----------- scratchpad/testragel.rl | 66 ++++++++++-- 3 files changed, 215 insertions(+), 67 deletions(-) diff --git a/meson.build b/meson.build index 6dc59e2..bf2b93b 100644 --- a/meson.build +++ b/meson.build @@ -49,6 +49,7 @@ collider = executable('collider', [ dependencies: dependencies) testragel = executable('testragel', [ + 'dbc.cpp', './scratchpad/testragel.cpp' ], dependencies: dependencies) diff --git a/scratchpad/testragel.cpp b/scratchpad/testragel.cpp index 4bab31b..7369e77 100644 --- a/scratchpad/testragel.cpp +++ b/scratchpad/testragel.cpp @@ -1,88 +1,117 @@ #line 1 ".\\scratchpad\\testragel.rl" #include +#include +#include +#include +#include +#include "dbc.hpp" using namespace fmt; -#line 11 ".\\scratchpad\\testragel.rl" +#line 40 ".\\scratchpad\\testragel.rl" -#line 8 ".\\scratchpad\\testragel.cpp" +#line 13 ".\\scratchpad\\testragel.cpp" static const char _foo_actions[] = { 0, 1, 0, 1, 1, 1, 2, 1, - 3, 1, 4, 1, 5 + 3, 1, 4, 1, 5, 1, 6, 1, + 9, 1, 10, 1, 11, 2, 1, 7, + 2, 1, 8, 2, 1, 9, 2, 1, + 10, 2, 4, 0 }; static const char _foo_key_offsets[] = { - 0, 0, 1, 3, 5, 8, 10, 13, - 14, 16, 19, 21, 24, 25 + 0, 0, 1, 3, 5, 6, 7, 12, + 15, 17, 19, 22, 24 }; static const char _foo_trans_keys[] = { - 27, 65, 91, 48, 57, 59, 48, 57, - 48, 57, 109, 48, 57, 0, 48, 57, - 59, 48, 57, 48, 57, 109, 48, 57, - 0, 0 + 91, 48, 57, 50, 53, 59, 59, 27, + 59, 109, 48, 57, 59, 48, 57, 48, + 57, 48, 57, 59, 48, 57, 48, 57, + 59, 48, 57, 0 }; static const char _foo_single_lengths[] = { - 0, 1, 2, 0, 1, 0, 1, 1, - 0, 1, 0, 1, 1, 0 + 0, 1, 0, 2, 1, 1, 3, 1, + 0, 0, 1, 0, 1 }; static const char _foo_range_lengths[] = { - 0, 0, 0, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 0, 0 + 0, 0, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1 }; static const char _foo_index_offsets[] = { - 0, 0, 2, 5, 7, 10, 12, 15, - 17, 19, 22, 24, 27, 29 -}; - -static const char _foo_indicies[] = { - 0, 1, 2, 3, 1, 4, 1, 5, - 4, 1, 6, 1, 7, 6, 1, 8, - 1, 9, 1, 10, 9, 1, 11, 1, - 12, 11, 1, 13, 1, 1, 0 + 0, 0, 2, 4, 7, 9, 11, 16, + 19, 21, 23, 26, 28 }; static const char _foo_trans_targs[] = { - 2, 0, 3, 8, 4, 5, 6, 7, - 13, 9, 10, 11, 12, 13 + 6, 0, 8, 6, 4, 5, 0, 9, + 0, 11, 0, 1, 3, 6, 7, 0, + 2, 7, 6, 8, 6, 10, 6, 9, + 10, 6, 12, 6, 11, 12, 6, 6, + 6, 6, 6, 6, 6, 6, 0 }; static const char _foo_trans_actions[] = { - 0, 0, 0, 0, 7, 0, 9, 0, - 11, 1, 0, 3, 0, 5 + 11, 0, 1, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 13, 33, 0, + 3, 9, 21, 0, 24, 1, 17, 3, + 0, 30, 1, 15, 3, 0, 27, 19, + 21, 24, 17, 30, 15, 27, 0 +}; + +static const char _foo_to_state_actions[] = { + 0, 0, 0, 0, 0, 0, 5, 0, + 0, 0, 0, 0, 0 +}; + +static const char _foo_from_state_actions[] = { + 0, 0, 0, 0, 0, 0, 7, 0, + 0, 0, 0, 0, 0 }; -static const int foo_start = 1; -static const int foo_first_final = 13; +static const char _foo_eof_trans[] = { + 0, 0, 32, 0, 0, 0, 0, 33, + 34, 35, 36, 37, 38 +}; + +static const int foo_start = 6; +static const int foo_first_final = 6; static const int foo_error = 0; -static const int foo_en_main = 1; +static const int foo_en_main = 6; -#line 14 ".\\scratchpad\\testragel.rl" +#line 43 ".\\scratchpad\\testragel.rl" -int main() { - int cs, res = 0; - char *test = "\x1B[36;46m"; +void parse_ansi(std::string_view &codes) { + const char *start = NULL; + int cs = 0; + size_t act = 0; + const char *p = codes.data(); + const char *pe = p + codes.size(); + const char *ts = p; + const char *te = pe; + const char *eof = pe; - char *p = test; - char *pe = p + strlen(p) + 1; -#line 70 ".\\scratchpad\\testragel.cpp" +#line 96 ".\\scratchpad\\testragel.cpp" { cs = foo_start; + ts = 0; + te = 0; + act = 0; } -#line 22 ".\\scratchpad\\testragel.rl" +#line 55 ".\\scratchpad\\testragel.rl" -#line 73 ".\\scratchpad\\testragel.cpp" +#line 102 ".\\scratchpad\\testragel.cpp" { int _klen; unsigned int _trans; @@ -95,6 +124,18 @@ int main() { if ( cs == 0 ) goto _out; _resume: + _acts = _foo_actions + _foo_from_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 3: +#line 1 "NONE" + {ts = p;} + break; +#line 121 ".\\scratchpad\\testragel.cpp" + } + } + _keys = _foo_trans_keys + _foo_key_offsets[cs]; _trans = _foo_index_offsets[cs]; @@ -144,7 +185,7 @@ _resume: } _match: - _trans = _foo_indicies[_trans]; +_eof_trans: cs = _foo_trans_targs[_trans]; if ( _foo_trans_actions[_trans] == 0 ) @@ -157,44 +198,102 @@ _match: switch ( *_acts++ ) { case 0: -#line 7 ".\\scratchpad\\testragel.rl" - { println("NUM1"); } +#line 13 ".\\scratchpad\\testragel.rl" + { + start = p; + } break; case 1: -#line 7 ".\\scratchpad\\testragel.rl" - { println("NUM2"); } - break; - case 2: -#line 7 ".\\scratchpad\\testragel.rl" - { res = 1; } - break; - case 3: -#line 8 ".\\scratchpad\\testragel.rl" - { println("NUM1"); } +#line 17 ".\\scratchpad\\testragel.rl" + { + size_t value = 0; + auto [ptr, ec] = std::from_chars(start, p, value); + dbc::check(ec == std::errc(), "error in number parsing"); + println("NUMBER {}", value); + } break; case 4: -#line 8 ".\\scratchpad\\testragel.rl" - { println("NUM2"); } +#line 1 "NONE" + {te = p+1;} break; case 5: -#line 8 ".\\scratchpad\\testragel.rl" - { res = 2; } +#line 33 ".\\scratchpad\\testragel.rl" + {te = p+1;{ println("START"); }} break; -#line 164 ".\\scratchpad\\testragel.cpp" + case 6: +#line 38 ".\\scratchpad\\testragel.rl" + {te = p+1;{ println("END"); }} + break; + case 7: +#line 34 ".\\scratchpad\\testragel.rl" + {te = p;p--;{ println("single"); }} + break; + case 8: +#line 35 ".\\scratchpad\\testragel.rl" + {te = p;p--;{ println("basic"); }} + break; + case 9: +#line 36 ".\\scratchpad\\testragel.rl" + {te = p;p--;{ println("256 color"); }} + break; + case 10: +#line 37 ".\\scratchpad\\testragel.rl" + {te = p;p--;{ println("true color"); }} + break; + case 11: +#line 34 ".\\scratchpad\\testragel.rl" + {{p = ((te))-1;}{ println("single"); }} + break; +#line 222 ".\\scratchpad\\testragel.cpp" } } _again: + _acts = _foo_actions + _foo_to_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 2: +#line 1 "NONE" + {ts = 0;} + break; +#line 233 ".\\scratchpad\\testragel.cpp" + } + } + if ( cs == 0 ) goto _out; if ( ++p != pe ) goto _resume; _test_eof: {} + if ( p == eof ) + { + if ( _foo_eof_trans[cs] > 0 ) { + _trans = _foo_eof_trans[cs] - 1; + goto _eof_trans; + } + } + _out: {} } -#line 23 ".\\scratchpad\\testragel.rl" +#line 56 ".\\scratchpad\\testragel.rl" +} + +int main() { + // possibly put alphtype unsigned int? + std::vector tests = { + "\x1B[;5;78;98m", + "\x1B[;2;36;46;23m", + "\x1B[36;46m", + "\x1B[36m", + }; + + + for(auto test : tests) { + println("--- PARSING"); + parse_ansi(test); + } - fmt::println("result = {}", res); return 0; } diff --git a/scratchpad/testragel.rl b/scratchpad/testragel.rl index 6eafa6a..09570bf 100644 --- a/scratchpad/testragel.rl +++ b/scratchpad/testragel.rl @@ -1,26 +1,74 @@ #include +#include +#include +#include +#include +#include "dbc.hpp" using namespace fmt; %%{ machine foo; - test1 = 0x1B . "[" . [0-9]+ @{ println("NUM1"); } . ";" . [0-9]+ @{ println("NUM2"); } . "m" 0 @{ res = 1; }; - test2 = 0x1B "A" [0-9]+ @{ println("NUM1"); } ";" [0-9]+ @{ println("NUM2"); } "m" 0 @{ res = 2; }; - main := (test1 | test2); + action tstart { + start = fpc; + } + + action number { + size_t value = 0; + auto [ptr, ec] = std::from_chars(start, fpc, value); + dbc::check(ec == std::errc(), "error in number parsing"); + println("NUMBER {}", value); + } + + start = 0x1B "["; + num = digit+ >tstart %number; + + color256 = ";5;" (num ";"?)**; + color24b = ";2;" (num ";"?)**; + basic = num ";" num; + single = num; + + main := |* + start => { println("START"); }; + single => { println("single"); }; + basic => { println("basic"); }; + color256 => { println("256 color"); }; + color24b => { println("true color"); }; + "m" => { println("END"); }; + *|; }%% %% write data; -int main() { - int cs, res = 0; - char *test = "\x1B[36;46m"; +void parse_ansi(std::string_view &codes) { + const char *start = NULL; + int cs = 0; + size_t act = 0; + const char *p = codes.data(); + const char *pe = p + codes.size(); + const char *ts = p; + const char *te = pe; + const char *eof = pe; - char *p = test; - char *pe = p + strlen(p) + 1; %% write init; %% write exec; +} + +int main() { + // possibly put alphtype unsigned int? + std::vector tests = { + "\x1B[;5;78;98m", + "\x1B[;2;36;46;23m", + "\x1B[36;46m", + "\x1B[36m", + }; + + + for(auto test : tests) { + println("--- PARSING"); + parse_ansi(test); + } - fmt::println("result = {}", res); return 0; }