diff --git a/.github/workflows/rapidyaml.yml b/.github/workflows/rapidyaml.yml
new file mode 100644
index 000000000..f27754ddd
--- /dev/null
+++ b/.github/workflows/rapidyaml.yml
@@ -0,0 +1,121 @@
+name: rapidyaml
+
+defaults:
+  run:
+    shell: bash -xeo pipefail {0}
+'on':
+  workflow_dispatch: null
+  push:
+    branches:
+    - main
+  pull_request:
+
+jobs:
+
+  # check that the spec'ed version of rapidyaml passes its own tests
+  ryml:
+    runs-on: ubuntu-24.04
+    if: always()
+    continue-on-error: false
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - bt: Debug
+          - bt: Release
+    steps:
+      - name: checkout (action)
+        uses: actions/checkout@v4
+        with: {submodules: recursive, fetch-depth: 0} # use fetch-depth to ensure all tags are fetched
+      - name: checkout rapidyaml
+        run: |
+          cd rapidyaml/native
+          make rapidyaml
+      - name: configure
+        run: |
+          cd rapidyaml/native/rapidyaml
+          cmake -B build -D CMAKE_BUILD_TYPE=${{matrix.bt}} -D RYML_BUILD_TESTS=ON
+      - name: build
+        run: |
+          cd rapidyaml/native/rapidyaml
+          cmake --build build --target ryml-test-build --parallel --verbose
+      - name: run tests
+        run: |
+          cd rapidyaml/native/rapidyaml
+          cmake --build build --target ryml-test-run
+
+  # run the c++ tests, also in Debug to test with assertions
+  cpp:
+    runs-on: ubuntu-24.04
+    if: always()
+    continue-on-error: false
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {bt: Debug, musl: 0}
+          - {bt: Debug, musl: 1}
+          - {bt: Release, musl: 0}
+          - {bt: Release, musl: 1}
+    # MKOPTS holds VAR=value pairs. The steps hand them to env(1) because bash
+    # never treats words produced by an expansion as variable assignments.
+    env:
+      MKOPTS: RAPIDYAML_BUILD_TYPE=${{matrix.bt}} RAPIDYAML_MUSL=${{matrix.musl}}
+    steps:
+      - name: checkout (action)
+        uses: actions/checkout@v4
+        with: {submodules: recursive, fetch-depth: 0} # use fetch-depth to ensure all tags are fetched
+      - name: check jni header up to date
+        run: make -C rapidyaml/native -B jni jnicheck
+      - name: get rapidyaml
+        run: make -C rapidyaml/native rapidyaml
+      - name: run c++ tests, static -----------------------------
+        run: echo
+      - name: cfg c++, static
+        run: env $MKOPTS make -C rapidyaml/native cfg-static
+      - name: build c++ tests, static
+        run: env $MKOPTS make -C rapidyaml/native build-static
+      - name: run c++ tests, static
+        run: env $MKOPTS make -C rapidyaml/native test-static
+      - name: run c++ tests, static with timing
+        run: env $MKOPTS make -C rapidyaml/native test-static-timing
+      - name: run c++ tests, shared -----------------------------
+        run: echo
+      - name: cfg c++, shared
+        run: env $MKOPTS make -C rapidyaml/native cfg-shared
+      - name: build c++ tests, shared
+        run: env $MKOPTS make -C rapidyaml/native build-shared
+      - name: run c++ tests, shared
+        run: env $MKOPTS make -C rapidyaml/native test-shared
+      - name: run c++ tests, shared with timing
+        run: env $MKOPTS make -C rapidyaml/native test-shared-timing
+
+  # run the java tests, also in Debug to test with assertions
+  java:
+    runs-on: ubuntu-24.04
+    if: always()
+    continue-on-error: false
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {bt: Debug, musl: 0}
+          - {bt: Debug, musl: 1}
+          - {bt: Release, musl: 0}
+          - {bt: Release, musl: 1}
+    # MKOPTS holds VAR=value pairs. The steps hand them to env(1) because bash
+    # never treats words produced by an expansion as variable assignments.
+    env:
+      MKOPTS: RAPIDYAML_BUILD_TYPE=${{matrix.bt}} RAPIDYAML_MUSL=${{matrix.musl}}
+    steps:
+      - name: checkout (action)
+        uses: actions/checkout@v4
+        with: {submodules: recursive, fetch-depth: 0} # use fetch-depth to ensure all tags are fetched
+      - name: build lib
+        run: env $MKOPTS make -C rapidyaml build
+      - name: build jar
+        run: env $MKOPTS make -C rapidyaml jar
+      - name: install
+        run: env $MKOPTS make -C rapidyaml install
+      - name: run java tests
+        run: env $MKOPTS make -C rapidyaml test
diff --git a/.github/workflows/ys.yml b/.github/workflows/ys.yml
new file mode 100644
index 000000000..fd0ec886d
--- /dev/null
+++ b/.github/workflows/ys.yml
@@ -0,0 +1,51 @@
+name: ys
+
+defaults:
+  run:
+    shell: bash -xeo pipefail {0}
+'on':
+  workflow_dispatch: null
+  push:
+    branches:
+    - main
+  pull_request:
+
+jobs:
+
+  # run ys tests, also in Debug to test with assertions
+  ys:
+    name: ys/${{matrix.ysparser}}/${{matrix.bt}}
+    runs-on: ubuntu-24.04
+    if: always()
+    continue-on-error: false
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # every entry sets bt: an empty RAPIDYAML_BUILD_TYPE in the environment
+          # still counts as defined, so the `?= Release` default would not apply
+          - {v: 1, ysparser: snake, bt: Release}
+          - {v: 1, ysparser: ryml, bt: Debug}
+          - {v: 1, ysparser: ryml, bt: Release}
+    steps:
+      - name: checkout (action)
+        uses: actions/checkout@v4
+        with: {submodules: recursive, fetch-depth: 0} # use fetch-depth to ensure all tags are fetched
+      - name: run core tests
+        run: |
+          . .profile
+          RAPIDYAML_BUILD_TYPE=${{matrix.bt}} \
+          YS_PARSER=${{matrix.ysparser}} \
+          make test-core v=${{matrix.v}}
+      - name: run ys tests
+        run: |
+          . .profile
+          RAPIDYAML_BUILD_TYPE=${{matrix.bt}} \
+          YS_PARSER=${{matrix.ysparser}} \
+          make -C ys test-run v=${{matrix.v}}
+      - name: run ys tests ?
+        run: |
+          . .profile
+          RAPIDYAML_BUILD_TYPE=${{matrix.bt}} \
+          YS_PARSER=${{matrix.ysparser}} \
+          make test-ys v=${{matrix.v}}
diff --git a/Makefile b/Makefile
index b60d83b57..5194d66b8 100644
--- a/Makefile
+++ b/Makefile
@@ -22,12 +22,14 @@ BINDINGS := \
   rust \
 
 DIRS := \
+  rapidyaml \
   core \
   libyamlscript \
   $(BINDINGS) \
   ys \
 
 BUILD_DIRS := \
+  rapidyaml \
   libyamlscript \
   go \
   nodejs \
@@ -293,6 +295,9 @@ sysclean: realclean
 ifeq (ingy,$(USER))
 	$(RM) -r $(HOME)/.m2
 endif
+ifeq (jpmag,$(USER))
+	$(RM) -r $(HOME)/.m2
+endif
 
 $(DOCKER_BUILD):
 docker-build: $(DOCKER_BUILD)
diff --git a/common/java.mk b/common/java.mk
index bc9997d8c..f95a424de 100644
--- a/common/java.mk
+++ b/common/java.mk
@@ -5,6 +5,10 @@ export JAVA_HOME := $(GRAALVM_HOME)
 
 export PATH := $(JAVA_HOME)/bin:$(PATH)
 
+MVN ?= mvn
+JAR ?= $(GRAALVM_HOME)/bin/jar
+JAVAC ?= $(GRAALVM_HOME)/bin/javac
+
 YAMLSCRIPT_JAVA_INSTALLED := \
   $(MAVEN_REPOSITORY)/org/yamlscript/yamlscript/maven-metadata-local.xml
 
@@ -31,3 +35,6 @@ endif
 
 $(YAMLSCRIPT_JAVA_INSTALLED): $(YAMLSCRIPT_JAVA_SRC)
 	$(MAKE) -C $(ROOT)/java install
+
+$(JAVAC): $(GRAALVM_INSTALLED)
+$(JAR): $(GRAALVM_INSTALLED)
diff --git a/common/vars.mk b/common/vars.mk
index 6c551f7d3..363efe1c4 100644
--- a/common/vars.mk
+++ b/common/vars.mk
@@ -48,12 +48,15 @@ ifneq (,$(findstring linux,$(ostype)))
   GCC := gcc -std=gnu99 -fPIC -shared
   SO := so
   DY :=
+  DOTLIB := a
 else ifneq (,$(findstring darwin,$(ostype)))
   IS_MACOS := true
   GCC := gcc -dynamiclib
   SO := dylib
   DY := DY
+  DOTLIB := a
 else
+  DOTLIB := lib
   $(error Unsupported OSTYPE: $(ostype))
 endif
 
@@ -83,9 +86,8 @@ CURL := $(shell command -v curl)
 TIME := time -p
 
 LIBYAMLSCRIPT_DIR := $(ROOT)/libyamlscript/lib
-LIBRARY_PATH := $(LIBYAMLSCRIPT_DIR)
+LIBRARY_PATH := $(LIBYAMLSCRIPT_DIR):$(ROOT)/rapidyaml/native
 export $(DY)LD_LIBRARY_PATH := $(LIBRARY_PATH)
-export LD_LIBRARY_PATH := $(LIBRARY_PATH)
 LIBYAMLSCRIPT_SO_NAME := $(LIBYAMLSCRIPT_DIR)/libyamlscript
 LIBYAMLSCRIPT_SO_FQNP := $(LIBYAMLSCRIPT_SO_NAME).$(SO).$(YS_VERSION)
 LIBYAMLSCRIPT_SO_BASE := $(LIBYAMLSCRIPT_DIR)/libyamlscript.$(SO)
@@ -221,6 +223,41 @@ RELEASE_LYS_NAME := libyamlscript-$(YS_VERSION)-$(GRAALVM_ARCH)
 RELEASE_LYS_TAR := $(RELEASE_LYS_NAME).tar.xz
 
 
+#------------------------------------------------------------------------------
+# RapidYAML variables:
+#------------------------------------------------------------------------------
+RAPIDYAML := $(ROOT)/rapidyaml
+
+RAPIDYAML_VERSION := 0.8.0
+#RAPIDYAML_TAG ?= v$(RAPIDYAML_VERSION)
+RAPIDYAML_TAG ?= 8c37616378aefd376690a19459c31a56ce596b5e
+RAPIDYAML_REPO := https://github.com/biojppm/rapidyaml
+RAPIDYAML_BUILD_TYPE ?= Release
+RAPIDYAML_DBG ?= 0
+RAPIDYAML_TIMED ?= 1
+RAPIDYAML_MUSL ?= 1
+RAPIDYAML_JAVA := \
+  $(ROOT)/rapidyaml/src/main/java/org/rapidyaml/Rapidyaml.java \
+  $(ROOT)/rapidyaml/src/main/java/org/rapidyaml/Evt.java \
+  $(ROOT)/rapidyaml/src/main/java/org/rapidyaml/YamlParseErrorException.java
+RAPIDYAML_JNI_H := $(ROOT)/rapidyaml/native/org_rapidyaml_Rapidyaml.h
+RAPIDYAML_SO := $(ROOT)/rapidyaml/native/librapidyaml.$(RAPIDYAML_VERSION).$(SO)
+RAPIDYAML_LIB := $(ROOT)/rapidyaml/native/librapidyaml.$(DOTLIB)
+RAPIDYAML_JAR := $(ROOT)/rapidyaml/target/rapidyaml-$(RAPIDYAML_VERSION).jar
+RAPIDYAML_INSTALLED_DIR := \
+  $(MAVEN_REPOSITORY)/org/rapidyaml/rapidyaml/$(RAPIDYAML_VERSION)/
+RAPIDYAML_INSTALLED := \
+  $(RAPIDYAML_INSTALLED_DIR)/rapidyaml-$(RAPIDYAML_VERSION).jar
+
+
+#------------------------------------------------------------------------------
+# Programs
+#------------------------------------------------------------------------------
+
+GIT ?= git
+CMAKE ?= cmake
+
+
 #------------------------------------------------------------------------------
 default::
 
diff --git a/core/Makefile b/core/Makefile
index a6d28f544..c4591746f 100644
--- a/core/Makefile
+++ b/core/Makefile
@@ -6,11 +6,17 @@ include $(COMMON)/docker.mk
 export PATH := $(ROOT)/core/bin:$(PATH)
 
 #------------------------------------------------------------------------------
-build::
+build:: $(RAPIDYAML_SO) $(RAPIDYAML_INSTALLED)
 
 install test:: $(LEIN) build
 	$< $@
 
+$(RAPIDYAML_SO):
+	$(MAKE) -C $(ROOT)/rapidyaml build
+
+$(RAPIDYAML_INSTALLED):
+	$(MAKE) -C $(ROOT)/rapidyaml install
+
 Dockerfile:: $(COMMON) Makefile
 	cat \
 $> yaml-string
-         (.parseString parser)
-         (map ys-event)
-         (remove nil?)
-         rest)
+        events (if (System/getenv "YS_PARSER_TIME")
+                 (time (parse-fn yaml-string))
+                 (parse-fn yaml-string))
         [first-event & rest-events] events
         first-event-tag (:! first-event)
         first-event (if (and has-code-mode-shebang
@@ -56,10 +54,20 @@
         events (cons first-event rest-events)]
     (remove nil? events)))
 
-(defn parse-test-case [yaml-string]
-  (->> yaml-string
-    parse
-    (remove (fn [ev] (= "DOC" (subs (:+ ev) 1))))))
+(declare snake-event)
+
+;;
+;; SnakeYAML Parser
+;;
+
+;; TODO - Set bigger buffer size in scanner class
+(defn parse-snakeyaml [yaml-string]
+  (let [parser (new Parse (.build (LoadSettings/builder)))]
+    (->> yaml-string
+      (.parseString parser)
+      (map snake-event)
+      (remove nil?)
+      rest)))
 
 ;;
 ;; Functions to turn Java event objects into Clojure objects
@@ -132,16 +140,106 @@
   (let [obj (event-obj event)]
     (assoc obj :* (str (. event getAlias)))))
 
-(defmulti ys-event class)
-(defmethod ys-event DocumentStartEvent [event] (doc-start event))
-(defmethod ys-event DocumentEndEvent [event] (doc-end event))
-(defmethod ys-event MappingStartEvent [event] (map-start event))
-(defmethod ys-event MappingEndEvent [event] (map-end event))
-(defmethod ys-event SequenceStartEvent [event] (seq-start event))
-(defmethod ys-event SequenceEndEvent [event] (seq-end event))
-(defmethod ys-event ScalarEvent [event] (scalar-val event))
-(defmethod ys-event AliasEvent [event] (alias-val event))
-(defmethod ys-event :default [_] nil)
+(defmulti snake-event class)
+(defmethod snake-event DocumentStartEvent [event] (doc-start event))
+(defmethod snake-event DocumentEndEvent [event] (doc-end event))
+(defmethod snake-event MappingStartEvent [event] (map-start event))
+(defmethod snake-event MappingEndEvent [event] (map-end event))
+(defmethod snake-event SequenceStartEvent [event] (seq-start event))
+(defmethod snake-event SequenceEndEvent [event] (seq-end event))
+(defmethod snake-event ScalarEvent [event] (scalar-val event))
+(defmethod snake-event AliasEvent [event] (alias-val event))
+(defmethod snake-event :default [_] nil)
+
+;;
+;; RapidYAML Parser
+;;
+
+(defn event-type [mask]
+  (condp = (bit-and mask 2r11111111111)
+    Evt/BSTR nil
+    Evt/ESTR nil
+    Evt/BDOC "+DOC"
+    Evt/EDOC "-DOC"
+    Evt/BMAP "+MAP"
+    Evt/EMAP "-MAP"
+    Evt/BSEQ "+SEQ"
+    Evt/ESEQ "-SEQ"
+    Evt/SCLR "=VAL"
+    Evt/ALIA "=ALI"
+    nil))
+
+(defmacro flag? [flag mask]
+  `(pos? (bit-and ~mask (. Evt ~flag))))
+
+(defn get-skey [mask]
+  (condp = (bit-and mask 2r111110000000000000000)
+    Evt/PLAI :=
+    Evt/SQUO :'
+    Evt/DQUO :$
+    Evt/LITL :|
+    Evt/FOLD :>
+    nil))
+
+(defn parse-rapidyaml [^String yaml-string]
+  (rest
+    (let [parser ^Rapidyaml (new Rapidyaml)
+          buffer (.getBytes yaml-string StandardCharsets/UTF_8)
+          masks (int-array 5)
+          needed (.parseYsToEvt parser buffer masks)
+          buffer (.getBytes yaml-string StandardCharsets/UTF_8)
+          masks (int-array needed)
+          _ (.parseYsToEvt parser buffer masks)
+          get-str (fn [i]
+                    (let [off (aget masks (inc i))
+                          len (aget masks (+ i 2))]
+                      ;; off/len index into the UTF-8 byte buffer: decode the
+                      ;; slice as UTF-8 (casting each byte to a char throws on
+                      ;; any non-ASCII byte, since those are negative)
+                      (String. ^bytes buffer (int off) (int len)
+                        StandardCharsets/UTF_8)))]
+
+      (loop [i 0, tag nil, anchor nil, events []]
+        (if (< i needed)
+          (let [mask (aget masks i)
+                type (event-type mask)
+                ; _ (WWW (Integer/toString mask 2) type)
+                sval (when (flag? HAS_STR mask) (get-str i))
+                tag (if (flag? TAG_ mask) sval tag)
+                anchor (if (flag? ANCH mask) sval anchor)
+                event (when type
+                        (let [event {:+ type}
+                              event (if (flag? FLOW mask)
+                                      (assoc event :flow true) event)
+                              event (if anchor (assoc event :& anchor) event)
+                              event (if tag
+                                      (let [tag (str/replace tag
+                                                  #"^!!"
+                                                  "tag:yaml.org,2002:")]
+                                        (assoc event :! tag)) event)
+                              event (if sval (assoc event
+                                               (get-skey mask) sval) event)
+                              event (if (= type "=ALI")
+                                      {:+ "=ALI" :* sval}
+                                      event)]
+                          event))
+                events (if event (conj events event) events)
+                i (+ i (if sval 3 1))]
+            (if event
+              (recur i nil nil events)
+              (recur i tag anchor events)))
+          events)))))
+
+(def parse-fn (if-let [parser-name (System/getenv "YS_PARSER")]
+                (condp = parser-name
+                  "" parse-snakeyaml
+                  "snake" parse-snakeyaml
+                  "rapid" parse-rapidyaml
+                  "ryml" parse-rapidyaml
+                  ; TODO:
+                  ;"rapid-buf" parse-rapidyaml-buf
+                  ;"ryml-buf" parse-rapidyaml-buf
+                  (die "Unknown YS_PARSER value: " parser-name))
+                parse-snakeyaml))
 
 (comment
   )
diff --git a/core/test/yamlscript/parser_test.clj b/core/test/yamlscript/parser_test.clj
index 29420b7d8..ef04eec4b 100644
--- a/core/test/yamlscript/parser_test.clj
+++ b/core/test/yamlscript/parser_test.clj
@@ -8,6 +8,11 @@
    [yamlscript.parser :as parser]
    [yamltest.core :as test]))
 
+(defn parse-test-case [yaml-string]
+  (->> yaml-string
+    parser/parse
+    (remove (fn [ev] (= "DOC" (subs (:+ ev) 1))))))
+
 (test/load-yaml-test-files
   ["test/compiler-stack.yaml"
    "test/resolver.yaml"
@@ -16,7 +21,7 @@
    :test (fn [test]
            (->> test
              :yamlscript
-             parser/parse-test-case
+             parse-test-case
              (map pr-str)
              (map #(subs %1 4 (dec (count %1))))))
    :want (fn [test]
diff --git a/libyamlscript/Makefile b/libyamlscript/Makefile
index da62f4181..2e5a22988 100644
--- a/libyamlscript/Makefile
+++ b/libyamlscript/Makefile
@@ -28,7 +28,7 @@ endif
 #------------------------------------------------------------------------------
 build:: $(BUILD_TARGETS)
 
-jar: $(LIBYAMLSCRIPT_JAR_PATH)
+jar: $(LIBYAMLSCRIPT_JAR_PATH) $(RAPIDYAML_INSTALLED)
 
 install:: $(BUILD_TARGETS)
 	mkdir -p $(PREFIX)/include/
@@ -48,7 +48,7 @@ test:: $(LIBYAMLSCRIPT_SO_FQNP)
 
 repl-deps:: $(LIBYAMLSCRIPT_JAR_PATH)
 
-$(LIBYAMLSCRIPT_SO_FQNP): $(LIBYAMLSCRIPT_JAR_PATH)
+$(LIBYAMLSCRIPT_SO_FQNP): $(LIBYAMLSCRIPT_JAR_PATH) $(RAPIDYAML_INSTALLED) $(JAVA_INSTALLED)
 ifneq (true,$(LIBZ))
 	$(error *** The 'libz.$(SO)' library is required by native-image but not installed)
 endif
@@ -77,6 +77,9 @@ $(LIBYAMLSCRIPT_JAR_PATH): $(LEIN) $(JAVA_INSTALLED) $(YAMLSCRIPT_CORE_INSTALLED
 	$< uberjar
 endif
 
+$(RAPIDYAML_INSTALLED):
+	$(MAKE) -C $(RAPIDYAML) install
+
 Dockerfile:: $(COMMON) Makefile
 	cat \
 $ yamlscript"
+    HOMEPAGE_URL "https://github.com/biojppm/rapidyaml -> https://github.com/yaml/yamlscript"
+    LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+
+option(YSPARSE_TIMED "add timings to sections" OFF)
+option(YSPARSE_DBG "enable debug logs" OFF)
+
+if(UNIX)
+    set(CMAKE_SHARED_LIBRARY_SUFFIX .so)
+endif()
+
+set(libname rapidyaml) # TODO rename to ysparse
+
+add_library(${libname}
+    #
+    # JNI bridge
+    org_rapidyaml_Rapidyaml.h
+    org_rapidyaml_Rapidyaml.cpp
+    #
+    # ysparse files
+    ysparse_common.hpp
+    ysparse_evt_handler.hpp
+    ysparse_evt_handler.cpp
+    ysparse_evt.hpp
+    ysparse_evt.cpp
+    #
+    # files required from rapidyaml
+    rapidyaml/src/c4/yml/common.cpp
+    rapidyaml/src/c4/yml/node_type.cpp
+    rapidyaml/src/c4/yml/parse.cpp
+    rapidyaml/src/c4/yml/tree.cpp
+    rapidyaml/src/c4/yml/tag.cpp
+    rapidyaml/src/c4/yml/reference_resolver.cpp
+    #
+    # files required from rapidyaml/ext/c4core
+    rapidyaml/ext/c4core/src/c4/base64.cpp
+    rapidyaml/ext/c4core/src/c4/error.cpp
+    rapidyaml/ext/c4core/src/c4/language.cpp
+    rapidyaml/ext/c4core/src/c4/utf.cpp
+)
+target_include_directories(${libname} PUBLIC
+    ${CMAKE_CURRENT_LIST_DIR}
+    ${CMAKE_CURRENT_LIST_DIR}/rapidyaml/src
+    ${CMAKE_CURRENT_LIST_DIR}/rapidyaml/ext/c4core/src
+)
+target_compile_definitions(${libname} PUBLIC
+    RYML_WITH_TAB_TOKENS
+    RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS
+    $<$:YSPARSE_TIMED>
+    $<$:RYML_DBG>
+)
+set_target_properties(${libname} PROPERTIES CXX_STANDARD 17)
+
+target_include_directories(${libname} PUBLIC ${JNI_INCLUDE_DIRS})
+
+add_executable(${libname}-test ysparse_test.cpp)
+target_link_libraries(${libname}-test ${libname})
+add_custom_target(${libname}-test-run
+    DEPENDS ${libname}-test
+    COMMAND $
+    COMMENT "running C++ tests"
+)
+add_custom_target(${libname}-test-run-timing
+    DEPENDS ${libname}-test
+    COMMAND $ --timing
+    COMMENT "running C++ tests, with timing"
+)
diff --git a/rapidyaml/native/Makefile b/rapidyaml/native/Makefile
new file mode 100644
index 000000000..97316fa99
--- /dev/null
+++ b/rapidyaml/native/Makefile
@@ -0,0 +1,113 @@
+include ../../common/base.mk
+include $(COMMON)/clojure.mk
+include $(COMMON)/java.mk
+include $(COMMON)/python.mk
+
+# TODO change to static library!
+# https://www.graalvm.org/latest/reference-manual/native-image/guides/build-static-executables/
+# https://www.blog.akhil.cc/static-jni
+# https://stackoverflow.com/questions/24493337/linking-static-library-with-jni
+
+THIS_DIR := $(shell pwd)
+BUILD_ROOT := $(THIS_DIR)/_build
+MUSL_DIR := $(BUILD_ROOT)/x86_64-linux-musl-cross
+BDIR := $(BUILD_ROOT)/$(RAPIDYAML_BUILD_TYPE)-musl$(RAPIDYAML_MUSL)-timed$(RAPIDYAML_TIMED)-dbg$(RAPIDYAML_DBG)
+
+RAPIDYAML_DEPS := \
+  Makefile \
+  CMakeLists.txt \
+  $(JAVA_HOME) \
+  $(RAPIDYAML_JNI_H) \
+  $(wildcard ./*pp) \
+
+CMK_ENV ?=
+CMK_ENV_STATIC ?=
+CMK_ENV_SHARED ?=
+CMK_FLAGS := \
+  -D CMAKE_BUILD_TYPE=$(RAPIDYAML_BUILD_TYPE) \
+  -D YSPARSE_TIMED=$(RAPIDYAML_TIMED) \
+  -D YSPARSE_DBG=$(RAPIDYAML_DBG) \
+  -D CMAKE_EXPORT_COMPILE_COMMANDS=ON
+CMK_FLAGS_STATIC ?=
+CMK_FLAGS_SHARED ?=
+CMK_FLAGS_EXTRA ?=
+ifneq ($(RAPIDYAML_MUSL),0)
+  CMK_ENV += MUSL_DIR=$(MUSL_DIR)
+  CMK_FLAGS_STATIC += \
+    -D CMAKE_TOOLCHAIN_FILE=musl.x86_64.cmake \
+    -D CMAKE_C_FLAGS='-static' \
+    -D CMAKE_CXX_FLAGS='-static'
+endif
+
+
+#------------------------------------------------------------------------------
+default::
+
+cfg:: cfg-static cfg-shared
+build:: build-static build-shared
+test: test-static test-shared
+
+rapidyaml:
+	mkdir -p $@
+	$(GIT) -C $@ init -q .
+	$(GIT) -C $@ remote add origin $(RAPIDYAML_REPO)
+	$(GIT) -C $@ fetch origin $(RAPIDYAML_TAG)
+	$(GIT) -C $@ reset --hard FETCH_HEAD
+	$(GIT) -C $@ submodule update --init --recursive
+
+clean::
+	$(RM) librapidyaml.*
+	$(RM) -r _build
+	$(RM) -r rapidyaml-install
+
+realclean:: clean
+	$(RM) -r rapidyaml
+
+jni: $(RAPIDYAML_JNI_H)
+
+jnicheck: jni
+	$(GIT) diff --exit-code $(RAPIDYAML_JNI_H)
+
+
+#------------------------------------------------------------------------------
+
+build-static: $(RAPIDYAML_LIB)
+cfg-static: rapidyaml
+	$(CMK_ENV) $(CMK_ENV_STATIC) $(CMAKE) -S . -B $(BDIR)-static $(CMK_FLAGS) $(CMK_FLAGS_STATIC) $(CMK_FLAGS_EXTRA) -D BUILD_SHARED_LIBS=OFF
+test-static: build-static
+	$(CMK_ENV) $(CMK_ENV_STATIC) $(CMAKE) --build $(BDIR)-static --verbose --target rapidyaml-test-run
+test-static-timing: build-static
+	$(CMK_ENV) $(CMK_ENV_STATIC) $(CMAKE) --build $(BDIR)-static --verbose --target rapidyaml-test-run-timing
+$(RAPIDYAML_LIB): cfg-static $(RAPIDYAML_DEPS)
+	$(CMK_ENV) $(CMK_ENV_STATIC) $(CMAKE) --build $(BDIR)-static --verbose --parallel --target rapidyaml
+	cp -fv $(BDIR)-static/*.a $@
+
+build-shared: $(RAPIDYAML_SO)
+cfg-shared: rapidyaml
+	$(CMK_ENV) $(CMK_ENV_SHARED) $(CMAKE) -S . -B $(BDIR)-shared $(CMK_FLAGS) $(CMK_FLAGS_SHARED) $(CMK_FLAGS_EXTRA) -D BUILD_SHARED_LIBS=ON
+test-shared: build-shared
+	$(CMK_ENV) $(CMK_ENV_SHARED) $(CMAKE) --build $(BDIR)-shared --verbose --target rapidyaml-test-run
+test-shared-timing: build-shared
+	$(CMK_ENV) $(CMK_ENV_SHARED) $(CMAKE) --build $(BDIR)-shared --verbose --target rapidyaml-test-run-timing
+$(RAPIDYAML_SO): cfg-shared $(RAPIDYAML_DEPS)
+	$(CMK_ENV) $(CMK_ENV_SHARED) $(CMAKE) --build $(BDIR)-shared --verbose --parallel --target rapidyaml
+	cp -fv $(BDIR)-shared/*.so $@
+	ln -fs $@ librapidyaml.so
+
+musl:
+ifneq ($(RAPIDYAML_MUSL),0)
+musl: $(MUSL_DIR)
+cfg-static: $(MUSL_DIR)
+cfg-shared: $(MUSL_DIR)
+$(MUSL_DIR):
+	mkdir -p `dirname $(MUSL_DIR)`
+	cd `dirname $(MUSL_DIR)` && \
+	  wget https://musl.cc/x86_64-linux-musl-cross.tgz && \
+	  tar xfz x86_64-linux-musl-cross.tgz
+	ln -fs /usr/lib/ld-musl-x86_64.so.1 $(MUSL_DIR)/bin/ldd
+	@# musls dynamic linker is an ldd stand-in
+	@# see https://wiki.musl-libc.org/faq
+endif
+
+$(RAPIDYAML_JNI_H): $(JAVAC) $(RAPIDYAML_JAVA)
+	$(JAVAC) -h . $(RAPIDYAML_JAVA) # $^ doesn't work
diff --git a/rapidyaml/native/musl.x86_64.cmake b/rapidyaml/native/musl.x86_64.cmake
new file mode 100644
index 000000000..f20fb53b0
--- /dev/null
+++ b/rapidyaml/native/musl.x86_64.cmake
@@ -0,0 +1,34 @@
+set(MUSL_DIR $ENV{MUSL_DIR})
+set(MUSL_TGT x86_64-linux-musl)
+
+set(MUSL_BIN ${MUSL_DIR}/bin/${MUSL_TGT}-)
+set(MUSL_PFX ${MUSL_DIR}/${MUSL_TGT})
+set(MUSL_LIB ${MUSL_DIR}/${MUSL_TGT}/lib)
+set(MUSL_INC ${MUSL_DIR}/${MUSL_TGT}/include)
+
+set(MUSL TRUE)
+
+set(CMAKE_C_COMPILER ${MUSL_BIN}gcc)
+set(CMAKE_CXX_COMPILER ${MUSL_BIN}g++)
+set(CMAKE_AR ${MUSL_BIN}ar)
+set(CMAKE_C_COMPILER_AR ${MUSL_BIN}ar)
+set(CMAKE_CXX_COMPILER_AR ${MUSL_BIN}ar)
+set(CMAKE_RANLIB ${MUSL_BIN}ranlib)
+set(CMAKE_C_COMPILER_RANLIB ${MUSL_BIN}ranlib)
+set(CMAKE_CXX_COMPILER_RANLIB ${MUSL_BIN}ranlib)
+set(CMAKE_ADDR2LINE ${MUSL_BIN}addr2line)
+set(CMAKE_LINKER ${MUSL_BIN}ld)
+set(CMAKE_NM ${MUSL_BIN}nm)
+set(CMAKE_OBJCOPY ${MUSL_BIN}objcopy)
+set(CMAKE_OBJDUMP ${MUSL_BIN}objdump)
+set(CMAKE_READELF ${MUSL_BIN}readelf)
+set(CMAKE_STRIP ${MUSL_BIN}strip)
+
+# set searching rules for cross-compiler
+set(CMAKE_SYSTEM_PREFIX_PATH ${MUSL_PFX})
+set(CMAKE_SYSTEM_LIBRARY_PATH ${MUSL_LIB})
+set(CMAKE_SYSTEM_INCLUDE_PATH ${MUSL_INC})
+set(CMAKE_FIND_ROOT_PATH ${MUSL_PFX})
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) # search also in the host
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) # search also in the host
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) # search also in the host
diff --git a/rapidyaml/native/org_rapidyaml_Rapidyaml.cpp b/rapidyaml/native/org_rapidyaml_Rapidyaml.cpp
new file mode 100644
index 000000000..050c75aad
--- /dev/null
+++ b/rapidyaml/native/org_rapidyaml_Rapidyaml.cpp
@@ -0,0 +1,205 @@
+#include 
+#include "ysparse_evt.hpp"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+static C4_NO_INLINE void throw_runtime_exception(JNIEnv * env, const char* msg);
+static C4_NO_INLINE void throw_parse_error(JNIEnv *env, size_t offset, size_t line, size_t column, const char *msg);
+
+
+JNIEXPORT void JNICALL
+Java_org_rapidyaml_Rapidyaml_ysparse_1timing_1set(JNIEnv *, jobject, jboolean yes)
+{
+    ysparse_timing_set(yes);
+}
+
+JNIEXPORT jlong JNICALL
+Java_org_rapidyaml_Rapidyaml_ysparse_1init(JNIEnv *env, jobject)
+{
+    ysparse *obj = ysparse_init();
+    return (jlong)obj;
+}
+
+
+JNIEXPORT void JNICALL
+Java_org_rapidyaml_Rapidyaml_ysparse_1destroy(JNIEnv *, jobject, jlong obj)
+{
+    ysparse_destroy((ysparse*)obj);
+}
+
+
+JNIEXPORT jint JNICALL
+Java_org_rapidyaml_Rapidyaml_ysparse_1parse(JNIEnv *env, jobject,
+                                            jlong obj, jstring jfilename,
+                                            jbyteArray src, jint src_len,
+                                            jintArray dst, jint dst_len)
+{
+    TIMED_SECTION("jni:ysparse", (size_type)src_len);
+    jbyte* src_ = nullptr;
+    int* dst_ = nullptr;
+    const char *filename = nullptr;
+    jboolean dst_is_copy = false;
+    jboolean src_is_copy = false;
+    {
+        TIMED_SECTION("jni:ysparse/get_jni", (size_type)src_len);
+        // this is __S__L__O__W__
+        // https://stackoverflow.com/questions/43763129/jni-is-getintarrayelements-always-linear-in-time
+        // https://stackoverflow.com/questions/7395695/how-to-convert-from-bytebuffer-to-integer-and-string
+        {
+            TIMED_SECTION("jni:ysparse/GetByteArray(src)");
+            src_ = env->GetByteArrayElements(src, &src_is_copy);
+        }
+        {
+            TIMED_SECTION("jni:ysparse/GetIntArray(dst)");
+            dst_ = env->GetIntArrayElements(dst, &dst_is_copy);
+        }
+        {
+            TIMED_SECTION("jni:ysparse/GetStringUTFChars()");
+            filename = env->GetStringUTFChars(jfilename, 0);
+        }
+    }
+    int rc = 0;
+    {
+        TIMED_SECTION("jni:ysparse/parse", (size_type)src_len);
+        try
+        {
+            rc = ysparse_parse((ysparse*)obj, filename,
+                               (char*)src_, src_len,
+                               dst_, dst_len);
+        }
+        catch (YsParseError const& exc)
+        {
+            throw_parse_error(env, exc.location.offset, exc.location.line, exc.location.col, exc.msg.c_str());
+        }
+        catch (std::exception const& exc)
+        {
+            throw_runtime_exception(env, exc.what());
+        }
+    }
+    {
+        TIMED_SECTION("jni:ysparse/release");
+        // __S__L__O__W__
+        {
+            TIMED_SECTION("jni:ysparse/ReleaseByteArray(src)");
+            env->ReleaseByteArrayElements(src, src_, 0);
+        }
+        {
+            TIMED_SECTION("jni:ysparse/ReleaseIntArray(dst)");
+            env->ReleaseIntArrayElements(dst, dst_, 0);
+        }
+        {
+            TIMED_SECTION("jni:ysparse/ReleaseStringUTFChars()");
+            env->ReleaseStringUTFChars(jfilename, filename);
+        }
+    }
+    return rc;
+}
+
+
+JNIEXPORT jint JNICALL
+Java_org_rapidyaml_Rapidyaml_ysparse_1parse_1buf(JNIEnv *env, jobject,
+                                                 jlong obj, jstring jfilename,
+                                                 jobject src, jint src_len,
+                                                 jobject dst, jint dst_len)
+{
+    TIMED_SECTION("jni:ysparse_buf", (size_type)src_len);
+    char* src_ = nullptr;
+    int* dst_ = nullptr;
+    const char *filename = nullptr;
+    {
+        TIMED_SECTION("jni:ysparse_buf/get_jni", (size_type)src_len);
+        src_ = (char*)env->GetDirectBufferAddress(src);
+        dst_ = (int*)env->GetDirectBufferAddress(dst);
+        // raising a Java exception only marks it as pending; native code keeps
+        // running, so return here instead of parsing through a null buffer
+        if(!src_)
+        {
+            throw_runtime_exception(env, "null pointer: src");
+            return 0;
+        }
+        if(!dst_)
+        {
+            throw_runtime_exception(env, "null pointer: dst");
+            return 0;
+        }
+        filename = env->GetStringUTFChars(jfilename, 0);
+    }
+    int rc = 0;
+    {
+        TIMED_SECTION("jni:ysparse_buf/parse", (size_type)src_len);
+        try
+        {
+            rc = ysparse_parse((ysparse*)obj, filename, src_, src_len, dst_, dst_len);
+        }
+        catch (YsParseError const& exc)
+        {
+            throw_parse_error(env, exc.location.offset, exc.location.line, exc.location.col, exc.msg.c_str());
+        }
+        catch (std::exception const& exc)
+        {
+            throw_runtime_exception(env, exc.what());
+        }
+    }
+    // pairs with GetStringUTFChars above, on the success and the error path
+    if(filename)
+        env->ReleaseStringUTFChars(jfilename, filename);
+    return rc; // stays 0 when an exception was raised
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+static bool s_timing_enabled = false;
+RYML_EXPORT bool ysparse_timing_get()
+{
+    return s_timing_enabled;
+}
+RYML_EXPORT void ysparse_timing_set(bool yes)
+{
+    s_timing_enabled = yes;
+}
+
+static C4_NO_INLINE void throw_java_exception(JNIEnv * env, const char* type, const char* msg)
+{
+    jclass clazz = env->FindClass(type);
+    if (clazz != NULL) // if it is null, a NoClassDefFoundError was already thrown
+        env->ThrowNew(clazz, msg);
+}
+
+static C4_NO_INLINE void throw_runtime_exception(JNIEnv *env, const char* msg)
+{
+    throw_java_exception(env, "java/lang/RuntimeException", msg);
+}
+
+static C4_NO_INLINE void throw_parse_error(JNIEnv *env, size_t offset, size_t line, size_t column, const char *msg)
+{
+    // see https://stackoverflow.com/questions/55013243/jni-custom-exceptions-with-more-than-one-parameter
+    jclass clazz = env->FindClass("org/rapidyaml/YamlParseErrorException");
+    if (clazz != NULL) // if it is null, a NoClassDefFoundError was already thrown
+    {
+        jstring jmsg = env->NewStringUTF(msg);
+        jint joffset = (jint)offset;
+        jint jline = (jint)line;
+        jint jcol = (jint)column;
+        // see https://www.rgagnon.com/javadetails/java-0286.html
+        // about the proper signature.
+        // we want (int, int, int, String):
+        const char * const signature = "(IIILjava/lang/String;)V";
+        jmethodID ctor = env->GetMethodID(clazz, "", signature);
+        if (ctor != NULL) // if it is null, a NoSuchMethodError was already thrown
+        {
+            jobject jexc = env->NewObject(clazz, ctor, joffset, jline, jcol, jmsg);
+            if (jexc != NULL) // if it is null, NewObject already raised its own exception
+                env->Throw((jthrowable)jexc); // https://stackoverflow.com/questions/2455668/jni-cast-between-jobect-and-jthrowable
+        }
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/rapidyaml/native/org_rapidyaml_Rapidyaml.h b/rapidyaml/native/org_rapidyaml_Rapidyaml.h
new file mode 100644
index 000000000..021ccff69
--- /dev/null
+++ b/rapidyaml/native/org_rapidyaml_Rapidyaml.h
@@ -0,0 +1,53 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include 
+/* Header for class org_rapidyaml_Rapidyaml */
+
+#ifndef _Included_org_rapidyaml_Rapidyaml
+#define _Included_org_rapidyaml_Rapidyaml
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_rapidyaml_Rapidyaml
+ * Method:    ysparse_timing_set
+ * Signature: (Z)V
+ */
+JNIEXPORT void JNICALL Java_org_rapidyaml_Rapidyaml_ysparse_1timing_1set
+  (JNIEnv *, jobject, jboolean);
+ +/* + * Class: org_rapidyaml_Rapidyaml + * Method: ysparse_init + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_rapidyaml_Rapidyaml_ysparse_1init + (JNIEnv *, jobject); + +/* + * Class: org_rapidyaml_Rapidyaml + * Method: ysparse_destroy + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_rapidyaml_Rapidyaml_ysparse_1destroy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_rapidyaml_Rapidyaml + * Method: ysparse_parse + * Signature: (JLjava/lang/String;[BI[II)I + */ +JNIEXPORT jint JNICALL Java_org_rapidyaml_Rapidyaml_ysparse_1parse + (JNIEnv *, jobject, jlong, jstring, jbyteArray, jint, jintArray, jint); + +/* + * Class: org_rapidyaml_Rapidyaml + * Method: ysparse_parse_buf + * Signature: (JLjava/lang/String;Ljava/nio/ByteBuffer;ILjava/nio/IntBuffer;I)I + */ +JNIEXPORT jint JNICALL Java_org_rapidyaml_Rapidyaml_ysparse_1parse_1buf + (JNIEnv *, jobject, jlong, jstring, jobject, jint, jobject, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/rapidyaml/native/ysparse_common.hpp b/rapidyaml/native/ysparse_common.hpp new file mode 100644 index 000000000..48e4dc090 --- /dev/null +++ b/rapidyaml/native/ysparse_common.hpp @@ -0,0 +1,72 @@ +#pragma once +#ifndef YSPARSE_COMMON_HPP_ +#define YSPARSE_COMMON_HPP_ + +#include +#include + +namespace ryml { +using namespace c4; +using namespace c4::yml; +} // namespace ryml + +using size_type = int; + +struct YsParseError : public std::exception +{ + ryml::Location location; + std::string msg; + const char* what() const noexcept override { return msg.c_str(); } +}; + + +//----------------------------------------------------------------------------- +// timing + +#ifdef __cplusplus +extern "C" { +#endif +RYML_EXPORT bool ysparse_timing_get(); +RYML_EXPORT void ysparse_timing_set(bool yes); +#ifdef __cplusplus +} +#endif + +#ifndef YSPARSE_TIMED +#define TIMED_SECTION(...) +#error +#else +#include +#include +#define TIMED_SECTION(...) 
timed_section C4_XCAT(ts, __LINE__)(__VA_ARGS__) +struct timed_section +{ + using myclock = std::chrono::steady_clock; + const char* name; + size_type len; + myclock::time_point start; + C4_NO_INLINE timed_section(const char* n, size_type len_=0) + { + if(ysparse_timing_get()) + { + name = n; + len = len_; + start = myclock::now(); + //fprintf(stderr, "%10s : %s...\n", " ", name); + } + } + C4_NO_INLINE ~timed_section() + { + if(ysparse_timing_get()) + { + const std::chrono::duration t = myclock::now() - start; + fprintf(stderr, "%10.6fms: %s", t.count(), name); + if(len) + fprintf(stderr, " %.3fMB/s", (float)len / t.count() * 1.e-3); + fprintf(stderr, "\n"); + } + } +}; +#endif // YSPARSE_TIMED + +#endif // YSPARSE_COMMON_HPP_ diff --git a/rapidyaml/native/ysparse_evt.cpp b/rapidyaml/native/ysparse_evt.cpp new file mode 100644 index 000000000..0c3497666 --- /dev/null +++ b/rapidyaml/native/ysparse_evt.cpp @@ -0,0 +1,63 @@ +#include "ysparse_evt.hpp" + +using namespace ryml; + +#if defined(__cplusplus) +extern "C" { +#endif +// see +// https://stackoverflow.com/questions/230689/best-way-to-throw-exceptions-in-jni-code +// https://stackoverflow.com/questions/4138168/what-happens-when-i-throw-a-c-exception-from-a-native-java-method + +namespace { +C4_NORETURN void ysparse_error(const char* msg, size_t msg_len, Location location, void *user_data) +{ + YsParseError exc; + exc.location = location; + exc.msg.assign(msg, msg_len); + throw exc; +} +} // anon namespace + +RYML_EXPORT ysparse *ysparse_init() +{ + TIMED_SECTION("cpp:ysparse_init"); + Callbacks cb = {}; + cb.m_error = &ysparse_error; + set_callbacks(cb); + ysparse *ryml2evt = _RYML_CB_ALLOC(get_callbacks(), ysparse, 1); + _RYML_CB_CHECK(get_callbacks(), ryml2evt != nullptr); + new ((void*)ryml2evt) ysparse(); + return ryml2evt; +} + +RYML_EXPORT void ysparse_destroy(ysparse *obj) +{ + TIMED_SECTION("cpp:ysparse_destroy"); + obj->~ysparse(); + _RYML_CB_FREE(get_callbacks(), obj, ysparse, 1); +} + +RYML_EXPORT 
size_type ysparse_parse(ysparse *obj, + const char *filename, + char *ys, size_type ys_size, + evt::DataType *events, size_type evt_size) +{ + TIMED_SECTION("cpp:ysparse", ys_size); + csubstr filename_ = filename ? to_csubstr(filename) : csubstr{}; + substr ys_(ys, (size_t)ys_size); + { + TIMED_SECTION("cpp:ysparse/reset"); + obj->reset(ys_, events, evt_size); + obj->m_handler.reserve(256u); + } + { + TIMED_SECTION("cpp:ysparse/parse", ys_size); + obj->m_parser.parse_in_place_ev(filename_, ys_); + } + return (size_type)obj->m_handler.m_evt_curr; +} + +#if defined(__cplusplus) +} +#endif diff --git a/rapidyaml/native/ysparse_evt.hpp b/rapidyaml/native/ysparse_evt.hpp new file mode 100644 index 000000000..9b0c9dbb8 --- /dev/null +++ b/rapidyaml/native/ysparse_evt.hpp @@ -0,0 +1,94 @@ +#pragma once +#ifndef YSPARSE_EVT_HPP_ +#define YSPARSE_EVT_HPP_ + +#include "ysparse_evt_handler.hpp" +#include "ysparse_common.hpp" + +#if defined(__cplusplus) +extern "C" { +#endif + +struct RYML_EXPORT ysparse +{ + ys::EventHandlerEvt m_handler; + c4::yml::ParseEngine m_parser; + ysparse() + : m_handler() + , m_parser(&m_handler) + { + RYML_CHECK(m_parser.options().scalar_filtering()); + } + void reset(c4::csubstr src, evt::DataType *evt, int32_t evt_size) + { + m_handler.reset(src, evt, evt_size); + } +}; + + +//----------------------------------------------------------------------------- + +/** Initialize the resources */ +RYML_EXPORT ysparse *ysparse_init(); + +/** Destroy the resources */ +RYML_EXPORT void ysparse_destroy(ysparse *ryml2evt); + +/** Parse YAML in the string `ys` of size `ys_size`, and write the + * result into the array of (integer) events `evt` of size + * `evt_size`. 
Each event is encoded as a mask of evt::EventFlags + * (note that it uses the integer evt::DataType as the underlying + * type), and when an event has an associated string, it is followed + * in the array by two extra values, which encode the offset and the + * length of the string in the `ys` string. The `ys` string is mutated + * during parsing. + * + * @return the size needed for `evt`. The caller must check if the + * returned size is larger than `evt_size`. If so, this means that + * `evt` could not accommodate all events produced from `ys`, and is + * incomplete. The caller must then (1) resize `evt` to at least the + * return value, (2) re-copy the original YS into `ys` and (3) call + * this function again, passing in the resized `evt` and the fresh + * copy in `ys`. + * + * @note nothing is written beyond `evt_size`. This means that when + * `evt_size` is 0, then `evt` can be null. This function can be + * safely called for any valid pair of `evt` and `evt_size`, and will + * always return the same required size. + * + * For example, the YAML `say: 2 + 2` produces the following sequence of + * 12 integers: + * + * ```c++ + * BSTR, + * BDOC, + * VAL_|BMAP|BLCK, + * KEY_|SCLR|PLAI, 0, 3, // "say" + * VAL_|SCLR|PLAI, 5, 5, // "2 + 2" + * EMAP, + * EDOC, + * ESTR, + * ``` + * + * Note that the scalar events, i.e. "say" and "2 + 2", are followed + * each by two extra integers encoding the offset and length of the + * scalar's string. These two extra integers are present whenever the + * event has any of the bits `SCLR`, `ALIA`, `ANCH` or `TAG_`. For ease + * of use, there is a bitmask `HAS_STR`, which enables quick testing + * by a simple `flags & HAS_STR`. Refer to evt::EventFlags for the + * full list of flags and their meaning. + * + * Also, where a string requires filtering, the parser filters it + * in-place in the input string, and the extra integers will pertain + * to the resulting filtered string. 
+ */ +RYML_EXPORT size_type ysparse_parse(ysparse *ryml2evt, + const char *filename, + char *ys, size_type ys_size, + evt::DataType *evt, size_type evt_size); + +#if defined(__cplusplus) +} +#endif + +#endif /* YSPARSE_EVT_HPP_ */ diff --git a/rapidyaml/native/ysparse_evt_handler.cpp b/rapidyaml/native/ysparse_evt_handler.cpp new file mode 100644 index 000000000..24e5fd662 --- /dev/null +++ b/rapidyaml/native/ysparse_evt_handler.cpp @@ -0,0 +1,12 @@ +#include "ysparse_evt_handler.hpp" +#include +#include + +namespace c4 { +namespace yml { + +// instantiate the template +template class ParseEngine; + +} // namespace yml +} // namespace c4 diff --git a/rapidyaml/native/ysparse_evt_handler.hpp b/rapidyaml/native/ysparse_evt_handler.hpp new file mode 100644 index 000000000..a8f098b99 --- /dev/null +++ b/rapidyaml/native/ysparse_evt_handler.hpp @@ -0,0 +1,725 @@ +#ifndef _YSPARSE_EVT_HANDLER_HPP_ +#define _YSPARSE_EVT_HANDLER_HPP_ + +#include +#include +#include +#include +#include +#include + +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") + +namespace evt { +using DataType = int32_t; +typedef enum : DataType { + // Event types + BSTR = 1 << 0, // +STR + ESTR = 1 << 1, // -STR + BDOC = 1 << 2, // +DOC + EDOC = 1 << 3, // -DOC + BMAP = 1 << 4, // +MAP + EMAP = 1 << 5, // -MAP + BSEQ = 1 << 6, // +SEQ + ESEQ = 1 << 7, // -SEQ + SCLR = 1 << 8, // =VAL + ALIA = 1 << 9, // =ALI + + // Style flags + PLAI = 1 << 16, // : (plain scalar) + SQUO = 1 << 17, // ' (single-quoted scalar) + DQUO = 1 << 18, // " (double-quoted scalar) + LITL = 1 << 19, // | (block literal scalar) + FOLD = 1 << 20, // > (block folded scalar) + FLOW = 1 << 21, // flow container: + // [] for seqs or {} for maps + BLCK = 1 << 22, // block container + + // Modifiers + ANCH = 1 << 24, // anchor + TAG_ = 1 << 25, // tag + + // Structure flags + KEY_ = 1 << 26, // as key + VAL_ = 1 << 27, // as value + EXPL = 1 << 28, // --- 
(with BDOC) or + // ... (with EDOC) + // (may be fused with FLOW + // if needed) + + // Utility flags + LAST = EXPL, + MASK = (LAST << 1) - 1, + // the event requires a string. the next two integers will provide + // respectively the string's offset and length + HAS_STR = SCLR|ALIA|ANCH|TAG_ +} EventFlags; +} // namespace evt + + +namespace ys { + +using c4::csubstr; +using c4::substr; +using c4::to_substr; +using c4::to_csubstr; +#ifdef RYML_DBG +using c4::_dbg_printf; +#endif + +struct EventHandlerEvtState : public c4::yml::ParserState +{ + c4::yml::type_bits evt_type; + int32_t evt_id; +}; + + +struct EventHandlerEvt : public c4::yml::EventHandlerStack +{ + + /** @name types + * @{ */ + + // our internal state must inherit from parser state + using state = EventHandlerEvtState; + + /** @} */ + +public: + + /** @cond dev */ + csubstr m_str; + evt::DataType * m_evt; + int32_t m_evt_curr; + int32_t m_evt_prev; + int32_t m_evt_size; + char m_key_tag_buf[256]; + char m_val_tag_buf[256]; + std::string m_arena; + + // undefined at the end + #define _enable_(bits) _enable__() + #define _disable_(bits) _disable__() + #define _has_any_(bits) _has_any__() + /** @endcond */ + +public: + + /** @name construction and resetting + * @{ */ + + EventHandlerEvt(c4::yml::Callbacks const& cb) + : EventHandlerStack(cb) + { + reset({}, nullptr, 0); + } + EventHandlerEvt() + : EventHandlerEvt(c4::yml::get_callbacks()) + { + } + + void reset(csubstr str, evt::DataType *dst, int32_t dst_size) + { + _stack_reset_root(); + m_curr->flags |= c4::yml::RUNK|c4::yml::RTOP; + m_curr->evt_type = {}; + m_curr->evt_id = 0; + m_arena.clear(); + m_str = str; + m_evt = dst; + m_evt_size = dst_size; + m_evt_curr = 0; + m_evt_prev = 0; + } + + void reserve(int arena_size) + { + m_arena.reserve(arena_size); + } + + /** @} */ + +public: + + /** @name parse events + * @{ */ + + void start_parse(const char* filename, c4::yml::detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data) + { + 
this->_stack_start_parse(filename, relocate_arena, relocate_arena_data); + } + + void finish_parse() + { + this->_stack_finish_parse(); + } + + void cancel_parse() + { + while(m_stack.size() > 1) + _pop(); + } + + /** @} */ + +public: + + /** @name YAML stream events */ + /** @{ */ + + void begin_stream() + { + _send_flag_only_(evt::BSTR); + } + + void end_stream() + { + _send_flag_only_(evt::ESTR); + } + + /** @} */ + +public: + + /** @name YAML document events */ + /** @{ */ + + /** implicit doc start (without ---) */ + void begin_doc() + { + _c4dbgpf("{}/{}: begin_doc", m_evt_curr, m_evt_size); + _send_flag_only_(evt::BDOC); + if(_stack_should_push_on_begin_doc()) + { + _c4dbgp("push!"); + _push(); + } + } + /** implicit doc end (without ...) */ + void end_doc() + { + _c4dbgpf("{}/{}: end_doc", m_evt_curr, m_evt_size); + _send_flag_only_(evt::EDOC); + if(_stack_should_pop_on_end_doc()) + { + _c4dbgp("pop!"); + _pop(); + } + } + + /** explicit doc start, with --- */ + void begin_doc_expl() + { + _c4dbgpf("{}/{}: begin_doc_expl", m_evt_curr, m_evt_size); + _send_flag_only_(evt::BDOC|evt::EXPL); + if(_stack_should_push_on_begin_doc()) + { + _c4dbgp("push!"); + _push(); + } + } + /** explicit doc end, with ... 
*/ + void end_doc_expl() + { + _c4dbgpf("{}/{}: end_doc_expl", m_evt_curr, m_evt_size); + _send_flag_only_(evt::EDOC|evt::EXPL); + if(_stack_should_pop_on_end_doc()) + { + _c4dbgp("pop!"); + _pop(); + } + } + + /** @} */ + +public: + + /** @name YAML map functions */ + /** @{ */ + + void begin_map_key_flow() + { + _RYML_CB_ERR(m_stack.m_callbacks, "container keys not supported"); + } + void begin_map_key_block() + { + _RYML_CB_ERR(m_stack.m_callbacks, "container keys not supported"); + } + + void begin_map_val_flow() + { + _c4dbgpf("{}/{}: bmap flow", m_evt_curr, m_evt_size); + _send_flag_only_(evt::VAL_|evt::BMAP|evt::FLOW); + _mark_parent_with_children_(); + _enable_(c4::yml::MAP|c4::yml::FLOW_SL); + _push(); + } + void begin_map_val_block() + { + _c4dbgpf("{}/{}: bmap block", m_evt_curr, m_evt_size); + _send_flag_only_(evt::VAL_|evt::BMAP|evt::BLCK); + _mark_parent_with_children_(); + _enable_(c4::yml::MAP|c4::yml::BLOCK); + _push(); + } + + void end_map() + { + _pop(); + _send_flag_only_(evt::EMAP); + } + + /** @} */ + +public: + + /** @name YAML seq events */ + /** @{ */ + + void begin_seq_key_flow() + { + _RYML_CB_ERR(m_stack.m_callbacks, "container keys not supported"); + } + void begin_seq_key_block() + { + _RYML_CB_ERR(m_stack.m_callbacks, "container keys not supported"); + } + + void begin_seq_val_flow() + { + _c4dbgpf("{}/{}: bseq flow", m_evt_curr, m_evt_size); + _send_flag_only_(evt::VAL_|evt::BSEQ|evt::FLOW); + _mark_parent_with_children_(); + _enable_(c4::yml::SEQ|c4::yml::FLOW_SL); + _push(); + } + void begin_seq_val_block() + { + _c4dbgpf("{}/{}: bseq block", m_evt_curr, m_evt_size); + _send_flag_only_(evt::VAL_|evt::BSEQ|evt::BLCK); + _mark_parent_with_children_(); + _enable_(c4::yml::SEQ|c4::yml::BLOCK); + _push(); + } + + void end_seq() + { + _pop(); + _send_flag_only_(evt::ESEQ); + } + + /** @} */ + +public: + + /** @name YAML structure events */ + /** @{ */ + + void add_sibling() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + 
m_curr->evt_type = {}; + } + + /** set the previous val as the first key of a new map, with flow style. + * + * See the documentation for @ref doc_event_handlers, which has + * important notes about this event. + */ + void actually_val_is_first_key_of_new_map_flow() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_curr > 2); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_prev > 0); + _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_flow", m_evt_curr, m_evt_size, m_evt_prev); + // BEFORE + // ... flag start len (free) + // | | + // prev curr + // AFTER + // ... flag flag start len (free) + // | | + // prev curr + if(m_evt_prev < m_evt_size) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[m_evt_prev] & evt::HAS_STR) || m_evt_curr >= m_evt_size); + if(m_evt_curr < m_evt_size) + { + // watchout: it must be in this order! + m_evt[m_evt_curr ] = m_evt[m_evt_prev + 2]; + m_evt[m_evt_curr - 1] = m_evt[m_evt_prev + 1]; + m_evt[m_evt_curr - 2] = m_evt[m_evt_prev] | evt::KEY_; + m_evt[m_evt_curr - 2] &= ~evt::VAL_; + } + m_evt[m_evt_prev] = evt::BMAP|evt::FLOW|evt::VAL_; + } + m_curr->evt_id = m_evt_curr - 2; + ++m_evt_prev; + ++m_evt_curr; + _enable_(c4::yml::MAP|c4::yml::FLOW); + _push(); + } + + void actually_val_is_first_key_of_new_map_block() + { + _RYML_CB_ERR(m_stack.m_callbacks, "container keys not supported"); + } + + /** @} */ + +public: + + /** @name YAML scalar events */ + /** @{ */ + + + C4_ALWAYS_INLINE void set_key_scalar_plain_empty() + { + _c4dbgpf("{}/{}: set_key_scalar_plain_empty", m_evt_curr, m_evt_size); + _send_key_scalar_(_get_latest_empty_scalar(), evt::PLAI); + _enable_(c4::yml::KEY|c4::yml::KEY_PLAIN|c4::yml::KEYNIL); + } + C4_ALWAYS_INLINE void set_val_scalar_plain_empty() + { + _c4dbgpf("{}/{}: set_val_scalar_plain_empty", m_evt_curr, m_evt_size); + _send_val_scalar_(_get_latest_empty_scalar(), evt::PLAI); + _enable_(c4::yml::VAL|c4::yml::VAL_PLAIN|c4::yml::VALNIL); + } + C4_ALWAYS_INLINE csubstr _get_latest_empty_scalar() const + { + 
// ideally we should search back in the latest event that has + // a scalar, than select a zero-length scalar immediately + // after that scalar. But this also works for now: + return m_str.first(0); + } + + + C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar) + { + _c4dbgpf("{}/{}: set_key_scalar_plain: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_key_scalar_(scalar, evt::PLAI); + _enable_(c4::yml::KEY|c4::yml::KEY_PLAIN); + } + C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar) + { + _c4dbgpf("{}/{}: set_val_scalar_plain: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_val_scalar_(scalar, evt::PLAI); + _enable_(c4::yml::VAL|c4::yml::VAL_PLAIN); + } + + + C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar) + { + _c4dbgpf("{}/{}: set_key_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_key_scalar_(scalar, evt::DQUO); + _enable_(c4::yml::KEY|c4::yml::KEY_DQUO); + } + C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar) + { + _c4dbgpf("{}/{}: set_val_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_val_scalar_(scalar, evt::DQUO); + _enable_(c4::yml::VAL|c4::yml::VAL_DQUO); + } + + + C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar) + { + _c4dbgpf("{}/{}: set_key_scalar_squo: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_key_scalar_(scalar, evt::SQUO); + _enable_(c4::yml::KEY|c4::yml::KEY_SQUO); + } + C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar) + { + _c4dbgpf("{}/{}: set_val_scalar_squo: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_val_scalar_(scalar, evt::SQUO); + _enable_(c4::yml::VAL|c4::yml::VAL_SQUO); + } + + + C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar) + { + _c4dbgpf("{}/{}: 
set_key_scalar_literal: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_key_scalar_(scalar, evt::LITL); + _enable_(c4::yml::KEY|c4::yml::KEY_LITERAL); + } + C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar) + { + _c4dbgpf("{}/{}: set_val_scalar_literal: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_val_scalar_(scalar, evt::LITL); + _enable_(c4::yml::VAL|c4::yml::VAL_LITERAL); + } + + + C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar) + { + _c4dbgpf("{}/{}: set_key_scalar_folded: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_key_scalar_(scalar, evt::FOLD); + _enable_(c4::yml::KEY|c4::yml::KEY_FOLDED); + } + C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar) + { + _c4dbgpf("{}/{}: set_val_scalar_folded: @{} [{}]~~~{}~~~", m_evt_curr, m_evt_size, scalar.str-m_str.str, scalar.len, scalar); + _send_val_scalar_(scalar, evt::FOLD); + _enable_(c4::yml::VAL|c4::yml::VAL_FOLDED); + } + + + C4_ALWAYS_INLINE void mark_key_scalar_unfiltered() + { + _RYML_CB_ERR(m_stack.m_callbacks, "all scalars must be filtered"); + } + C4_ALWAYS_INLINE void mark_val_scalar_unfiltered() + { + _RYML_CB_ERR(m_stack.m_callbacks, "all scalars must be filtered"); + } + + /** @} */ + +public: + +#define _add_scalar_(i, scalar) \ + _c4dbgpf("{}/{}: scalar!", i, m_evt_size); \ + _RYML_CB_ASSERT(m_stack.m_callbacks, scalar.is_sub(m_str)); \ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt[i] & evt::HAS_STR); \ + _RYML_CB_ASSERT(m_stack.m_callbacks, i + 2 < m_evt_size); \ + m_evt[i + 1] = (evt::DataType)(scalar.str - m_str.str); \ + m_evt[i + 2] = (evt::DataType)scalar.len + + /** @name YAML anchor/reference events */ + /** @{ */ + + void set_key_anchor(csubstr anchor) + { + _c4dbgpf("{}/{}: set_key_anchor", m_evt_curr, m_evt_size); + _enable_(c4::yml::KEYANCH); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = 
evt::KEY_|evt::ANCH; + _add_scalar_(m_evt_curr, anchor); + } + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + void set_val_anchor(csubstr anchor) + { + _c4dbgpf("{}/{}: set_val_anchor", m_evt_curr, m_evt_size); + _enable_(c4::yml::VALANCH); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = evt::VAL_|evt::ANCH; + _add_scalar_(m_evt_curr, anchor); + } + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + + void set_key_ref(csubstr ref) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ref.begins_with('*')); + _enable_(c4::yml::KEY|c4::yml::KEYREF); + _send_str_(ref.sub(1), evt::KEY_|evt::ALIA); // skip the leading * + } + void set_val_ref(csubstr ref) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ref.begins_with('*')); + _enable_(c4::yml::VAL|c4::yml::VALREF); + _send_str_(ref.sub(1), evt::VAL_|evt::ALIA); // skip the leading * + } + + /** @} */ + +public: + + /** @name YAML tag events */ + /** @{ */ + + void set_key_tag(csubstr tag) + { + _enable_(c4::yml::KEYTAG); + csubstr ttag = _transform_directive(tag, m_key_tag_buf); + _RYML_CB_ASSERT(m_stack.m_callbacks, !ttag.empty()); + if(ttag.begins_with('!') && !ttag.begins_with("!!")) + ttag = ttag.sub(1); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = evt::KEY_|evt::TAG_; + _add_scalar_(m_evt_curr, ttag); + } + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + void set_val_tag(csubstr tag) + { + _enable_(c4::yml::VALTAG); + csubstr ttag = _transform_directive(tag, m_val_tag_buf); + _RYML_CB_ASSERT(m_stack.m_callbacks, !ttag.empty()); + if(ttag.begins_with('!') && !ttag.begins_with("!!")) + ttag = ttag.sub(1); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = evt::VAL_|evt::TAG_; + _add_scalar_(m_evt_curr, ttag); + } + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + + /** @} */ + +public: + + /** @name YAML directive events */ + /** @{ */ + + void add_directive(csubstr directive) + { + _RYML_CB_ERR(m_stack.m_callbacks, "tag directives not supported"); + } + + /** @} */ + +public: + + /** @name 
YAML arena events */ + /** @{ */ + + substr alloc_arena(size_t len) + { + const size_t sz = m_arena.size(); + csubstr prev = to_csubstr(m_arena); + m_arena.resize(sz + len); + substr out = to_substr(m_arena).sub(sz); + substr curr = to_substr(m_arena); + if(curr.str != prev.str) + _stack_relocate_to_new_arena(prev, curr); + return out; + } + + substr alloc_arena(size_t len, substr *relocated) + { + csubstr prev = to_csubstr(m_arena); + if(!prev.is_super(*relocated)) + return alloc_arena(len); + substr out = alloc_arena(len); + substr curr = to_substr(m_arena); + if(curr.str != prev.str) + *relocated = _stack_relocate_to_new_arena(*relocated, prev, curr); + return out; + } + + /** @} */ + +public: + + /** push a new parent, add a child to the new parent, and set the + * child as the current node */ + void _push() + { + _stack_push(); + m_curr->evt_type = {}; + } + + /** end the current scope */ + void _pop() + { + _stack_pop(); + } + + template C4_ALWAYS_INLINE void _enable__() noexcept + { + m_curr->evt_type |= bits; + } + template C4_ALWAYS_INLINE void _disable__() noexcept + { + m_curr->evt_type &= ~bits; + } + template C4_ALWAYS_INLINE bool _has_any__() const noexcept + { + return (m_curr->evt_type & bits) != c4::yml::type_bits(0); + } + + void _mark_parent_with_children_() + { + if(m_parent) + m_parent->has_children = true; + } + + C4_ALWAYS_INLINE void _send_flag_only_(evt::DataType flags) + { + _c4dbgpf("{}/{}: flag only", m_evt_curr, m_evt_size); + if(m_evt_curr < m_evt_size) + m_evt[m_evt_curr] = flags; + m_curr->evt_id = m_evt_curr; + m_evt_prev = m_evt_curr; + ++m_evt_curr; + } + + C4_ALWAYS_INLINE void _send_key_scalar_(csubstr scalar, evt::DataType flags) + { + _c4dbgpf("{}/{}: key scalar", m_evt_curr, m_evt_size); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = evt::SCLR|evt::KEY_|flags; + _add_scalar_(m_evt_curr, scalar); + } + m_curr->evt_id = m_evt_curr; + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + + C4_ALWAYS_INLINE void 
_send_val_scalar_(csubstr scalar, evt::DataType flags) + { + _c4dbgpf("{}/{}: val scalar", m_evt_curr, m_evt_size); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = evt::SCLR|evt::VAL_|flags; + _add_scalar_(m_evt_curr, scalar); + } + m_curr->evt_id = m_evt_curr; + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + + C4_ALWAYS_INLINE void _send_str_(csubstr scalar, evt::DataType flags) + { + _c4dbgpf("{}/{}: send str", m_evt_curr, m_evt_size); + if(m_evt_curr + 2 < m_evt_size) + { + m_evt[m_evt_curr] = flags; + _add_scalar_(m_evt_curr, scalar); + } + m_curr->evt_id = m_evt_curr; + m_evt_prev = m_evt_curr; + m_evt_curr += 3; + } + + csubstr _transform_directive(csubstr tag, substr output) + { + if(tag.begins_with("!!")) + { + return tag; + } + else if(tag.begins_with('!')) + { + if(c4::yml::is_custom_tag(tag)) + { + _RYML_CB_ERR_(m_stack.m_callbacks, "tag not found", m_curr->pos); + } + } + csubstr result = c4::yml::normalize_tag_long(tag, output); + _RYML_CB_CHECK(m_stack.m_callbacks, result.len > 0); + _RYML_CB_CHECK(m_stack.m_callbacks, result.str); + return result; + } +#undef _enable_ +#undef _disable_ +#undef _has_any_ + +}; + +} // namespace ys + +C4_SUPPRESS_WARNING_GCC_POP + +#endif /* _C4_YML_EVENT_HANDLER_EVT_HPP_ */ diff --git a/rapidyaml/native/ysparse_test.cpp b/rapidyaml/native/ysparse_test.cpp new file mode 100644 index 000000000..8567e3342 --- /dev/null +++ b/rapidyaml/native/ysparse_test.cpp @@ -0,0 +1,601 @@ +#include +#include +#include + +using c4::csubstr; +using c4::substr; + +namespace c4 +{ +template<> +c4::EnumSymbols const esyms() +{ + static constexpr typename c4::EnumSymbols::Sym syms[] = { + {evt::KEY_, "KEY_"}, + {evt::VAL_, "VAL_"}, + {evt::SCLR, "SCLR"}, + {evt::BSEQ, "BSEQ"}, + {evt::ESEQ, "ESEQ"}, + {evt::BMAP, "BMAP"}, + {evt::EMAP, "EMAP"}, + {evt::ALIA, "ALIA"}, + {evt::ANCH, "ANCH"}, + {evt::TAG_, "TAG_"}, + {evt::PLAI, "PLAI"}, + {evt::SQUO, "SQUO"}, + {evt::DQUO, "DQUO"}, + {evt::LITL, "LITL"}, + {evt::FOLD, "FOLD"}, 
+ {evt::FLOW, "FLOW"}, + {evt::BLCK, "BLCK"}, + {evt::BDOC, "BDOC"}, + {evt::EDOC, "EDOC"}, + {evt::BSTR, "BSTR"}, + {evt::ESTR, "ESTR"}, + {evt::EXPL, "EXPL"}, + }; + return c4::EnumSymbols(syms); +} +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +struct Ys2EvtScoped +{ + ysparse *ryml2evt; + Ys2EvtScoped() : ryml2evt(ysparse_init()) {} + ~Ys2EvtScoped() { if(ryml2evt) ysparse_destroy(ryml2evt); } +}; + + +static bool showcmp = false; +struct TestResult +{ + uint32_t num_assertions; + uint32_t num_tests; + uint32_t num_failed_assertions; + uint32_t num_failed_tests; + operator bool() const { return num_failed_tests == 0 && num_failed_assertions == 0; } + void add(TestResult const& that) + { + num_tests += 1 + that.num_tests; + num_assertions += that.num_assertions; + num_failed_tests += (that.num_failed_assertions > 0) + that.num_failed_tests; + num_failed_assertions += that.num_failed_assertions; + } +}; + +// provide a structured input for the events, grouping the relevant +// data in a single structure +struct EvtWithScalar +{ + evt::DataType flags, str_start, str_len; + csubstr scalar; + bool needs_filter; + EvtWithScalar(evt::DataType t, evt::DataType start=0, evt::DataType len=0, csubstr sclr={}, bool needs_filter_=false) + { + flags = t; + str_start = start; + str_len = len; + scalar = sclr; + needs_filter = needs_filter_; + } + size_t required_size() const { return (flags & evt::HAS_STR) ? 3u : 1u; } +}; + +size_t expected_size(std::vector const& evt) +{ + size_t exp = 0; + for(EvtWithScalar const& e : evt) + exp += e.required_size(); + return exp; +} + +struct TestCase +{ + csubstr ys; + std::vector evt; + +public: + + #define _runtest(name, ...) \ + do { \ + printf("[ RUN ] %s ... 
\n", #name); \ + TestResult tr_ = name(__VA_ARGS__); \ + tr.add(tr_); \ + printf("[ %s ] %s\n", tr_?"OK ":"FAIL", #name); \ + } while(0) + #define CHECK(cond) \ + do { \ + bool pass = !!(cond); \ + ++tr.num_assertions; \ + if(!pass) { \ + printf("%s:%d: fail! %s\n", __FILE__, __LINE__, #cond); \ + ++tr.num_failed_assertions; \ + } \ + } while(0) + #define CHECK_MSG(cond, fmt, ...) \ + do { \ + bool pass = !!(cond); \ + ++tr.num_assertions; \ + if(!pass) { \ + printf("%s:%d: fail! %s:" fmt "\n", __FILE__, __LINE__, #cond, ## __VA_ARGS__); \ + ++tr.num_failed_assertions; \ + } \ + } while(0) + + TestResult test(ysparse *ryml2evt) const + { + TestResult tr = {}; + _runtest(test_evt_large_enough, ); + _runtest(test_evt_too_small, ); + _runtest(test_evt_nullptr, ); + _runtest(test_evt_large_enough_reuse, ryml2evt); + _runtest(test_evt_too_small_reuse, ryml2evt); + _runtest(test_evt_nullptr_reuse, ryml2evt); + return tr; + } + + // happy path: large-enough destination string + TestResult test_evt_large_enough_reuse(ysparse *ryml2evt) const + { + if(evt.empty()) return {}; + TestResult tr = {}; + std::string input_(ys.begin(), ys.end()); + substr input = c4::to_substr(input_); + std::vector output; + output.resize(2 * expected_size(evt)); + size_type reqsize = ysparse_parse(ryml2evt, "ysfilename", + input.str, (size_type)input.len, + &output[0], (size_type)output.size()); + CHECK_MSG((size_t)reqsize == expected_size(evt), "%d vs %zu", reqsize, expected_size(evt)); + CHECK(reqsize != 0); + output.resize(reqsize); + CHECK(testeq(output, input)); + return tr; + } + TestResult test_evt_large_enough() const + { + Ys2EvtScoped lib; + return test_evt_large_enough_reuse(lib.ryml2evt); + } + + // less-happy path: destination string not large enough + TestResult test_evt_too_small_reuse(ysparse *ryml2evt) const + { + TestResult tr = {}; + std::string input_(ys.begin(), ys.end()); + substr input = c4::to_substr(input_); + std::vector output; + output.resize(expected_size(evt)); + 
size_type reqsize = ysparse_parse(ryml2evt, "ysfilename", + input.str, (size_type)input.len, + output.data(), (size_type)output.size()); + CHECK(reqsize == expected_size(evt)); + CHECK(reqsize != 0); + output.resize(reqsize); + input_.assign(ys.begin(), ys.end()); // FIXME + input = c4::to_substr(input_); + size_type reqsize2 = ysparse_parse(ryml2evt, "ysfilename", + input.str, (size_type)input.len, + output.data(), (size_type)output.size()); + CHECK(reqsize2 == reqsize); + output.resize(reqsize2); + CHECK(testeq(output, input)); + return tr; + } + TestResult test_evt_too_small() const + { + Ys2EvtScoped lib; + return test_evt_too_small_reuse(lib.ryml2evt); + } + + // safe calling with nullptr + TestResult test_evt_nullptr_reuse(ysparse *ryml2evt) const + { + TestResult tr = {}; + std::string input_(ys.begin(), ys.end()); + substr input = c4::to_substr(input_); + size_type reqsize = ysparse_parse(ryml2evt, "ysfilename", + input.str, (size_type)input.len, + nullptr, 0); + CHECK(reqsize == expected_size(evt)); + CHECK(reqsize != 0); + std::vector output; + output.resize(reqsize); + input_.assign(ys.begin(), ys.end()); // FIXME + input = c4::to_substr(input_); + size_type reqsize2 = ysparse_parse(ryml2evt, "ysfilename", + input.str, (size_type)input.len, + output.data(), (size_type)output.size()); + CHECK(reqsize2 == reqsize); + CHECK(reqsize2 == output.size()); + CHECK(testeq(output, input)); + return tr; + } + TestResult test_evt_nullptr() const + { + Ys2EvtScoped lib; + return test_evt_nullptr_reuse(lib.ryml2evt); + } + +public: + + bool testeq(std::vector const& actual, csubstr parsed_source) const + { + int status = true; + size_t num_events_expected = evt.size(); + size_t num_ints_expected = expected_size(evt); + bool same_size = true; + if(actual.size() != num_ints_expected) + { + printf("------\n" + "FAIL: different size\n" + "input:~~~%.*s~~~\n" + "expected size:~~~%zu~~~\n" + "actual size:~~~%zu~~~\n", + (int)ys.len, ys.str, + num_ints_expected, + 
actual.size()); + same_size = false; + } + for(size_t i = 0, ie = 0; ie < num_events_expected; ++ie) + { + if(i >= actual.size()) + { + printf("fail: bad actual size. i=%zu vs %zu=actual.size()=\n", i, actual.size()); + status = false; + break; + } + #define _testcmp(fmt, cmp, ...) \ + if(showcmp) { printf("status=%d cmp=%d evt=%zu i=%zu: " fmt "\n", status, (cmp), ie, i, ## __VA_ARGS__); } \ + status &= (cmp) + char actualbuf[100]; + char expectedbuf[100]; + size_t reqsize_actual = c4::bm2str(actual[i] & evt::MASK, actualbuf, sizeof(actualbuf)); + size_t reqsize_expected = c4::bm2str(evt[ie].flags & evt::MASK, expectedbuf, sizeof(expectedbuf)); + C4_CHECK(reqsize_actual < sizeof(actualbuf)); + C4_CHECK(reqsize_expected < sizeof(expectedbuf)); + _testcmp("exp=%d(%s) vs act=%d(%s)", evt[ie].flags == actual[i], evt[ie].flags, expectedbuf, actual[i], actualbuf); + status &= (evt[ie].flags == actual[i]); + if((evt[ie].flags & evt::HAS_STR) && (actual[i] & evt::HAS_STR)) + { + _testcmp(" exp=%d vs act=%d", evt[ie].str_start == actual[i + 1], evt[ie].str_start, actual[i + 1]); + _testcmp(" exp=%d vs act=%d", evt[ie].str_len == actual[i + 2], evt[ie].str_len, actual[i + 2]); + bool safeactual = (i + 2 < actual.size()) && (actual[i + 1] < (int)parsed_source.len && actual[i + 1] + actual[i + 2] <= (int)parsed_source.len); + bool safeexpected = (evt[ie].str_start < (int)parsed_source.len && evt[ie].str_start + evt[ie].str_len <= (int)parsed_source.len); + _testcmp(" safeactual=%d", safeactual, safeactual); + _testcmp(" safeactual=%d safeexpected=%d", safeactual == safeexpected, safeactual, safeexpected); + if(safeactual && safeexpected) + { + csubstr evtstr = parsed_source.sub((size_t)evt[ie].str_start, (size_t)evt[ie].str_len); + csubstr actualstr = parsed_source.sub((size_t)actual[i + 1], (size_t)actual[i + 2]); + _testcmp(" ref=[%zu]~~~%.*s~~~ vs act=[%zu]~~~%.*s~~~", + evt[ie].scalar == actualstr, + evt[ie].scalar.len, (int)evt[ie].scalar.len, evt[ie].scalar.str, + 
actualstr.len, (int)actualstr.len, actualstr.str); + if( ! evt[ie].needs_filter) + { + _testcmp(" exp=[%zu]~~~%.*s~~~ vs act=[%zu]~~~%.*s~~~", + evtstr == actualstr, + evtstr.len, (int)evtstr.len, evtstr.str, + actualstr.len, (int)actualstr.len, actualstr.str); + } + } + } + i += (actual[i] & evt::HAS_STR) ? 3 : 1; + } + if(!status) + printf("------\n" + "FAIL:\n" + "input:~~~%.*s~~~\n", + (int)ys.len, ys.str); + return status && same_size; + } +}; + + +//----------------------------------------------------------------------------- + +namespace { +// make the declarations shorter +#define tc(ys, ...) {ys, std::vector(__VA_ARGS__)} +#define e(...) EvtWithScalar{__VA_ARGS__} +using namespace evt; +inline constexpr bool needs_filter = true; +const TestCase test_cases[] = { + // case ------------------------------------------------- + tc("a: 1", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 0, 1, "a"), + e(VAL_|SCLR|PLAI, 3, 1, "1"), + e(EMAP), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc("say: 2 + 2", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 0, 3, "say"), + e(VAL_|SCLR|PLAI, 5, 5, "2 + 2"), + e(EMAP), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc("𝄞: ✅", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 0, 4, "𝄞"), + e(VAL_|SCLR|PLAI, 6, 3, "✅"), + e(EMAP), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc("[a, b, c]", + { + e(BSTR), + e(BDOC), + e(VAL_|BSEQ|FLOW), + e(VAL_|SCLR|PLAI, 1, 1, "a"), + e(VAL_|SCLR|PLAI, 4, 1, "b"), + e(VAL_|SCLR|PLAI, 7, 1, "c"), + e(ESEQ), + e(EDOC), + e(ESTR), + }), + // case ------------------------------ + tc("[a: b]", + { + e(BSTR), + e(BDOC), + e(VAL_|BSEQ|FLOW), + e(VAL_|BMAP|FLOW), + e(KEY_|SCLR|PLAI, 1, 1, "a"), + e(VAL_|SCLR|PLAI, 4, 1, "b"), + e(EMAP), + e(ESEQ), + e(EDOC), + e(ESTR), + }), + // case 
------------------------------ + tc(R"(--- !yamlscript/v0 +foo: ! +- {x: y} +- [x, y] +- foo +- 'foo' +- "foo" +- | + foo +- > + foo +- [1, 2, true, false, null] +- &anchor-1 !tag-1 foobar +--- +another: doc +)", + { + e(BSTR), + e(BDOC|EXPL), + e(VAL_|TAG_, 5, 13, "yamlscript/v0"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 19, 3, "foo"), + e(VAL_|TAG_, 25, 0, ""), + e(VAL_|BSEQ|BLCK), + e(VAL_|BMAP|FLOW), + e(KEY_|SCLR|PLAI, 29, 1, "x"), + e(VAL_|SCLR|PLAI, 32, 1, "y"), + e(EMAP), + e(VAL_|BSEQ|FLOW), + e(VAL_|SCLR|PLAI, 38, 1, "x"), + e(VAL_|SCLR|PLAI, 41, 1, "y"), + e(ESEQ), + e(VAL_|SCLR|PLAI, 46, 3, "foo"), + e(VAL_|SCLR|SQUO, 53, 3, "foo"), + e(VAL_|SCLR|DQUO, 61, 3, "foo"), + e(VAL_|SCLR|LITL, 70, 4, "foo\n", needs_filter), + e(VAL_|SCLR|FOLD, 80, 4, "foo\n", needs_filter), + e(VAL_|BSEQ|FLOW), + e(VAL_|SCLR|PLAI, 89, 1, "1"), + e(VAL_|SCLR|PLAI, 92, 1, "2"), + e(VAL_|SCLR|PLAI, 95, 4, "true"), + e(VAL_|SCLR|PLAI, 101, 5, "false"), + e(VAL_|SCLR|PLAI, 108, 4, "null"), + e(ESEQ), + e(VAL_|TAG_, 127, 5, "tag-1"), + e(VAL_|ANCH, 117, 8, "anchor-1"), + e(VAL_|SCLR|PLAI, 133, 6, "foobar"), + e(ESEQ), + e(EMAP), + e(EDOC), + e(BDOC|EXPL), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 144, 7, "another"), + e(VAL_|SCLR|PLAI, 153, 3, "doc"), + e(EMAP), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc(R"(plain: well + a + b + c +squo: 'single''quote' +dquo: "x\t\ny" +lit: | + X + Y + Z +fold: > + U + V + W +)", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 0, 5, "plain"), + e(VAL_|SCLR|PLAI, 7, 10, "well a b c"), + e(KEY_|SCLR|PLAI, 24, 4, "squo"), + e(VAL_|SCLR|SQUO, 31, 12, "single'quote", needs_filter), + e(KEY_|SCLR|PLAI, 46, 4, "dquo"), + e(VAL_|SCLR|DQUO, 53, 4, "x\t\ny", needs_filter), + e(KEY_|SCLR|PLAI, 61, 3, "lit"), + e(VAL_|SCLR|LITL, 68, 6, "X\nY\nZ\n", needs_filter), + e(KEY_|SCLR|PLAI, 89, 4, "fold"), + e(VAL_|SCLR|FOLD, 97, 6, "U V W\n", needs_filter), + e(EMAP), + e(EDOC), + e(ESTR), + }), + 
// case ------------------------------------------------- + tc("- !!seq []", + { + e(BSTR), + e(BDOC), + e(VAL_|BSEQ|BLCK), + e(VAL_|TAG_, 2, 5, "!!seq"), + e(VAL_|BSEQ|FLOW), + e(ESEQ), + e(ESEQ), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc(R"_(defn run(prompt session=nil): + when session: + write session _ :append true: |+ + Q: $(orig-prompt:trim) + A ($api-model): + $(answer:trim) +)_", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 0, 28, "defn run(prompt session=nil)"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 32, 12, "when session"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 50, 28, "write session _ :append true"), + e(VAL_|SCLR|LITL, 83, 54, "Q: $(orig-prompt:trim)\nA ($api-model):\n$(answer:trim)\n", needs_filter), + e(EMAP), + e(EMAP), + e(EMAP), + e(EDOC), + e(ESTR), + }), + // case ------------------------------------------------- + tc(R"_(#!/usr/bin/env ys-0 + +defn run(prompt session=nil): + session-text =: + when session && session:fs-e: + + answer =: + cond: + api-model =~ /^dall-e/: + openai-image(prompt).data.0.url + api-model.in?(anthropic-models): + anthropic(prompt):anthropic-message:format + api-model.in?(groq-models): + groq(prompt).choices.0.message.content:format + api-model.in?(openai-models): + openai-chat(prompt).choices.0.message.content:format + else: die() + + say: answer + + when session: + write session _ :append true: |+ + Q: $(orig-prompt:trim) + A ($api-model): + $(answer:trim) + +)_", + { + e(BSTR), + e(BDOC), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 21, 28, "defn run(prompt session=nil)"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 53, 14, "session-text ="), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 73, 28, "when session && session:fs-e"), + e(VAL_|SCLR|PLAI, 0, 0, ""), // note empty scalar pointing at the front + e(EMAP), + e(KEY_|SCLR|PLAI, 106, 8, "answer ="), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 120, 4, "cond"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 132, 
22, "api-model =~ /^dall-e/"), + e(VAL_|SCLR|PLAI, 164, 31, "openai-image(prompt).data.0.url"), + e(KEY_|SCLR|PLAI, 202, 31, "api-model.in?(anthropic-models)"), + e(VAL_|SCLR|PLAI, 243, 42, "anthropic(prompt):anthropic-message:format"), + e(KEY_|SCLR|PLAI, 292, 26, "api-model.in?(groq-models)"), + e(VAL_|SCLR|PLAI, 328, 45, "groq(prompt).choices.0.message.content:format"), + e(KEY_|SCLR|PLAI, 380, 28, "api-model.in?(openai-models)"), + e(VAL_|SCLR|PLAI, 418, 52, "openai-chat(prompt).choices.0.message.content:format"), + e(KEY_|SCLR|PLAI, 477, 4, "else"), + e(VAL_|SCLR|PLAI, 483, 5, "die()"), + e(EMAP), + e(EMAP), + e(KEY_|SCLR|PLAI, 492, 3, "say"), + e(VAL_|SCLR|PLAI, 497, 6, "answer"), + e(KEY_|SCLR|PLAI, 507, 12, "when session"), + e(VAL_|BMAP|BLCK), + e(KEY_|SCLR|PLAI, 525, 28, "write session _ :append true"), + e(VAL_|SCLR|LITL, 558, 55, "Q: $(orig-prompt:trim)\nA ($api-model):\n$(answer:trim)\n\n", needs_filter), + e(EMAP), + e(EMAP), + e(EMAP), + e(EDOC), + e(ESTR), + }), +}; +} // namespace + +int main(int argc, const char *argv[]) +{ + for(int i = 1; i < argc; ++i) + { + csubstr arg = ryml::to_csubstr(argv[i]); + if(arg == "--timing" || arg == "-t") + ysparse_timing_set(true); + } + Ys2EvtScoped ys2evt; + TestResult total = {}; + size_t failed_cases = {}; + size_t num_cases = C4_COUNTOF(test_cases); + for(size_t i = 0; i < C4_COUNTOF(test_cases); ++i) + { + printf("-----------------------------------------\n" + "case %zu/%zu ...\n" + "[%zu]~~~%.*s~~~\n", i, num_cases, test_cases[i].ys.len, (int)test_cases[i].ys.len, test_cases[i].ys.str); + const TestResult tr = test_cases[i].test(ys2evt.ryml2evt); + total.add(tr); + failed_cases += (!tr); + printf("case %zu/%zu: %s\n", i, C4_COUNTOF(test_cases), tr ? "ok!" 
: "failed"); + } + printf("assertions: %u/%u pass %u/%u fail\n", total.num_assertions - total.num_failed_assertions, total.num_assertions, total.num_failed_assertions, total.num_assertions); + printf("tests: %u/%u pass %u/%u fail\n", total.num_tests - total.num_failed_tests, total.num_tests, total.num_failed_tests, total.num_tests); + printf("cases: %zu/%zu pass %zu/%zu fail\n", num_cases-failed_cases, num_cases, failed_cases, num_cases); + if(total) + printf("TESTS SUCCEED!\n"); + return total ? 0 : -1; +} diff --git a/rapidyaml/pom.xml b/rapidyaml/pom.xml new file mode 100644 index 000000000..54902b7fd --- /dev/null +++ b/rapidyaml/pom.xml @@ -0,0 +1,115 @@ + + + + + rapidyaml + + 0.8.0 + + rapidyaml + + org.rapidyaml + + 4.0.0 + + + rapidyaml is a C++ library to parse and emit YAML, and do it fast. + + + https://rapidyaml.org + + + + MIT License + http://www.opensource.org/licenses/mit-license.php + repo + + + + + + clojars + Clojars repository + https://clojars.org/repo + + + + + UTF-8 + 1.8 + 1.8 + + + + + junit + junit + 3.8.1 + + + org.json + json + 20240205 + + + + + + + + + maven-clean-plugin + 3.1.0 + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 3.0.0 + + + + maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + + + + + + maven-project-info-reports-plugin + + + + diff --git a/rapidyaml/src/README.md b/rapidyaml/src/README.md new file mode 100644 index 000000000..d920cfa6b --- /dev/null +++ b/rapidyaml/src/README.md @@ -0,0 +1,33 @@ +## Notes on JNI vs JNA +, packaging, loading, etc + +From a [thread on slack](https://app.slack.com/client/T03RZGPFR/activity) + + > JNI is almost always the fasted ffi option. I've heard good things about JNR performance. 
JNA is usually one of the slower ffi options, but can be sped up with direct mapping, https://github.com/java-native-access/jna/blob/master/www/DirectMapping.md. + + +## Notes on how to profile + +From a [thread on slack](https://app.slack.com/client/T03RZGPFR/activity) + + > Another tool to try to find the bottleneck is https://github.com/clojure-goes-fast/clj-async-profiler. The flamegraph might show some obvious performance issue (assuming the issue is on the jvm side). + + > Depending on how long a parse takes, I would recommend something like https://github.com/hugoduncan/criterium. time is not a good way to benchmark code unless it's a very slow function call. I would use the profiler to try and figure out where the bottleneck is. + + +## JNI examples + +- [full JNI example](https://github.com/mkowsiak/jnicookbook/tree/master/recipes/recipeNo031) +- [full JNI example with other non-JNI shared libraries](https://github.com/mkowsiak/jnicookbook/tree/master/recipes/recipeNo035) +- [another example linking with more libraries](https://www.dynamsoft.com/codepool/package-jni-shared-library-jar-file.html) + +- https://stackoverflow.com/questions/1611357/how-to-make-a-jar-file-that-includes-dll-files#comment1483970_1611367 + +## Notes on JNI - how to call c++ code from java + +From a [thread on slack](https://app.slack.com/client/T03RZGPFR/activity) + + > I'm surprised trying to pass a mutable byte buffer doesn't cause more issues. I think the recommended way to pass a byte buffer to native is with http://java-native-access.github.io/jna/5.13.0/javadoc/com/sun/jna/Memory.html and you can get the string with .getString. + + > > But I was thinking whether it is possible/practical/advisable (in terms of speed) to build the Clojure dictionary directly in the C++ code. Currently the C++ code is providing an EDN markup string that is later parsed in Clojure. 
Assuming a large dictionary of say ~40k entries, would it be possible to call native clojure/java JNI functions to build the final structure instead of creating the intermediate EDN? Would that be a gain? Would the ~40k calls to JNI end up costing too much? + > This isn't something I've tried before, so take it with a grain of salt, but you have at least a few options with different tradeoffs. All the collection types in clojure are based on protocols/interfaces, so it would be possible to just return a pointer, with no copying, and wrap it in a proxy that implements all the relevant interfaces for maps/lists/etc. When JVM code asks for a value from a map or an element from a list, you produce the JVM value for numbers/strings or you return another proxy pointer if it's a collection. You might still have to make a copy to return a string value. If you're returning large values that you expect will only be partially read or read only once, then lazily producing jvm values might be a win. If you expect the large value to be completely read multiple times, then it could be potentially faster to just convert the full data structure to a JVM value. There are also intermediate options where you do some of the work upfront, and do some of the work lazily. Granularity will also affect memory usage. You probably don't want some scenario where someone parses a giant blob and keeps only a small part, but still has to hold the giant value in memory until the small part gets reclaimed. I don't have a good answer for you here. My intuition is that your approach of building the final data structure in c++ is probably a good idea, but I don't really have the experience to say for sure. This type of question might get a better answer in #data-science. I think they have similar issues with dealing with large datasets that are partially processed in native code.
They've also built deep integrations with python via https://github.com/clj-python/libpython-clj where I think they've run into similar problems. diff --git a/rapidyaml/src/main/java/org/rapidyaml/Evt.java b/rapidyaml/src/main/java/org/rapidyaml/Evt.java new file mode 100644 index 000000000..5305610be --- /dev/null +++ b/rapidyaml/src/main/java/org/rapidyaml/Evt.java @@ -0,0 +1,44 @@ +package org.rapidyaml; + +public class Evt { + // Event types + public static final int BSTR = 1 << 0; // +STR + public static final int ESTR = 1 << 1; // -STR + public static final int BDOC = 1 << 2; // +DOC + public static final int EDOC = 1 << 3; // -DOC + public static final int BMAP = 1 << 4; // +MAP + public static final int EMAP = 1 << 5; // -MAP + public static final int BSEQ = 1 << 6; // +SEQ + public static final int ESEQ = 1 << 7; // -SEQ + public static final int SCLR = 1 << 8; // =VAL + public static final int ALIA = 1 << 9; // =ALI + + // Style flags + public static final int PLAI = 1 << 16; // : (plain scalar) + public static final int SQUO = 1 << 17; // ' (single-quoted scalar) + public static final int DQUO = 1 << 18; // " (double-quoted scalar) + public static final int LITL = 1 << 19; // | (block literal scalar) + public static final int FOLD = 1 << 20; // > (block folded scalar) + + public static final int FLOW = 1 << 21; // flow container: + // [] for seqs or {} for maps + public static final int BLCK = 1 << 22; // block container + + // Modifiers + public static final int ANCH = 1 << 24; // anchor + public static final int TAG_ = 1 << 25; // tag + + // Structure flags + public static final int KEY_ = 1 << 26; // as key + public static final int VAL_ = 1 << 27; // as value + public static final int EXPL = 1 << 28; // --- (with BDOC) or + // ... 
(with EDOC) + // (may be fused with FLOW + // if needed) + + // Utility flags + public static final int LAST = EXPL; + public static final int MASK = ((LAST << 1) - 1); + public static final int HAS_STR = SCLR|ALIA|ANCH|TAG_; + +} diff --git a/rapidyaml/src/main/java/org/rapidyaml/Rapidyaml.java b/rapidyaml/src/main/java/org/rapidyaml/Rapidyaml.java new file mode 100644 index 000000000..f071e1ac9 --- /dev/null +++ b/rapidyaml/src/main/java/org/rapidyaml/Rapidyaml.java @@ -0,0 +1,141 @@ +package org.rapidyaml; + +import org.rapidyaml.YamlParseErrorException; +import java.nio.charset.StandardCharsets; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.nio.ByteOrder; + +/** + * Interface with the shared librapidyaml library + */ +public class Rapidyaml +{ + public static String RAPIDYAML_VERSION = "0.8.0"; + + private native void ysparse_timing_set(boolean yes); + // TODO: rename these to ysparse_init() etc + private native long ysparse_init(); + private native void ysparse_destroy(long ysparse); + private native int ysparse_parse(long ysparse, String filename, + byte[] ys, int ys_length, + int[] evt, int evt_length); + private native int ysparse_parse_buf(long ysparse, String filename, + ByteBuffer ys, int ys_length, + IntBuffer evt, int evt_length); + + private final long ysparse; + + public Rapidyaml() + { + String library_name = "rapidyaml"; // ." 
+ RAPIDYAML_VERSION; + System.loadLibrary(library_name); + this.ysparse = this.ysparse_init(); + // TODO: receive this argument as ctor parameter + timingEnabled(System.getenv("YS_RYML_TIMER") != null); + } + + // Likely bad idea to implement finalize: + // + // https://stackoverflow.com/questions/158174/why-would-you-ever-implement-finalize + // + protected void finalize() throws Throwable + { + try { + this.ysparse_destroy(this.ysparse); + } + finally { + super.finalize(); + } + } + + + //------------------------ + // EVT + //------------------------ + + public int parseYsToEvt(byte[] src, int[] evts) throws Exception + { + return parseYsToEvt("yamlscript", src, evts); + } + + public int parseYsToEvtBuf(ByteBuffer src, IntBuffer evt) throws Exception + { + return parseYsToEvtBuf("yamlscript", src, evt); + } + + public int parseYsToEvt(String filename, byte[] src, int[] evts) throws Exception + { + long t = timingStart("ysparse"); + int required_size = ysparse_parse(this.ysparse, filename, src, src.length, evts, evts.length); + timingStop("ysparse", t, src.length); + return required_size; + } + + public int parseYsToEvtBuf(String filename, ByteBuffer src, IntBuffer evt) throws Exception + { + if(!src.isDirect()) + throw new RuntimeException("src must be direct"); + if(!evt.isDirect()) + throw new RuntimeException("evt must be direct"); + // the byte order for src does not matter + // but for evt it really does + if(evt.order() != ByteOrder.nativeOrder()) + throw new RuntimeException("evt byte order must be native"); + long t = timingStart("ysparseBuf"); + evt.position(evt.capacity()); + int reqsize = ysparse_parse_buf(this.ysparse, filename, src, src.position(), evt, evt.capacity()); + if(reqsize <= evt.capacity()) { + evt.position(reqsize); + } + timingStop("ysparseBuf", t, src.position()); + return reqsize; + } + + public static IntBuffer mkIntBuffer(int numInts) + { + ByteBuffer bb = ByteBuffer.allocateDirect(/*numBytes*/4 * numInts); + // !!! 
need to explicitly set the byte order to the native order + return bb.order(ByteOrder.nativeOrder()).asIntBuffer(); + } + + + //------------------------ + // TIME + //------------------------ + + private boolean showTiming = true; + + public void timingEnabled(boolean yes) + { + showTiming = yes; + ysparse_timing_set(yes); + } + + private long timingStart(String name) + { + if(showTiming) { + System.out.printf(" java:%s...\n", name); + return System.nanoTime(); + } + return 0; + } + + private void timingStop(String name, long t) + { + if(showTiming) { + t = System.nanoTime() - t; + System.out.printf(" java:%s: %.6fms\n", name, (float)t/1.e6f); + } + } + + private void timingStop(String name, long t, int numBytes) + { + if(showTiming) { + t = System.nanoTime() - t; + float dt = (float)t; + float fb = (float)numBytes; + System.out.printf(" java:%s: %.6fms %.3fMB/s %dB\n", name, dt/1.e6f, 1.e3f*fb/dt, numBytes); + } + } +} diff --git a/rapidyaml/src/main/java/org/rapidyaml/YamlParseErrorException.java b/rapidyaml/src/main/java/org/rapidyaml/YamlParseErrorException.java new file mode 100644 index 000000000..d5d4730df --- /dev/null +++ b/rapidyaml/src/main/java/org/rapidyaml/YamlParseErrorException.java @@ -0,0 +1,16 @@ +package org.rapidyaml; + +// https://www.baeldung.com/java-new-custom-exception +public class YamlParseErrorException extends Exception +{ + public final int offset; + public final int line; + public final int column; + public YamlParseErrorException(int offset_, int line_, int column_, String msg) + { + super(msg); + offset = offset_; + line = line_; + column = column_; + } +} diff --git a/rapidyaml/src/main/java/org/rapidyaml/cmp/CmpEvt.java b/rapidyaml/src/main/java/org/rapidyaml/cmp/CmpEvt.java new file mode 100644 index 000000000..19b6fab7c --- /dev/null +++ b/rapidyaml/src/main/java/org/rapidyaml/cmp/CmpEvt.java @@ -0,0 +1,104 @@ +package cmp; + +import org.rapidyaml.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; 
+import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; + +// https://stackoverflow.com/questions/804466/how-do-i-create-executable-java-program +public class CmpEvt +{ + public static void main(String[] args) throws Exception + { + Rapidyaml rapidyaml = new Rapidyaml(); + rapidyaml.timingEnabled(true); + compareEvt(rapidyaml, "./yamllm.ys"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/appveyor.yml"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/compile_commands.json"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_seqs_flow_outer1000_inner100.yml"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_maps_flow_outer1000_inner100.yml"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_seqs_flow_outer1000_inner1000.yml"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_maps_flow_outer1000_inner1000.yml"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_seqs_flow_outer1000_inner1000_json.json"); + compareEvt(rapidyaml, "/home/jpmag/proj/rapidyaml/bm/cases/style_maps_flow_outer1000_inner1000_json.yml"); + } + + public static void compareEvt(Rapidyaml rapidyaml, String path) throws Exception + { + String ys_ = java.nio.file.Files.readString(Paths.get(path), StandardCharsets.UTF_8); + byte[] ys = ys_.getBytes(StandardCharsets.UTF_8); + byte[] ysarr = new byte[ys.length]; + ByteBuffer ysbuf = ByteBuffer.allocateDirect(ys.length); + // + System.out.printf("-----\n"); + System.out.printf("%s\n", path); + System.out.printf(" ys.length=%d\n", ys.length); + // + long t = timingStart("evt"); + int[] evtarr = callEvt(rapidyaml, ys, ysarr); + timingStop("evt", t, ys.length); + // + t = timingStart("evtBuf"); + IntBuffer evtbuf = callEvtBuf(rapidyaml, ys, ysbuf); + timingStop("evtBuf", t, ys.length); + } + + static int[] callEvt(Rapidyaml rapidyaml, byte[] src, byte[] srcbuf) throws Exception + { + 
System.arraycopy(src, 0, srcbuf, 0, src.length); + int[] evt = new int[10000000]; + int reqsize = rapidyaml.parseYsToEvt(srcbuf, evt); + if(reqsize > evt.length) { + evt = new int[reqsize]; + System.arraycopy(src, 0, srcbuf, 0, src.length); + int reqsize2 = rapidyaml.parseYsToEvt(srcbuf, evt); + if(reqsize2 != reqsize) { + throw new RuntimeException("reqsize"); + } + return evt; + } + int[] ret = new int[reqsize]; + System.arraycopy(evt, 0, ret, 0, reqsize); + return ret; + } + + static IntBuffer callEvtBuf(Rapidyaml rapidyaml, byte[] src, ByteBuffer srcbuf) throws Exception + { + srcbuf.position(0); + srcbuf.put(src); + IntBuffer evt = Rapidyaml.mkIntBuffer(10000000); + int reqsize = rapidyaml.parseYsToEvtBuf(srcbuf, evt); + if(reqsize > evt.capacity()) { + evt = Rapidyaml.mkIntBuffer(reqsize); + srcbuf.position(0); + srcbuf.put(src); + int reqsize2 = rapidyaml.parseYsToEvtBuf(srcbuf, evt); + if(reqsize2 != reqsize) { + throw new RuntimeException("reqsize"); + } + } + evt.position(reqsize); + return evt; + } + + static private long timingStart(String name) + { + System.out.printf(" call:%s...\n", name); + return System.nanoTime(); + } + static private void timingStop(String name, long t) + { + t = System.nanoTime() - t; + System.out.printf(" call:%s: %.6fms\n", name, (float)t/1.e6f); + } + static private void timingStop(String name, long t, int numBytes) + { + t = System.nanoTime() - t; + float dt = (float)t; + float fb = (float)numBytes; + System.out.printf(" call:%s: %.6fms %.3fMB/s %dB\n", name, dt/1.e6f, 1.e3f*fb/dt, numBytes); + } +} diff --git a/rapidyaml/src/main/java/org/rapidyaml/cmp/manifest.mf b/rapidyaml/src/main/java/org/rapidyaml/cmp/manifest.mf new file mode 100644 index 000000000..aa44b6579 --- /dev/null +++ b/rapidyaml/src/main/java/org/rapidyaml/cmp/manifest.mf @@ -0,0 +1 @@ +Main-class: cmp.CmpEvt diff --git a/rapidyaml/src/main/java/org/rapidyaml/cmp/run.sh b/rapidyaml/src/main/java/org/rapidyaml/cmp/run.sh new file mode 100755 index 
#!/bin/bash

# Builds the native library and the java code with timing enabled,
# then compiles and runs the CmpEvt timing program from this directory.
#
# Environment:
#   jd   directory holding the JDK binaries (javac, jar, java);
#        defaults to /usr/lib/jvm/java-23-openjdk/bin

set -xe

thisdir=$(dirname "$0")
# Use `&&` rather than `;`: bash clears errexit inside command
# substitutions, so with `;` a failed cd would still run pwd and
# silently yield the wrong directory.
nativedir=$(cd "$thisdir/../../../../../../native" && pwd)
rymldir=$(cd "$nativedir/.." && pwd)

make -C "$nativedir" build RAPIDYAML_TIMED=1
make -C "$rymldir" test RAPIDYAML_TIMED=1


cd "$thisdir"
# fetch the sample input on first use
if [ ! -f yamllm.ys ] ; then
    wget https://raw.githubusercontent.com/yaml/yamllm/refs/heads/main/bin/yamllm.ys
fi
ls -lFhp
jd=${jd:-/usr/lib/jvm/java-23-openjdk/bin}
"$jd/javac" -d . ../*.java
"$jd/javac" -d . -cp . CmpEvt.java
"$jd/jar" -cmf manifest.mf CmpEvt.jar cmp org
"$jd/java" -Djava.library.path="$nativedir" -jar CmpEvt.jar
+ */ +public class RapidyamlTest extends TestCase +{ + /** + * Create the test case + * + * @param testName name of the test case + */ + public RapidyamlTest(String testName) + { + super(testName); + } + + /** + * @return the suite of tests being tested + */ + public static Test suite() + { + return new TestSuite(RapidyamlTest.class); + } + + public void testPlainMap() + { + String ys = "a: 1"; + ExpectedEvent[] expected = { + mkev(Evt.BSTR), + mkev(Evt.BDOC), + mkev(Evt.VAL_|Evt.BMAP|Evt.BLCK), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 0, 1, "a"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 3, 1, "1"), + mkev(Evt.EMAP), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testUtf8() + { + String ys = "𝄞: ✅"; + ExpectedEvent[] expected = { + mkev(Evt.BSTR), + mkev(Evt.BDOC), + mkev(Evt.VAL_|Evt.BMAP|Evt.BLCK), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 0, 4, "𝄞"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 6, 3, "✅"), + mkev(Evt.EMAP), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testTaggedInt() + { + String ys = "- !!int 42"; + ExpectedEvent[] expected = { + mkev(Evt.BSTR), + mkev(Evt.BDOC), + mkev(Evt.VAL_|Evt.BSEQ|Evt.BLCK), + mkev(Evt.VAL_|Evt.TAG_, 2, 5, "!!int"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 8, 2, "42"), + mkev(Evt.ESEQ), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testTaggedSeq() + { + String ys = "- !!seq []"; + ExpectedEvent[] expected = { + mkev(Evt.BSTR), + mkev(Evt.BDOC), + mkev(Evt.VAL_|Evt.BSEQ|Evt.BLCK), + mkev(Evt.VAL_|Evt.TAG_, 2, 5, "!!seq"), + mkev(Evt.VAL_|Evt.BSEQ|Evt.FLOW), + mkev(Evt.ESEQ), + mkev(Evt.ESEQ), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testLargeCase() + { + String ys = "--- !yamlscript/v0\n" + + "foo: !\n" + + "- {x: y}\n" + + "- [x, y]\n" + + "- foo\n" + + "- 'foo'\n" + + "- \"foo\"\n" + + "- |\n" + + " foo\n" + + " literal\n" + + "- >\n" + + " foo\n" + + " folded\n" + + "- [1, 2, true, 
false, null]\n" + + "- &anchor-1 !tag-1 foobar\n" + + "---\n" + + "another: doc\n"; + ExpectedEvent[] expected = { + mkev(Evt.BSTR), + mkev(Evt.BDOC|Evt.EXPL), + mkev(Evt.VAL_|Evt.TAG_, 5, 13, "yamlscript/v0"), + mkev(Evt.VAL_|Evt.BMAP|Evt.BLCK), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 19, 3, "foo"), + mkev(Evt.VAL_|Evt.TAG_, 25, 0, ""), + mkev(Evt.VAL_|Evt.BSEQ|Evt.BLCK), + mkev(Evt.VAL_|Evt.BMAP|Evt.FLOW), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 29, 1, "x"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 32, 1, "y"), + mkev(Evt.EMAP), + mkev(Evt.VAL_|Evt.BSEQ|Evt.FLOW), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 38, 1, "x"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 41, 1, "y"), + mkev(Evt.ESEQ), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 46, 3, "foo"), + mkev(Evt.VAL_|Evt.SCLR|Evt.SQUO, 53, 3, "foo"), + mkev(Evt.VAL_|Evt.SCLR|Evt.DQUO, 61, 3, "foo"), + mkev(Evt.VAL_|Evt.SCLR|Evt.LITL, 70, 12, "foo\nliteral\n"), + mkev(Evt.VAL_|Evt.SCLR|Evt.FOLD, 98, 11, "foo folded\n"), + mkev(Evt.VAL_|Evt.BSEQ|Evt.FLOW), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 124, 1, "1"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 127, 1, "2"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 130, 4, "true"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 136, 5, "false"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 143, 4, "null"), + mkev(Evt.ESEQ), + mkev(Evt.VAL_|Evt.TAG_, 162, 5, "tag-1"), + mkev(Evt.VAL_|Evt.ANCH, 152, 8, "anchor-1"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 168, 6, "foobar"), + mkev(Evt.ESEQ), + mkev(Evt.EMAP), + mkev(Evt.EDOC), + mkev(Evt.BDOC|Evt.EXPL), + mkev(Evt.VAL_|Evt.BMAP|Evt.BLCK), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 179, 7, "another"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 188, 3, "doc"), + mkev(Evt.EMAP), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testFilterCase() + { + String ys = "" + + "plain: well\n" + + " a\n" + + " b\n" + + " c\n" + + "squo: 'single''quote'\n" + + "dquo: \"x\\t\\ny\"\n" + + "lit: |\n" + + " X\n" + + " Y\n" + + " Z\n" + + "fold: >\n" + + " U\n" + + " V\n" + + " W\n"; + ExpectedEvent[] expected = 
{ + mkev(Evt.BSTR), + mkev(Evt.BDOC), + mkev(Evt.VAL_|Evt.BMAP|Evt.BLCK), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 0, 5, "plain"), + mkev(Evt.VAL_|Evt.SCLR|Evt.PLAI, 7, 10, "well a b c"), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 24, 4, "squo"), + mkev(Evt.VAL_|Evt.SCLR|Evt.SQUO, 31, 12, "single'quote"), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 46, 4, "dquo"), + mkev(Evt.VAL_|Evt.SCLR|Evt.DQUO, 53, 4, "x\t\ny"), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 61, 3, "lit"), + mkev(Evt.VAL_|Evt.SCLR|Evt.LITL, 68, 6, "X\nY\nZ\n"), + mkev(Evt.KEY_|Evt.SCLR|Evt.PLAI, 89, 4, "fold"), + mkev(Evt.VAL_|Evt.SCLR|Evt.FOLD, 97, 6, "U V W\n"), + mkev(Evt.EMAP), + mkev(Evt.EDOC), + mkev(Evt.ESTR), + }; + testEvt_(ys, expected); + } + + public void testFailure() throws Exception + { + Rapidyaml rapidyaml = new Rapidyaml(); + String ys = ": : : :"; + byte[] src = ys.getBytes(StandardCharsets.UTF_8); + byte[] srcbuf = new byte[src.length]; + boolean gotit = false; + try { + callEvt(src, srcbuf); + } + catch(YamlParseErrorException e) { + gotit = true; + assertEquals(2, e.offset); + assertEquals(1, e.line); + assertEquals(3, e.column); + assertTrue(e.getMessage() != null); + assertFalse(e.getMessage().isEmpty()); + } + catch(RuntimeException e) { + fail("wrong exception type"); + } + catch(Exception e) { + fail("wrong exception type"); + } + assertTrue(gotit); + } + + public void testFailureBuf() throws Exception + { + Rapidyaml rapidyaml = new Rapidyaml(); + String ys = ": : : :"; + byte[] src = ys.getBytes(StandardCharsets.UTF_8); + ByteBuffer bbuf = ByteBuffer.allocateDirect(src.length); + bbuf.put(src); + boolean gotit = false; + try { + callEvtBuf(src, bbuf); + } + catch(YamlParseErrorException e) { + gotit = true; + assertEquals(2, e.offset); + assertEquals(1, e.line); + assertEquals(3, e.column); + assertTrue(e.getMessage() != null); + assertFalse(e.getMessage().isEmpty()); + } + catch(RuntimeException e) { + fail("wrong exception type"); + } + catch(Exception e) { + fail("wrong exception type"); + } + 
assertTrue(gotit); + } + + + private void testEvt_(String ys, ExpectedEvent[] expected) + { + byte[] src = ys.getBytes(StandardCharsets.UTF_8); + byte[] srcbuf = new byte[src.length]; + int[] actual; + try { + actual = callEvt(src, srcbuf); + } + catch (Exception e) { + fail("parse error:\n" + e.getMessage()); + actual = new int[1]; + } + try { + cmpEvt_(ys, srcbuf, actual, expected); + } + catch (Exception e) { + System.err.printf("error: evt (no buf)"); + throw e; + } + //------ + src = ys.getBytes(StandardCharsets.UTF_8); + ByteBuffer bbuf = ByteBuffer.allocateDirect(src.length); + bbuf.put(src); + IntBuffer buf; + try { + buf = callEvtBuf(src, bbuf); + actual = buf2arr(buf); + } + catch (Exception e) { + fail("parse error:\n" + e.getMessage()); + actual = new int[1]; + } + try { + cmpEvt_(ys, srcbuf, actual, expected); + } + catch (Exception e) { + System.err.printf("error: evtbuf"); + throw e; + } + } + + boolean dbglog = true; + private void cmpEvt_(String ys, byte[] src, int[] actual, ExpectedEvent[] expected) + { + if(dbglog) { + System.out.printf("----------------------\n~~~\n%s\n~~~\n", ys); + } + int numEvts = actual.length; + try { + int ia = 0; + int ie = 0; + int status = 1; + while(true) { + if((ia < numEvts) != (ie < expected.length)) { + System.out.printf("status=%d szActual=%d szExpected=%d\n", status, numEvts, ExpectedEvent.required_size_(expected)); + status = 0; + break; + } + if(ia >= numEvts) + break; + if(ie >= expected.length) + break; + int cmp = 1; + if(dbglog) + System.out.printf("status=%d evt=%d pos=%d expflags=%d actualflags=%d", status, ie, ia, expected[ie].flags, actual[ia]); + cmp &= (expected[ie].flags == actual[ia]) ? 1 : 0; + if(((actual[ia] & Evt.HAS_STR) != 0) && ((expected[ie].flags & Evt.HAS_STR)) != 0) { + cmp &= (ia + 2 < numEvts) ? 1 : 0; + if(cmp != 0) { + cmp &= (expected[ie].str_start == actual[ia + 1]) ? 1 : 0; + cmp &= (expected[ie].str_len == actual[ia + 2]) ? 
1 : 0; + if(dbglog) + System.out.printf(" exp=(%d,%d) actual=(%d,%d)", expected[ie].str_start, expected[ie].str_len, actual[ia + 1], actual[ia + 2]); + if(cmp != 0) { + cmp &= (actual[ia + 1] >= 0) ? 1 : 0; + cmp &= (actual[ia + 2] >= 0) ? 1 : 0; + cmp &= (actual[ia + 1] + actual[ia + 2] <= src.length) ? 1 : 0; + if(cmp != 0) { + String actualStr = new String(src, actual[ia + 1], actual[ia + 2], StandardCharsets.UTF_8); + cmp &= actualStr.equals(expected[ie].str) ? 1 : 0; + if(dbglog) + System.out.printf(" exp=~~~%s~~~ actual=~~~%s~~~", expected[ie].str, actualStr); + } + else { + if(dbglog) + System.out.printf(" BAD RANGE len=%d", src.length); + } + } + } + } + if(dbglog) + System.out.printf(" --> %s\n", cmp != 0 ? "ok!" : "FAIL"); + status &= cmp; + ia += ((actual[ia] & Evt.HAS_STR) != 0) ? 3 : 1; + ++ie; + } + if(ExpectedEvent.required_size_(expected) != numEvts) + status = 0; + assertEquals(1, status); + } + catch (Exception e) { + System.err.println("expected:"); + System.err.println(expected); + System.err.println("actual"); + System.err.println(actual); + throw e; + } + } + + public static String buf2str(ByteBuffer edn) + { + int size = edn.position(); + size = size > 0 ? 
size - 1 : 0; + edn.position(0); + edn.limit(size); + return StandardCharsets.UTF_8.decode(edn).toString(); + } + + public static int[] buf2arr(IntBuffer evt) + { + int[] ret = new int[evt.position()]; + for(int i = 0; i < evt.position(); ++i) { + ret[i] = evt.get(i); + } + return ret; + } + + static int[] callEvt(byte[] src, byte[] srcbuf) throws Exception + { + Rapidyaml rapidyaml = new Rapidyaml(); + System.arraycopy(src, 0, srcbuf, 0, src.length); + int[] evt = new int[10000]; + int reqsize = rapidyaml.parseYsToEvt(srcbuf, evt); + if(reqsize > evt.length) { + evt = new int[reqsize]; + System.arraycopy(src, 0, srcbuf, 0, src.length); + int reqsize2 = rapidyaml.parseYsToEvt(srcbuf, evt); + if(reqsize2 != reqsize) { + throw new RuntimeException("reqsize"); + } + return evt; + } + int[] ret = new int[reqsize]; + System.arraycopy(evt, 0, ret, 0, reqsize); + return ret; + } + + static IntBuffer callEvtBuf(byte[] src, ByteBuffer srcbuf) throws Exception + { + Rapidyaml rapidyaml = new Rapidyaml(); + srcbuf.position(0); + srcbuf.put(src); + IntBuffer evt = Rapidyaml.mkIntBuffer(10000); + int reqsize = rapidyaml.parseYsToEvtBuf(srcbuf, evt); + if(reqsize > evt.capacity()) { + evt = Rapidyaml.mkIntBuffer(reqsize); + srcbuf.position(0); + srcbuf.put(src); + int reqsize2 = rapidyaml.parseYsToEvtBuf(srcbuf, evt); + if(reqsize2 != reqsize) { + throw new RuntimeException("reqsize"); + } + } + evt.position(reqsize); + return evt; + } + + ExpectedEvent mkev(int flags) + { + return new ExpectedEvent(flags); + } + + ExpectedEvent mkev(int flags, int offs, int len, String ref) + { + return new ExpectedEvent(flags, offs, len, ref); + } +} + +// the result is an array of integers, but we use this to simplify +// running the tests +class ExpectedEvent +{ + int flags; + int str_start; + int str_len; + String str; + ExpectedEvent(int flags) + { + this.flags = flags; + this.str_start = 0; + this.str_len = 0; + this.str = ""; + } + ExpectedEvent(int flags, int str_start, int str_len, 
#!/usr/bin/env bash

# Builds rapidyaml, then runs the sibling YS script with the rapidyaml
# parser selected and the native library directory on LD_LIBRARY_PATH.
# All arguments are passed through to YS.

set -euo pipefail

(
  self=${BASH_SOURCE[0]:-$0}
  here=$(dirname "$self")
  # physical paths of the repository root and of this script's directory
  root=$(cd "$here/.." && pwd -P)
  bin=$(cd "$here" && pwd -P)
  ry=$root/rapidyaml
  make --no-print-directory -C "$ry" build
  export YS_PARSER_RAPIDYAML=1 LD_LIBRARY_PATH="$ry/native"
  exec "$bin/YS" "$@"
)