From de01976503a9f0334c4db8f45d4ec5c752337525 Mon Sep 17 00:00:00 2001 From: ALancki1 <90658099+BradBillion@users.noreply.github.com> Date: Thu, 2 Apr 2026 09:47:29 -0400 Subject: [PATCH 1/2] Collect source text from ParseCallback during parsing --- src/main/java/io/github/treesitter/jtreesitter/Parser.java | 4 +++- .../java/io/github/treesitter/jtreesitter/ParserTest.java | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/github/treesitter/jtreesitter/Parser.java b/src/main/java/io/github/treesitter/jtreesitter/Parser.java index bf51cc5..3caa6a4 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Parser.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Parser.java @@ -260,6 +260,7 @@ public Optional parse( if (language == null) { throw new IllegalStateException("The parser has no language assigned"); } + final var collected = new StringBuilder(); var input = TSInput.allocate(arena); TSInput.payload(input, MemorySegment.NULL); @@ -271,6 +272,7 @@ public Optional parse( bytes.set(C_INT, 0, 0); return MemorySegment.NULL; } + collected.append(result); var buffer = result.getBytes(encoding.charset()); bytes.set(C_INT, 0, buffer.length); return arena.allocateFrom(C_CHAR, buffer); @@ -295,7 +297,7 @@ public Optional parse( tree = ts_parser_parse_with_options(self, old, input, parseOptions); } if (tree.equals(MemorySegment.NULL)) return Optional.empty(); - return Optional.of(new Tree(tree, language, null, null)); + return Optional.of(new Tree(tree, language, collected.toString(), encoding.charset())); } /** diff --git a/src/test/java/io/github/treesitter/jtreesitter/ParserTest.java b/src/test/java/io/github/treesitter/jtreesitter/ParserTest.java index edc6234..3638220 100644 --- a/src/test/java/io/github/treesitter/jtreesitter/ParserTest.java +++ b/src/test/java/io/github/treesitter/jtreesitter/ParserTest.java @@ -106,10 +106,10 @@ void parseLogger() { @DisplayName("parse(callback)") void parseCallback() { var source = "class Foo {}"; - ParseCallback callback = (offset, _) -> source.substring(offset, Integer.min(offset, source.length())); + ParseCallback callback = (offset, _) -> source.substring(offset, Integer.min(source.length(), offset + 1)); parser.setLanguage(language); try (var tree = parser.parse(callback, InputEncoding.UTF_8).orElseThrow()) { - assertNull(tree.getText()); + assertEquals(source, tree.getText()); assertEquals("program", tree.getRootNode().getType()); } } From 52a8e6aad43fb32402c884abe9158e5e4fb44865 Mon Sep 17 00:00:00 2001 From: ALancki1 <90658099+BradBillion@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:24:10 -0400 Subject: [PATCH 2/2] Use byte buffer for ParseCallback source collection --- .../io/github/treesitter/jtreesitter/Parser.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/github/treesitter/jtreesitter/Parser.java b/src/main/java/io/github/treesitter/jtreesitter/Parser.java index 3caa6a4..542420f 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Parser.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Parser.java @@ -260,7 +260,7 @@ public Optional parse( if (language == null) { throw new IllegalStateException("The parser has no language assigned"); } - final var collected = new StringBuilder(); + final var collected = new java.io.ByteArrayOutputStream(); var input = TSInput.allocate(arena); TSInput.payload(input, MemorySegment.NULL); @@ -272,8 +272,16 @@ public Optional parse( bytes.set(C_INT, 0, 0); return MemorySegment.NULL; } - collected.append(result); var buffer = result.getBytes(encoding.charset()); + var collectedSize = collected.size(); + if (index >= collectedSize) { + collected.write(buffer, 0, buffer.length); + } else { + var overlap = collectedSize - index; + if (overlap < buffer.length) { + collected.write(buffer, overlap, buffer.length - overlap); + } + } bytes.set(C_INT, 0, buffer.length); return arena.allocateFrom(C_CHAR, buffer); }, @@ -297,7 +305,7 @@ public Optional parse( tree = ts_parser_parse_with_options(self, old, input, parseOptions); } if (tree.equals(MemorySegment.NULL)) return Optional.empty(); - return Optional.of(new Tree(tree, language, collected.toString(), encoding.charset())); + return Optional.of(new Tree(tree, language, collected.toByteArray(), encoding.charset())); } /**