From d715940b242530c193b0a4476bddc423dffe7bcd Mon Sep 17 00:00:00 2001 From: Marcono1234 Date: Sat, 21 Sep 2024 16:09:17 +0200 Subject: [PATCH] fix(language): use correct scope for language pointer --- spotbugs-exclude.xml | 8 +++ .../treesitter/jtreesitter/Language.java | 59 ++++++++++++++++--- .../treesitter/jtreesitter/LanguageTest.java | 23 ++++++++ .../jtreesitter/languages/TreeSitterJava.java | 10 +++- 4 files changed, 89 insertions(+), 11 deletions(-) diff --git a/spotbugs-exclude.xml b/spotbugs-exclude.xml index bd100f7..ea86a2f 100644 --- a/spotbugs-exclude.xml +++ b/spotbugs-exclude.xml @@ -15,4 +15,12 @@ + + + + + + + + diff --git a/src/main/java/io/github/treesitter/jtreesitter/Language.java b/src/main/java/io/github/treesitter/jtreesitter/Language.java index 6608c3a..af289e0 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Language.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Language.java @@ -4,6 +4,7 @@ import io.github.treesitter.jtreesitter.internal.TreeSitter; import java.lang.foreign.*; +import java.util.Objects; import org.jspecify.annotations.NullMarked; import org.jspecify.annotations.Nullable; @@ -40,7 +41,7 @@ public final class Language { * @throws IllegalArgumentException If the language version is incompatible. */ public Language(MemorySegment self) throws IllegalArgumentException { - this.self = self.reinterpret(LIBRARY_ARENA, TreeSitter::ts_language_delete); + this.self = Objects.requireNonNull(self); version = ts_language_version(this.self); if (version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION) { throw new IllegalArgumentException(String.format( @@ -53,6 +54,23 @@ private static UnsatisfiedLinkError unresolved(String name) { return new UnsatisfiedLinkError("Unresolved symbol: %s".formatted(name)); } + private static MemorySegment loadLanguagePointer(SymbolLookup symbols, Arena libraryArena, String language) { + var address = symbols.find(language).orElseThrow(() -> unresolved(language)); + try { + var function = LINKER.downcallHandle(address, FUNC_DESC); + var languagePointer = (MemorySegment) function.invokeExact(); + // The results of Linker downcalls always use the global scope, but the language pointer actually points + // to data in the loaded library. Therefore change the scope of the pointer to be the same as the library. + // So if the library is unloaded while the language pointer is still in use, the language pointer becomes + // invalid and an exception occurs (instead of a JVM crash). + // Ideally this would not require the Arena (which is not always available), but instead just apply the + // scope of `address` to the `languagePointer`, but that is currently not possible, see https://bugs.openjdk.org/browse/JDK-8340641 + return languagePointer.reinterpret(libraryArena, TreeSitter::ts_language_delete); + } catch (Throwable e) { + throw new RuntimeException("Failed to load %s".formatted(language), e); + } + } + /** * Load a language by looking for its function in the given symbols. * @@ -60,21 +78,44 @@ private static UnsatisfiedLinkError unresolved(String name) { * *

{@snippet lang="java" : * String library = System.mapLibraryName("tree-sitter-java"); - * SymbolLookup symbols = SymbolLookup.libraryLookup(library, Arena.global()); + * Arena arena = Arena.global(); + * SymbolLookup symbols = SymbolLookup.libraryLookup(library, arena); + * Language language = Language.load(symbols, arena, "tree_sitter_java"); + * } + * + * @param libraryArena The arena which was used to load the {@code symbols} library. + * @throws UnsatisfiedLinkError If the language function could not be found in the symbols. + * @throws RuntimeException If the language could not be loaded. + */ + // TODO: deprecate when the bindings are generated by the CLI + public static Language load(SymbolLookup symbols, Arena libraryArena, String language) throws RuntimeException { + return new Language(loadLanguagePointer(symbols, libraryArena, language)); + } + + /** + * Load a language by looking for its function in the given symbols. + * + *

If {@code symbols} was obtained from {@link SymbolLookup#libraryLookup} and you have access to the + * {@code Arena} which was used as argument to {@code libraryLookup(...)}, prefer + * {@link #load(SymbolLookup, Arena, String)} since it is safer. + * + *

Example

+ * + *

{@snippet lang="java" : + * SymbolLookup symbols = ...; * Language language = Language.load(symbols, "tree_sitter_java"); * } * + * @throws UnsatisfiedLinkError If the language function could not be found in the symbols. * @throws RuntimeException If the language could not be loaded. */ // TODO: deprecate when the bindings are generated by the CLI public static Language load(SymbolLookup symbols, String language) throws RuntimeException { - var address = symbols.find(language).orElseThrow(() -> unresolved(language)); - try { - var function = LINKER.downcallHandle(address, FUNC_DESC); - return new Language((MemorySegment) function.invokeExact()); - } catch (Throwable e) { - throw new RuntimeException("Failed to load %s".formatted(language), e); - } + // TODO: This is not actually safe, `libraryArena` should be the arena which was used to create `symbols`, + // but we don't have access to it here; see comments in `loadLanguagePointer(...)` for details + // In the worst case the JVM crashes if the user unloads the parser library while the language is still in use + var libraryArena = LIBRARY_ARENA; + return new Language(loadLanguagePointer(symbols, libraryArena, language)); } MemorySegment segment() { diff --git a/src/test/java/io/github/treesitter/jtreesitter/LanguageTest.java b/src/test/java/io/github/treesitter/jtreesitter/LanguageTest.java index d1125e0..0130b13 100644 --- a/src/test/java/io/github/treesitter/jtreesitter/LanguageTest.java +++ b/src/test/java/io/github/treesitter/jtreesitter/LanguageTest.java @@ -3,6 +3,8 @@ import static org.junit.jupiter.api.Assertions.*; import io.github.treesitter.jtreesitter.languages.TreeSitterJava; +import java.lang.foreign.Arena; +import java.lang.foreign.SymbolLookup; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -14,6 +16,27 @@ static void beforeAll() { language = new Language(TreeSitterJava.language()); } + @Test + void load() { + // Uses `Language#load(SymbolLookup, String)` + try (var arena = Arena.ofConfined()) { + var library = System.mapLibraryName("tree-sitter-java"); + var symbols = SymbolLookup.libraryLookup(library, arena); + var loadedLanguage = Language.load(symbols, "tree_sitter_java"); + assertEquals(language.getVersion(), loadedLanguage.getVersion()); + assertEquals(language.getSymbolCount(), loadedLanguage.getSymbolCount()); + } + + // Uses `Language#load(SymbolLookup, Arena, String)` + try (var arena = Arena.ofConfined()) { + var library = System.mapLibraryName("tree-sitter-java"); + var symbols = SymbolLookup.libraryLookup(library, arena); + var loadedLanguage = Language.load(symbols, arena, "tree_sitter_java"); + assertEquals(language.getVersion(), loadedLanguage.getVersion()); + assertEquals(language.getSymbolCount(), loadedLanguage.getSymbolCount()); + } + } + @Test void getVersion() { assertEquals(14, language.getVersion()); diff --git a/src/test/java/io/github/treesitter/jtreesitter/languages/TreeSitterJava.java b/src/test/java/io/github/treesitter/jtreesitter/languages/TreeSitterJava.java index 5a8900c..9e9175d 100644 --- a/src/test/java/io/github/treesitter/jtreesitter/languages/TreeSitterJava.java +++ b/src/test/java/io/github/treesitter/jtreesitter/languages/TreeSitterJava.java @@ -39,8 +39,14 @@ private static UnsatisfiedLinkError unresolved(String name) { private MemorySegment call(String name) throws UnsatisfiedLinkError { var address = symbols.find(name).orElseThrow(() -> unresolved(name)); try { - final MethodHandle function = LINKER.downcallHandle(address, FUNC_DESC); - return ((MemorySegment) function.invokeExact()).asReadOnly(); + var function = LINKER.downcallHandle(address, FUNC_DESC); + var languagePointer = (MemorySegment) function.invokeExact(); + // The results of Linker downcalls always use the global scope, but the language pointer actually points + // to data in the loaded library. Therefore change the scope of the pointer to be the same as the library. + // So if the library is unloaded while the language pointer is still in use, the language pointer becomes + // invalid and an exception occurs (instead of a JVM crash). + languagePointer = languagePointer.reinterpret(arena, ignored -> {}); + return languagePointer.asReadOnly(); } catch (Throwable e) { throw new RuntimeException("Call to %s failed".formatted(name), e); }