Skip to content

Commit

Permalink
fix: Check loaded language ABI (#783)
Browse files Browse the repository at this point in the history
* fix: Check loaded language ABI

Check that the tree-sitter ABI version reported by the loaded language
is within bounds for the tree-sitter client library that's bundled with
diffsitter. This should give more helpful errors when a user has a
grammar that's built against a tree-sitter version that's not in range.

Previously the program would just segfault which does not make it clear
to users that there is a mismatch between the grammar being loaded and
the library bundled in diffsitter.

* test(grammars): Add unit test for ts ABI

Adds a unit test for the bundled tree-sitter grammars to verify that each
one reports a compatible ABI version.

* refactor(parsing): Refactor method to check ABI

Refactor the method to check a tree-sitter ABI version for a grammar so
that it returns a result type so the method can be used monadically and
re-used for a unit test.
  • Loading branch information
afnanenayet authored Dec 24, 2023
1 parent 239bb29 commit e56d1ee
Showing 1 changed file with 38 additions and 2 deletions.
40 changes: 38 additions & 2 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use phf::phf_map;
#[cfg(not(feature = "static-grammar-libs"))]
use tree_sitter::Language;

use anyhow::Result;
use log::{debug, error, info};
use logging_timer::time;
use serde::{Deserialize, Serialize};
Expand All @@ -37,7 +36,7 @@ use std::{
path::{Path, PathBuf},
};
use thiserror::Error;
use tree_sitter::{Parser, Tree};
use tree_sitter::{Parser, Tree, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION};

/// A mapping of file extensions to their associated languages
///
Expand Down Expand Up @@ -102,6 +101,9 @@ pub enum LoadingError {
#[cfg(feature = "dynamic-grammar-libs")]
#[error("Unable to dynamically load grammar")]
LibloadingError(#[from] libloading::Error),

#[error("Attempted to load a tree-sitter grammar with incompatible language ABI version: {0} (supported range: {1} - {2})")]
AbiOutOfRange(usize, usize, usize),
}

type StringMap = HashMap<String, String>;
Expand Down Expand Up @@ -354,6 +356,27 @@ pub fn lang_name_from_file_ext<'cfg>(
}
}

/// Checks whether a tree-sitter language reports an ABI version that `diffsitter` can load.
///
/// Diffsitter is built against one version of the tree-sitter library, and that library supports
/// a specific range of tree-sitter ABIs. Each compiled tree-sitter grammar reports its own ABI
/// version, so the check can run before the grammar is handed to a tree-sitter parser, which
/// should prevent segfaults caused by this sort of mismatch.
///
/// # Errors
///
/// Returns [`LoadingError::AbiOutOfRange`] carrying the reported version and the supported bounds
/// when the version falls outside `MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION`.
fn ts_language_abi_checked(ts_language: &Language) -> Result<(), LoadingError> {
    let reported_abi = ts_language.version();
    // Both ends of the supported range are inclusive.
    let supported_abis = MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION;

    if supported_abis.contains(&reported_abi) {
        Ok(())
    } else {
        Err(LoadingError::AbiOutOfRange(
            reported_abi,
            MIN_COMPATIBLE_LANGUAGE_VERSION,
            LANGUAGE_VERSION,
        ))
    }
}

/// Creates a tree-sitter [Parser] for a given language.
///
/// This handles the boilerplate for loading the tree-sitter library for the [language](Language) and setting
Expand All @@ -364,6 +387,7 @@ pub fn ts_parser_for_language(
config: &GrammarConfig,
) -> Result<Parser, LoadingError> {
let ts_language = generate_language(language, config)?;
ts_language_abi_checked(&ts_language)?;
let mut parser = Parser::new();
parser.set_language(ts_language)?;
Ok(parser)
Expand Down Expand Up @@ -445,4 +469,16 @@ mod tests {

assert!(failures.is_empty(), "{:#?}", failures);
}

/// Verifies that every statically bundled grammar reports an ABI version accepted by
/// `ts_language_abi_checked`.
///
/// The test stops at the first grammar that fails the check and returns its error.
#[cfg(feature = "static-grammar-libs")]
#[test]
fn test_static_grammar_tree_sitter_abi_compatibility() -> Result<(), LoadingError> {
    (&LANGUAGES).into_iter().try_for_each(|(_, grammar_ctor)| {
        // SAFETY (assumed, TODO confirm): each entry in `LANGUAGES` is expected to be a
        // tree-sitter grammar constructor that takes no arguments and has no preconditions.
        let grammar = unsafe { grammar_ctor() };
        ts_language_abi_checked(&grammar)
    })
}
}

0 comments on commit e56d1ee

Please # to comment.