mirror of https://github.com/tildeio/helix
Safer string coercions
Previously, we blindly assumed Ruby strings are UTF-8 and turned them into Rust Strings (which *are* assumed to be UTF-8). This is clearly unsafe, so this commit adds some checks to confirm that and generates type errors appropriately.
This commit is contained in:
parent
bda5141f37
commit
faaa6b1b26
|
@ -208,6 +208,8 @@ extern "C" {
|
|||
pub fn rb_define_alloc_func(klass: VALUE, func: extern "C" fn(klass: VALUE) -> VALUE);
|
||||
pub fn rb_define_method(class: VALUE, name: c_string, func: c_func, arity: isize);
|
||||
pub fn rb_define_singleton_method(class: VALUE, name: c_string, func: c_func, arity: isize);
|
||||
// Encoding-index lookups. `from_ruby` for `String` compares the two results to
// decide whether a Ruby string is tagged with the UTF-8 encoding.
// NOTE(review): Ruby's encoding.h appears to declare both functions as returning
// a C `int`; confirm that `isize` is ABI-compatible on the supported targets.
pub fn rb_enc_get_index(obj: VALUE) -> isize;
|
||||
pub fn rb_utf8_encindex() -> isize;
|
||||
pub fn rb_sprintf(specifier: c_string, ...) -> VALUE;
|
||||
pub fn rb_inspect(value: VALUE) -> VALUE;
|
||||
pub fn rb_intern(string: c_string) -> ID;
|
||||
|
@ -239,6 +241,12 @@ extern "C" {
|
|||
state: *mut RubyException)
|
||||
-> VALUE;
|
||||
|
||||
/// Bound to the C shim `HELIX_rb_str_valid_encoding_p`; `from_ruby` for `String`
/// calls it on strings whose encoding index matches the UTF-8 index.
#[link_name = "HELIX_rb_str_valid_encoding_p"]
|
||||
pub fn rb_str_valid_encoding_p(string: VALUE) -> bool;
|
||||
|
||||
/// Bound to the C shim `HELIX_rb_str_ascii_only_p`; `from_ruby` for `String`
/// calls it on strings whose encoding index is not the UTF-8 index.
#[link_name = "HELIX_rb_str_ascii_only_p"]
|
||||
pub fn rb_str_ascii_only_p(string: VALUE) -> bool;
|
||||
|
||||
#[link_name = "HELIX_Data_Wrap_Struct"]
|
||||
pub fn Data_Wrap_Struct(klass: VALUE, mark: extern "C" fn(*mut void), free: extern "C" fn(*mut void), data: *mut void) -> VALUE;
|
||||
|
||||
|
|
|
@ -50,7 +50,19 @@ describe "Console" do
|
|||
end
|
||||
end
|
||||
|
||||
it "can handle invalid arguments" do
|
||||
expect { console.log(123) }.to raise_error(TypeError, "Expected a UTF-8 String, got 123")
|
||||
describe "invalid arguments" do
|
||||
# Passing an Integer instead of a String must raise a TypeError whose message
# names the expected type and the offending value.
it "can handle non-strings" do
|
||||
expect { console.log(123) }.to raise_error(TypeError, "Expected a String, got 123")
|
||||
end
|
||||
|
||||
# Expects logging a BIG5-encoded string to raise a TypeError that includes the
# string's #inspect output.
# NOTE(review): "hello" consists solely of ASCII bytes, and the String coercion
# accepts non-UTF-8 strings that are ASCII-only — confirm that this input
# actually reaches the error branch (a non-ASCII BIG5 string may be needed).
it "raises on non UTF-8 strings" do
|
||||
str = "hello".encode("BIG5")
|
||||
expect { console.log(str) }.to raise_error(TypeError, "Expected an UTF-8 String, got #{str.inspect}")
|
||||
end
|
||||
|
||||
# Expects logging a string that fails the encoding-validity check to raise a
# TypeError that includes the string's #inspect output.
# "\330" is the single byte 0xD8 (octal escape); presumably the spec file's
# source encoding is UTF-8, making this an incomplete sequence — verify.
it "raises on invalid UTF-8 strings" do
|
||||
str = "\330"
|
||||
expect { console.log(str) }.to raise_error(TypeError, "Expected a valid UTF-8 String, got #{str.inspect}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <ruby.h>
|
||||
#include <ruby/intern.h>
|
||||
#include <ruby/encoding.h>
|
||||
#include <stdbool.h>
|
||||
#include <helix_runtime.h>
|
||||
|
||||
|
@ -64,6 +65,14 @@ VALUE HELIX_rb_utf8_str_new(const char* str, long len) {
|
|||
return rb_utf8_str_new(str, len);
|
||||
}
|
||||
|
||||
// Returns true unless rb_enc_str_coderange() classifies `str` as
// ENC_CODERANGE_BROKEN, i.e. every other code range counts as valid.
bool HELIX_rb_str_valid_encoding_p(VALUE str) {
|
||||
return rb_enc_str_coderange(str) != ENC_CODERANGE_BROKEN;
|
||||
}
|
||||
|
||||
// Returns true only when rb_enc_str_coderange() classifies `str` as
// ENC_CODERANGE_7BIT.
bool HELIX_rb_str_ascii_only_p(VALUE str) {
|
||||
return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
VALUE HELIX_Data_Wrap_Struct(VALUE klass, HELIX_RUBY_DATA_FUNC mark, HELIX_RUBY_DATA_FUNC free, void* data) {
|
||||
return Data_Wrap_Struct(klass, mark, free, data);
|
||||
}
|
||||
|
|
|
@ -69,6 +69,9 @@ HELIX_EXTERN VALUE HELIX_FIX2INT(VALUE fix);
|
|||
|
||||
HELIX_EXTERN VALUE HELIX_rb_utf8_str_new(const char* str, long len);
|
||||
|
||||
HELIX_EXTERN bool HELIX_rb_str_valid_encoding_p(VALUE str);
|
||||
HELIX_EXTERN bool HELIX_rb_str_ascii_only_p(VALUE str);
|
||||
|
||||
// typedef VALUE (*HELIX_rb_alloc_func_t)(VALUE);
|
||||
// void HELIX_rb_define_alloc_func(VALUE klass, HELIX_rb_alloc_func_t func);
|
||||
|
||||
|
|
|
@ -204,6 +204,56 @@ describe HelixRuntime do
|
|||
end
|
||||
end
|
||||
|
||||
# Checks that Dummy.valid_encoding_p returns the same answer as
# String#valid_encoding? for plain literals, strings transcoded to BIG5 with
# #encode, strings merely relabelled as BIG5 with #force_encoding, and the
# single-byte literal "\330".
describe "HELIX_rb_str_valid_encoding_p" do
|
||||
it "matches #valid_encoding?" do
|
||||
str = "hello world"
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "hello world".encode("BIG5")
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "hello world".force_encoding("BIG5")
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "hello"
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "hello".encode("BIG5")
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "hello".force_encoding("BIG5")
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
|
||||
str = "\330"
|
||||
expect(Dummy.valid_encoding_p(str)).to eq(str.valid_encoding?)
|
||||
end
|
||||
end
|
||||
|
||||
# Checks that Dummy.ascii_only_p returns the same answer as String#ascii_only?
# for plain literals, strings transcoded to BIG5 with #encode, strings merely
# relabelled as BIG5 with #force_encoding, and the single-byte literal "\330".
describe "HELIX_rb_str_ascii_only_p" do
|
||||
it "matches #ascii_only?" do
|
||||
str = "hello world"
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "hello world".encode("BIG5")
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "hello world".force_encoding("BIG5")
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "hello"
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "hello".encode("BIG5")
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "hello".force_encoding("BIG5")
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
|
||||
str = "\330"
|
||||
expect(Dummy.ascii_only_p(str)).to eq(str.ascii_only?)
|
||||
end
|
||||
end
|
||||
|
||||
describe "Data_{Wrap,Get,Set}_Struct" do
|
||||
it "can allocate then change the data" do
|
||||
wrapper = Dummy::Wrapper.new
|
||||
|
|
|
@ -131,6 +131,14 @@ VALUE allocate_wrapper(VALUE klass) {
|
|||
return HELIX_Data_Wrap_Struct(klass, NULL, deallocate_wrapper, num);
|
||||
}
|
||||
|
||||
// Test shim: converts the C bool returned by HELIX_rb_str_valid_encoding_p()
// into Qtrue/Qfalse. `_self` is not used.
static VALUE TEST_valid_encoding_p(VALUE _self, VALUE str) {
|
||||
return HELIX_rb_str_valid_encoding_p(str) ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
// Test shim: converts the C bool returned by HELIX_rb_str_ascii_only_p()
// into Qtrue/Qfalse. `_self` is not used.
static VALUE TEST_ascii_only_p(VALUE _self, VALUE str) {
|
||||
return HELIX_rb_str_ascii_only_p(str) ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
static VALUE TEST_get_data(VALUE _self, VALUE wrapped) {
|
||||
int* num = HELIX_Data_Get_Struct_Value(wrapped);
|
||||
return INT2FIX(*num);
|
||||
|
@ -227,6 +235,9 @@ void Init_dummy() {
|
|||
|
||||
EXPORT_FUNC(STR2STR, 2);
|
||||
|
||||
EXPORT_FUNC(valid_encoding_p, 1);
|
||||
EXPORT_FUNC(ascii_only_p, 1);
|
||||
|
||||
EXPORT_FUNC(get_data, 1);
|
||||
EXPORT_FUNC(get_data_ptr, 1);
|
||||
EXPORT_FUNC(set_data, 2);
|
||||
|
|
|
@ -9,9 +9,21 @@ impl FromRuby for String {
|
|||
|
||||
fn from_ruby(value: VALUE) -> CheckResult<CheckedValue<String>> {
|
||||
if unsafe { sys::RB_TYPE_P(value, sys::T_STRING) } {
|
||||
Ok(unsafe { CheckedValue::new(value) })
|
||||
if unsafe { sys::rb_enc_get_index(value) == sys::rb_utf8_encindex() } {
|
||||
if unsafe { sys::rb_str_valid_encoding_p(value) } {
|
||||
unsafe { Ok(CheckedValue::new(value)) }
|
||||
} else {
|
||||
type_error!(value, "a valid UTF-8 String")
|
||||
}
|
||||
} else {
|
||||
if unsafe { sys::rb_str_ascii_only_p(value) } {
|
||||
unsafe { Ok(CheckedValue::new(value)) }
|
||||
} else {
|
||||
type_error!(value, "an UTF-8 String")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
type_error!(value, "a UTF-8 String")
|
||||
type_error!(value, "a String")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue