Merge pull request #150 from tildeio/safer-strings

Safer string coercions
This commit is contained in:
Godfrey Chan 2018-06-01 17:13:19 -07:00 committed by GitHub
commit b6a3e5acc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 109 additions and 4 deletions

View File

@ -208,6 +208,8 @@ extern "C" {
pub fn rb_define_alloc_func(klass: VALUE, func: extern "C" fn(klass: VALUE) -> VALUE);
pub fn rb_define_method(class: VALUE, name: c_string, func: c_func, arity: isize);
pub fn rb_define_singleton_method(class: VALUE, name: c_string, func: c_func, arity: isize);
pub fn rb_enc_get_index(obj: VALUE) -> isize;
pub fn rb_utf8_encindex() -> isize;
pub fn rb_sprintf(specifier: c_string, ...) -> VALUE;
pub fn rb_inspect(value: VALUE) -> VALUE;
pub fn rb_intern(string: c_string) -> ID;
@ -239,6 +241,12 @@ extern "C" {
state: *mut RubyException)
-> VALUE;
#[link_name = "HELIX_rb_str_valid_encoding_p"]
pub fn rb_str_valid_encoding_p(string: VALUE) -> bool;
#[link_name = "HELIX_rb_str_ascii_only_p"]
pub fn rb_str_ascii_only_p(string: VALUE) -> bool;
#[link_name = "HELIX_Data_Wrap_Struct"]
pub fn Data_Wrap_Struct(klass: VALUE, mark: extern "C" fn(*mut void), free: extern "C" fn(*mut void), data: *mut void) -> VALUE;

View File

@ -50,7 +50,19 @@ describe "Console" do
end
end
it "can handle invalid arguments" do
expect { console.log(123) }.to raise_error(TypeError, "Expected a UTF-8 String, got 123")
# Argument validation for console.log: every example passes a value that is
# expected to be rejected with a TypeError and pins the exact error message.
describe "invalid arguments" do
# A non-String argument (the Integer 123).
it "can handle non-strings" do
expect { console.log(123) }.to raise_error(TypeError, "Expected a String, got 123")
end
# A String tagged with a non-UTF-8 encoding (BIG5).
# NOTE(review): an empty BIG5 string is presumably ASCII-only, and the String
# coercion in this same change accepts ASCII-only strings of any encoding, so
# this literal may have lost non-ASCII characters -- confirm against the
# repository before relying on this example.
it "raises on non UTF-8 strings" do
str = "".encode("BIG5")
expect { console.log(str) }.to raise_error(TypeError, "Expected an UTF-8 String, got #{str.inspect}")
end
# "\330" is the single byte 0xD8, which is not a complete UTF-8 sequence.
it "raises on invalid UTF-8 strings" do
str = "\330"
expect { console.log(str) }.to raise_error(TypeError, "Expected a valid UTF-8 String, got #{str.inspect}")
end
end
end

View File

@ -2,6 +2,7 @@
#include <ruby.h>
#include <ruby/intern.h>
#include <ruby/encoding.h>
#include <stdbool.h>
#include <helix_runtime.h>
@ -64,6 +65,14 @@ VALUE HELIX_rb_utf8_str_new(const char* str, long len) {
return rb_utf8_str_new(str, len);
}
// Reports whether `str` is well-formed in its own encoding: true unless the
// string's code range is ENC_CODERANGE_BROKEN.
bool HELIX_rb_str_valid_encoding_p(VALUE str) {
    const int coderange = rb_enc_str_coderange(str);

    if (coderange == ENC_CODERANGE_BROKEN) {
        return false;
    }
    return true;
}
// Reports whether `str` consists solely of 7-bit characters: true only when
// the string's code range is ENC_CODERANGE_7BIT.
bool HELIX_rb_str_ascii_only_p(VALUE str) {
    const int coderange = rb_enc_str_coderange(str);

    if (coderange == ENC_CODERANGE_7BIT) {
        return true;
    }
    return false;
}
// Function-shaped wrapper around Data_Wrap_Struct: forwards the class, the
// mark and free callbacks and the data pointer, and returns the result.
VALUE HELIX_Data_Wrap_Struct(VALUE klass, HELIX_RUBY_DATA_FUNC mark, HELIX_RUBY_DATA_FUNC free, void* data) {
    VALUE wrapped = Data_Wrap_Struct(klass, mark, free, data);

    return wrapped;
}

View File

@ -69,6 +69,9 @@ HELIX_EXTERN VALUE HELIX_FIX2INT(VALUE fix);
HELIX_EXTERN VALUE HELIX_rb_utf8_str_new(const char* str, long len);
HELIX_EXTERN bool HELIX_rb_str_valid_encoding_p(VALUE str);
HELIX_EXTERN bool HELIX_rb_str_ascii_only_p(VALUE str);
// typedef VALUE (*HELIX_rb_alloc_func_t)(VALUE);
// void HELIX_rb_define_alloc_func(VALUE klass, HELIX_rb_alloc_func_t func);

View File

@ -204,6 +204,56 @@ describe HelixRuntime do
end
end
describe "HELIX_rb_str_valid_encoding_p" do
  # The C helper has to agree with String#valid_encoding? for every sample,
  # checked in the order listed.
  it "matches #valid_encoding?" do
    samples = [
      "hello world",
      "hello world".encode("BIG5"),
      "hello world".force_encoding("BIG5"),
      "",
      "".encode("BIG5"),
      "".force_encoding("BIG5"),
      "\330",
    ]

    samples.each do |sample|
      expect(Dummy.valid_encoding_p(sample)).to eq(sample.valid_encoding?)
    end
  end
end
describe "HELIX_rb_str_ascii_only_p" do
  # The C helper has to agree with String#ascii_only? for every sample,
  # checked in the order listed.
  it "matches #ascii_only?" do
    samples = [
      "hello world",
      "hello world".encode("BIG5"),
      "hello world".force_encoding("BIG5"),
      "",
      "".encode("BIG5"),
      "".force_encoding("BIG5"),
      "\330",
    ]

    samples.each do |sample|
      expect(Dummy.ascii_only_p(sample)).to eq(sample.ascii_only?)
    end
  end
end
describe "Data_{Wrap,Get,Set}_Struct" do
it "can allocate then change the data" do
wrapper = Dummy::Wrapper.new

View File

@ -131,6 +131,14 @@ VALUE allocate_wrapper(VALUE klass) {
return HELIX_Data_Wrap_Struct(klass, NULL, deallocate_wrapper, num);
}
// Test shim: returns the result of HELIX_rb_str_valid_encoding_p as Qtrue or
// Qfalse. `_self` is unused.
static VALUE TEST_valid_encoding_p(VALUE _self, VALUE str) {
    if (HELIX_rb_str_valid_encoding_p(str)) {
        return Qtrue;
    }
    return Qfalse;
}
// Test shim: returns the result of HELIX_rb_str_ascii_only_p as Qtrue or
// Qfalse. `_self` is unused.
static VALUE TEST_ascii_only_p(VALUE _self, VALUE str) {
    if (HELIX_rb_str_ascii_only_p(str)) {
        return Qtrue;
    }
    return Qfalse;
}
static VALUE TEST_get_data(VALUE _self, VALUE wrapped) {
int* num = HELIX_Data_Get_Struct_Value(wrapped);
return INT2FIX(*num);
@ -227,6 +235,9 @@ void Init_dummy() {
EXPORT_FUNC(STR2STR, 2);
EXPORT_FUNC(valid_encoding_p, 1);
EXPORT_FUNC(ascii_only_p, 1);
EXPORT_FUNC(get_data, 1);
EXPORT_FUNC(get_data_ptr, 1);
EXPORT_FUNC(set_data, 2);

View File

@ -9,9 +9,21 @@ impl FromRuby for String {
/// Checks whether the Ruby `value` may be coerced into a Rust `String`.
///
/// A value is accepted when it is a Ruby String that is either tagged with
/// the UTF-8 encoding and has a valid encoding, or tagged with some other
/// encoding but is ASCII-only. Every other value is handed to `type_error!`
/// together with a short description of what was expected.
///
/// NOTE(review): this listing appears to interleave pre-change and
/// post-change lines (the bare `Ok(...)` right after the type check, and the
/// two consecutive `type_error!` calls in the final branch) -- confirm
/// against the actual file before editing.
fn from_ruby(value: VALUE) -> CheckResult<CheckedValue<String>> {
// Gate on the Ruby type tag first: only T_STRING values go any further.
if unsafe { sys::RB_TYPE_P(value, sys::T_STRING) } {
Ok(unsafe { CheckedValue::new(value) })
// Compare the string's encoding index with the index of the UTF-8 encoding.
if unsafe { sys::rb_enc_get_index(value) == sys::rb_utf8_encindex() } {
// Tagged UTF-8: accept only if the string has a valid encoding.
if unsafe { sys::rb_str_valid_encoding_p(value) } {
unsafe { Ok(CheckedValue::new(value)) }
} else {
type_error!(value, "a valid UTF-8 String")
}
} else {
// Tagged with another encoding: accept only ASCII-only content
// (presumably because 7-bit ASCII bytes are also valid UTF-8 -- confirm).
if unsafe { sys::rb_str_ascii_only_p(value) } {
unsafe { Ok(CheckedValue::new(value)) }
} else {
type_error!(value, "an UTF-8 String")
}
}
} else {
// Not a Ruby String at all.
type_error!(value, "a UTF-8 String")
type_error!(value, "a String")
}
}