From 0cdd075c2262ca00ee95d00c642b8adc326b4ea7 Mon Sep 17 00:00:00 2001 From: Godfrey Chan Date: Sat, 7 Oct 2017 19:23:49 -0700 Subject: [PATCH] Add Symbol coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main use case for symbols is to use them as a HashMap key. However, this introduces a GC problem – we cannot store Ruby values in the heap without properly marking/registering them, otherwise they might get GC'ed by Ruby unexpectedly. (In fact, this is already a problem if you have a `Vec`.) I tried to avoid introducing additional problems by pinning down any symbols that goes thought the coercion protocol. This is probably overly aggressive, as it would cause any dynamic symbols (e.g. `String#to_sym`) to become un-GC-able. We can revisit this once we have a more general-purpose system to encode pinning semantics in the type system. --- crates/libcruby-sys/src/lib.rs | 11 ++++-- .../spec/text_transform_spec.rb | 36 +++++++++---------- examples/text_transform/src/lib.rs | 13 +++---- src/coercions/mod.rs | 1 + src/coercions/symbol.rs | 25 +++++++++++++ src/lib.rs | 35 ++++++++++++++++-- 6 files changed, 92 insertions(+), 29 deletions(-) create mode 100644 src/coercions/symbol.rs diff --git a/crates/libcruby-sys/src/lib.rs b/crates/libcruby-sys/src/lib.rs index 9acc764..9261a70 100644 --- a/crates/libcruby-sys/src/lib.rs +++ b/crates/libcruby-sys/src/lib.rs @@ -22,7 +22,7 @@ pub type c_string = *const libc::c_char; // pub type c_func = extern "C" fn(...); #[repr(C)] -#[derive(Copy, Clone, Debug)] +#[derive(Eq, PartialEq, Hash, Copy, Clone, Debug)] pub struct ID(*mut void); #[repr(C)] @@ -30,7 +30,7 @@ pub struct ID(*mut void); pub struct VALUE(*mut void); #[repr(C)] -#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[derive(Eq, PartialEq, Copy, Clone, Debug)] pub struct RubyException(isize); impl RubyException { @@ -163,6 +163,9 @@ extern "C" { #[link_name = "HELIX_T_FALSE"] pub static T_FALSE: isize; + #[link_name = "HELIX_T_SYMBOL"] + pub static T_SYMBOL: isize; + #[link_name = "HELIX_T_FIXNUM"] pub static T_FIXNUM: isize; @@ -188,6 +191,10 @@ extern "C" { pub fn rb_sprintf(specifier: c_string, ...) -> VALUE; pub fn rb_inspect(value: VALUE) -> VALUE; pub fn rb_intern(string: c_string) -> ID; + pub fn rb_intern_str(string: VALUE) -> ID; + pub fn rb_sym2id(symbol: VALUE) -> ID; + pub fn rb_id2sym(id: ID) -> VALUE; + pub fn rb_id2str(id: ID) -> VALUE; pub fn rb_ary_new_capa(capa: isize) -> VALUE; pub fn rb_ary_entry(ary: VALUE, offset: isize) -> VALUE; pub fn rb_ary_push(ary: VALUE, item: VALUE) -> VALUE; diff --git a/examples/text_transform/spec/text_transform_spec.rb b/examples/text_transform/spec/text_transform_spec.rb index 038cbfd..e04b1f7 100644 --- a/examples/text_transform/spec/text_transform_spec.rb +++ b/examples/text_transform/spec/text_transform_spec.rb @@ -11,13 +11,13 @@ describe "TextTransform" do it "can widen hash" do expect(TextTransform.widen_hash({ - "message" => "Hello", - "name" => "Aaron", - "handle" => "@tenderlove" + message: "Hello", + name: "Aaron", + handle: "@tenderlove" })).to eq({ - "message" => "Hello", - "name" => "Aaron", - "handle" => "@tenderlove" + "message": "Hello", + "name": "Aaron", + "handle": "@tenderlove" }) end @@ -31,13 +31,13 @@ describe "TextTransform" do it "can narrowen hash" do expect(TextTransform.narrowen_hash({ - "message" => "Hello", - "name" => "Aaron", - "handle" => "@tenderlove" + "message": "Hello", + "name": "Aaron", + "handle": "@tenderlove" })).to eq({ - "message" => "Hello", - "name" => "Aaron", - "handle" => "@tenderlove" + message: "Hello", + name: "Aaron", + handle: "@tenderlove" }) end @@ -51,13 +51,13 @@ describe "TextTransform" do it "can flip hash" do expect(TextTransform.flip_hash({ - "message" => "Hello", - "name" => "Aaron", - "handle" => "@tenderlove" + message: "Hello", + name: "Aaron", + handle: "@tenderlove" })).to eq({ - "ollǝH" => "ǝƃɐssǝɯ", - "uoɹɐ∀" => "ǝɯɐu", - "ǝʌolɹǝpuǝʇ@" => "ǝlpuɐɥ" + "ollǝH": "ǝƃɐssǝɯ", + "uoɹɐ∀": "ǝɯɐu", + "ǝʌolɹǝpuǝʇ@": "ǝlpuɐɥ" }) end end diff --git a/examples/text_transform/src/lib.rs b/examples/text_transform/src/lib.rs index 93e7396..560aecd 100644 --- a/examples/text_transform/src/lib.rs +++ b/examples/text_transform/src/lib.rs @@ -4,6 +4,7 @@ extern crate helix; use std::collections::HashMap; +use helix::Symbol; ruby! { class TextTransform { @@ -30,8 +31,8 @@ ruby! { text.into_iter().map(TextTransform::widen).collect() } - def widen_hash(text: HashMap) -> HashMap { - text.into_iter().map(|(k,v)| (TextTransform::widen(k), TextTransform::widen(v))).collect() + def widen_hash(text: HashMap) -> HashMap { + text.into_iter().map(|(k,v)| (Symbol::from_string(TextTransform::widen(k.to_string())), TextTransform::widen(v))).collect() } def narrowen(text: String) -> String { @@ -57,8 +58,8 @@ ruby! { text.into_iter().map(TextTransform::narrowen).collect() } - def narrowen_hash(text: HashMap) -> HashMap { - text.into_iter().map(|(k,v)| (TextTransform::narrowen(k), TextTransform::narrowen(v))).collect() + def narrowen_hash(text: HashMap) -> HashMap { + text.into_iter().map(|(k,v)| (Symbol::from_string(TextTransform::narrowen(k.to_string())), TextTransform::narrowen(v))).collect() } def flip(text: String) -> String { @@ -83,8 +84,8 @@ ruby! { text.into_iter().rev().map(TextTransform::flip).collect() } - def flip_hash(text: HashMap) -> HashMap { - text.into_iter().map(|(k,v)| (TextTransform::flip(v), TextTransform::flip(k))).collect() + def flip_hash(text: HashMap) -> HashMap { + text.into_iter().map(|(k,v)| (Symbol::from_string(TextTransform::flip(v)), TextTransform::flip(k.to_string()))).collect() } } } diff --git a/src/coercions/mod.rs b/src/coercions/mod.rs index 2e57f19..a9a1ab9 100644 --- a/src/coercions/mod.rs +++ b/src/coercions/mod.rs @@ -3,6 +3,7 @@ mod unit; mod bool; mod integers; mod float; +mod symbol; mod string; mod option; mod result; diff --git a/src/coercions/symbol.rs b/src/coercions/symbol.rs new file mode 100644 index 0000000..b080b43 --- /dev/null +++ b/src/coercions/symbol.rs @@ -0,0 +1,25 @@ +use sys::{VALUE, RB_TYPE_P, T_SYMBOL, rb_sym2id, rb_id2sym}; +use super::{FromRuby, CheckedValue, CheckResult, ToRuby, ToRubyResult}; +use super::super::{Symbol}; + +impl FromRuby for Symbol { + type Checked = CheckedValue; + + fn from_ruby(value: VALUE) -> CheckResult> { + if unsafe { RB_TYPE_P(value, T_SYMBOL) } { + unsafe { Ok(CheckedValue::new(value)) } + } else { + type_error!(value, "a symbol") + } + } + + fn from_checked(checked: CheckedValue) -> Symbol { + Symbol::from_id(unsafe { rb_sym2id(checked.to_value()) }) + } +} + +impl ToRuby for Symbol { + fn to_ruby(self) -> ToRubyResult { + Ok(unsafe { rb_id2sym(self.to_id()) }) + } +} diff --git a/src/lib.rs b/src/lib.rs index d511ac3..75e327b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ pub extern crate libcruby_sys as sys; // pub use rb; use std::ffi::CStr; -use sys::VALUE; +use sys::{VALUE, ID}; #[macro_export] macro_rules! raise { @@ -54,10 +54,39 @@ mod macros; pub use coercions::*; pub use errors::*; + +#[repr(C)] +#[derive(Eq, PartialEq, Hash, Copy, Clone, Debug)] +pub struct Symbol(ID); + +// Since the main use case for `Symbol` at the moment is as the +// key for a `HashMap`, this tries to avoid GC issues by alaways +// pinning the symbol, essentially making it a "copy type". This +// is probably overly aggressive, we can reconsider this when we +// have a more general-purpose mechanism to encode pinning +// semantics in the type system. +impl Symbol { + pub fn from_id(id: ID) -> Symbol { + Symbol(id) + } + + pub fn to_id(self) -> ID { + self.0 + } + + pub fn from_string(string: String) -> Symbol { + Symbol(unsafe { sys::rb_intern_str(string.to_ruby().unwrap()) }) + } + + pub fn to_string(self) -> String { + unsafe { String::from_ruby_unwrap(sys::rb_id2str(self.0)) } + } +} + pub use class_definition::{ClassDefinition, MethodDefinition}; #[repr(C)] -#[derive(Copy, Clone, Debug)] +#[derive(Eq, PartialEq, Copy, Clone, Debug)] pub struct Class(VALUE); pub trait RubyMethod { @@ -101,7 +130,7 @@ impl Class { Class(value) } - pub fn to_value(&self) -> VALUE { + pub fn to_value(self) -> VALUE { self.0 }