Returning to schema inference work

R Tyler Croy 2021-04-17 14:41:18 -07:00
parent a795083129
commit 922a08efdc
2 changed files with 10 additions and 6 deletions


@@ -7,3 +7,8 @@ and cannot be properly updated to write directly to link:https://delta.io[Delta
 Lake].
+
+== Related work
+
+link:https://docs.databricks.com/spark/latest/structured-streaming/auto-loader.html[S3 Auto-loader]


@@ -10,7 +10,7 @@ enum WriterError {
     Generic,
 }
 
-fn json_to_batch(json: Vec<Value>) { //-> Result<RecordBatch, WriterError> {
+fn json_to_batch(json: Vec<Value>) -> Result<RecordBatch, WriterError> {
     use arrow::json::reader::*;
     // infer_json_schema_from_iterator is weird in that it expects each value to be wrapped in a
     // Result
@@ -20,17 +20,17 @@ fn json_to_batch(json: Vec<Value>) { //-> Result<RecordBatch, WriterError> {
     println!("schema: {:#?}", schema);
 
-    //Err(WriterError::Generic)
+    Err(WriterError::Generic)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[ignore]
     #[test]
     fn demo() {
-        let delta = deltalake::get_backend_for_uri("./data");
+        let _delta = deltalake::get_backend_for_uri("./data/simple");
         todo!("Still need a high level writer test");
     }
 
     #[test]
@@ -50,7 +50,6 @@ mod tests {
         let result = json_to_batch(value);
-        assert!(false);
-        //assert!(result.is_ok());
+        assert!(result.is_ok());
     }
 }
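
Putting the pieces together, here is a minimal sketch of where json_to_batch appears to be headed, assuming an arrow version (circa 4.x) whose arrow::json::reader exposes infer_json_schema_from_iterator together with a Decoder::new(schema, batch_size, projection) constructor and a next_batch method. Only the WriterError mapping comes from the diff; everything else is illustrative, not the commit's final implementation.

use std::sync::Arc;

use arrow::json::reader::{infer_json_schema_from_iterator, Decoder};
use arrow::record_batch::RecordBatch;
use serde_json::Value;

#[derive(Debug)]
enum WriterError {
    Generic,
}

fn json_to_batch(json: Vec<Value>) -> Result<RecordBatch, WriterError> {
    // infer_json_schema_from_iterator expects each value to be wrapped in a
    // Result, so feed it the values through an Ok(..) adapter.
    let schema = infer_json_schema_from_iterator(json.iter().cloned().map(Ok))
        .map_err(|_| WriterError::Generic)?;

    // Decode all values into a single RecordBatch against the inferred schema.
    // next_batch yields Ok(None) on an empty iterator, treated as an error here.
    let decoder = Decoder::new(Arc::new(schema), json.len(), None);
    decoder
        .next_batch(&mut json.into_iter().map(Ok))
        .map_err(|_| WriterError::Generic)?
        .ok_or(WriterError::Generic)
}

A hypothetical call site, mirroring the assert!(result.is_ok()) test in the diff above:

let rows = vec![
    serde_json::json!({"id": 1, "name": "alpha"}),
    serde_json::json!({"id": 2, "name": "beta"}),
];
let batch = json_to_batch(rows).expect("two rows should decode");
assert_eq!(2, batch.num_rows());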