Returning to schema inference work

R Tyler Croy 2021-04-17 14:41:18 -07:00
parent a795083129
commit 922a08efdc
2 changed files with 10 additions and 6 deletions


@@ -7,3 +7,8 @@ and cannot be properly updated to write directly to link:https://delta.io[Delta
 Lake].
+
+== Related work
+
+link:https://docs.databricks.com/spark/latest/structured-streaming/auto-loader.html[S3 Auto-loader]


@@ -10,7 +10,7 @@ enum WriterError {
     Generic,
 }
 
-fn json_to_batch(json: Vec<Value>) { //-> Result<RecordBatch, WriterError> {
+fn json_to_batch(json: Vec<Value>) -> Result<RecordBatch, WriterError> {
     use arrow::json::reader::*;
     // infer_json_schema_from_iterator is weird in that it expects each value to be wrapped in a
     // Result
@@ -20,17 +20,17 @@ fn json_to_batch(json: Vec<Value>) { //-> Result<RecordBatch, WriterError> {
     println!("schema: {:#?}", schema);
 
-    //Err(WriterError::Generic)
+    Err(WriterError::Generic)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[ignore]
     #[test]
     fn demo() {
-        let delta = deltalake::get_backend_for_uri("./data");
+        let _delta = deltalake::get_backend_for_uri("./data/simple");
         todo!("Still need a high level writer test");
     }
 
     #[test]
@@ -50,7 +50,6 @@ mod tests {
         let result = json_to_batch(value);
-        assert!(false);
-        //assert!(result.is_ok());
+        assert!(result.is_ok());
     }
 }
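
Putting the pieces together, here is a minimal sketch of where json_to_batch appears to be headed, assuming an arrow version (circa 4.x) whose arrow::json::reader exposes infer_json_schema_from_iterator together with a Decoder::new(schema, batch_size, projection) constructor and a next_batch method. Only the WriterError mapping comes from the diff; everything else is illustrative, not the commit's final implementation.

use std::sync::Arc;

use arrow::json::reader::{infer_json_schema_from_iterator, Decoder};
use arrow::record_batch::RecordBatch;
use serde_json::Value;

#[derive(Debug)]
enum WriterError {
    Generic,
}

fn json_to_batch(json: Vec<Value>) -> Result<RecordBatch, WriterError> {
    // infer_json_schema_from_iterator expects each value to be wrapped in a
    // Result, so feed it the values through an Ok(..) adapter.
    let schema = infer_json_schema_from_iterator(json.iter().cloned().map(Ok))
        .map_err(|_| WriterError::Generic)?;

    // Decode all values into a single RecordBatch against the inferred schema.
    // next_batch yields Ok(None) on an empty iterator, treated as an error here.
    let decoder = Decoder::new(Arc::new(schema), json.len(), None);
    decoder
        .next_batch(&mut json.into_iter().map(Ok))
        .map_err(|_| WriterError::Generic)?
        .ok_or(WriterError::Generic)
}

A hypothetical call site, mirroring the assert!(result.is_ok()) test in the diff above:

let rows = vec![
    serde_json::json!({"id": 1, "name": "alpha"}),
    serde_json::json!({"id": 2, "name": "beta"}),
];
let batch = json_to_batch(rows).expect("two rows should decode");
assert_eq!(2, batch.num_rows());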