diff --git a/config.yml b/config.yml index e7fdf1a..681aed4 100644 --- a/config.yml +++ b/config.yml @@ -7,10 +7,11 @@ sources: # └── simple # └── date=2021-04-16 # └── auditlog-1234.json - - bucket: 'my-data-bucket' + - bucket: 'data' # Everything in the simple/ prefix will be considered part of this table - prefix: 'simple' + # the prefix may be a regular expression that is compatible with the Rust + # regex crate + prefix: '^raw/simple' partitions: - 'date' - database: 'logs' - table: 'audit_logs' + tablepath: 's3://data/delta/audit_logs' diff --git a/data/delta/audit_logs/_delta_log/00000000000000000000.json b/data/delta/audit_logs/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..064c30b --- /dev/null +++ b/data/delta/audit_logs/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1619026186984,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"date\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"4574","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"bb73f716-764d-419f-b2b7-d505c32fd872","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"accountId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"actionName\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"auditLevel\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"requestId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"requestParams\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"approver\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"authType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"duration\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"reason\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"user\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"response\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"statusCode\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"serviceName\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"timestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"workspaceId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"configuration":{},"createdTime":1619026186600}} +{"add":{"path":"date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet","partitionValues":{"date":"2021-03-12"},"size":4574,"modificationTime":1619026186952,"dataChange":true}} diff --git a/data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc b/data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc new file mode 100644 index 0000000..4f0c21c Binary files /dev/null and b/data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc differ diff --git a/data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet b/data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet new file mode 100644 index 0000000..7fe57f1 Binary files /dev/null and b/data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet differ diff --git a/data/raw/simple/date=2021-03-12/auditlog-1234.json b/data/raw/simple/date=2021-03-12/auditlog-1234.json new file mode 100644 index 0000000..3f66d20 --- /dev/null +++ b/data/raw/simple/date=2021-03-12/auditlog-1234.json @@ -0,0 +1 @@ +{"version":"2.0","date":"2021-03-12","timestamp":1615535931674,"workspaceId":"0xdeadbeef","serviceName":"genie","actionName":"databricksAccess","requestId":"54f3f09c-890a-4d7f-b75a-affaebd72982","requestParams":{"duration":"81600000","reason":"Example","authType":"loginToWorkspace","approver":"","user":"charlie@example.com"},"response":{"statusCode":200},"accountId":"0xdeadbeef","auditLevel":"WORKSPACE_LEVEL"} diff --git a/src/config.rs b/src/config.rs index 21e1409..937ca5b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -40,8 +40,7 @@ struct Source { #[serde(with = "serde_regex")] prefix: Regex, partitions: Vec, - database: String, - table: String, + tablepath: String, } #[cfg(test)]