From 95109ae12df17e4701c25363cfa2abb1bd3104b3 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Wed, 21 Apr 2021 10:30:35 -0700 Subject: [PATCH] Add some properly structured test data and a destination audit_log delta table --- config.yml | 9 +++++---- .../_delta_log/00000000000000000000.json | 4 ++++ ...875-b088-2476b77e8b44.c000.snappy.parquet.crc | Bin 0 -> 44 bytes ...90-4875-b088-2476b77e8b44.c000.snappy.parquet | Bin 0 -> 4574 bytes .../simple/date=2021-03-12/auditlog-1234.json | 1 + src/config.rs | 3 +-- 6 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 data/delta/audit_logs/_delta_log/00000000000000000000.json create mode 100644 data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc create mode 100644 data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet create mode 100644 data/raw/simple/date=2021-03-12/auditlog-1234.json diff --git a/config.yml b/config.yml index e7fdf1a..681aed4 100644 --- a/config.yml +++ b/config.yml @@ -7,10 +7,11 @@ sources: # └── simple # └── date=2021-04-16 # └── auditlog-1234.json - - bucket: 'my-data-bucket' + - bucket: 'data' # Everything in the simple/ prefix will be considered part of this table - prefix: 'simple' + # the prefix may be a regular expression that is compatible with the Rust + # regex crate + prefix: '^raw/simple' partitions: - 'date' - database: 'logs' - table: 'audit_logs' + tablepath: 's3://data/delta/audit_logs' diff --git a/data/delta/audit_logs/_delta_log/00000000000000000000.json b/data/delta/audit_logs/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..064c30b --- /dev/null +++ b/data/delta/audit_logs/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1619026186984,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"date\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"4574","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"bb73f716-764d-419f-b2b7-d505c32fd872","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"accountId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"actionName\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"auditLevel\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"requestId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"requestParams\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"approver\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"authType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"duration\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"reason\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"user\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"response\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"statusCode\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"serviceName\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"timestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"workspaceId\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"configuration":{},"createdTime":1619026186600}} +{"add":{"path":"date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet","partitionValues":{"date":"2021-03-12"},"size":4574,"modificationTime":1619026186952,"dataChange":true}} diff --git a/data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc b/data/delta/audit_logs/date=2021-03-12/.part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..4f0c21c7daedf34747536523794bc467fc275c72 GIT binary patch literal 44 zcmYc;N@ieSU}CsxJlB?KYL#>UBJWiZx9xYokmt%%^_n#?f=RMu>!$}QPPy7U0a_ao A0ssI2 literal 0 HcmV?d00001 diff --git a/data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet b/data/delta/audit_logs/date=2021-03-12/part-00000-2254ce65-f690-4875-b088-2476b77e8b44.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7fe57f1d88702ec2cbd579ea6c0537b8c902ca9c GIT binary patch literal 4574 zcmcIo&1>UE6qoFLD2da2j*wFmf=Pt8IKs|{V+S8LYy;aiv|$^%hj!T=OXI{uwj4>$ zri4HbWe+*@P?i!x4?ToEFQx3EhduYar~LzZD9g5#(qj*O`kq)ek(1U0`OVXNzu!+Y zqxUA>-nf@X49cKv7RA6AKwL@~&K)TPD-}Yjw9PY_E_WD)ncj-QHO>yhHAn}<6;!04 z#}#biie8iVjSX2Qh9QJJKojzXqkv{caVW>ZZifZpB~+k@$L@b}@1sw*H|}gc+}iwX zb4zG<0ByS~6az%bM-J&2*lm!&`2~7Jan7%yhztXf1+-2@ zj;q>kt+Aus*Ytg(iDe>$Jl2|D9R=X;?zv3XqPJ%NvxwfKn7LxUkh4C5Zvm5U2VMZ1 zy6YlyF!1sKaUNwT#KFxYTyLsG@C?B7Oy36plkdA|gD`M&0IZB|Q(#l_9@fMSkVRd?uO9%cS)m0 z1m_EIE{$yjXY2KYZHEFv3eC}aW>|gUh_&>1mZitD11})Fv*Mt6Hvw!0%~D|TB`GIZ z7l80>NF2>MiPq!5UC8_2s$eYn&&1sX?0tRzp&{>)I!4nBN-V^LAuP*Ut6_eiEWn#% zd4{oU*0jdQxK09BOR1S#ex+mQusSX3MyvDa4^0f!M>E zOg!yY(2!ZHgHCv9oR~pSV^Dk}kG-m3PXQlGrjl<(E`<`wRgM`*QZs~Ne2o8*;b+g-HPUZ9;dnerM0<# zW%=J&@p-%)>OCO`FkjuUXx_8_fH&AbHtmZz|8rbC>n}&^AJL*f;D6b(=tp@KY>>?5 zt?zGn`?P0dftqsP@hvsGJgG{>Xt3&jOD*)#W!OxtzJ(H zENfcB-b>5(WWs-Ffi8hI_*X3dH7h=uD7*czTU(@6pHBL!srpGR&gmp*^%`DaSz5#1 zOIk0c0z| z-x#is;U5xi4B?zBe3(e{=$<|NBx}-pn?|TudC$b%C`_|Ppw zi*1}6>^0VS, - database: String, - table: String, + tablepath: String, } #[cfg(test)]