Ensure that the MatchedFile information gets carried out of match_record

Since the function is already doing some parsing, might as well carry out the
important data.
This commit is contained in:
R Tyler Croy 2021-04-21 14:06:24 -07:00
parent 4d1951c716
commit 3b598da87f
1 changed files with 38 additions and 9 deletions

View File

@ -60,10 +60,7 @@ async fn s3_event_handler(event: S3Event, _ctx: Context) -> Result<String, Error
* *
* Each of the records passed is assumed to be an `ObjectCreated:Put` * Each of the records passed is assumed to be an `ObjectCreated:Put`
*/ */
fn match_record<'a>( fn match_record(record: &S3EventRecord, conf: &config::Config) -> Option<MatchedFile> {
record: &'a S3EventRecord,
conf: &'a config::Config,
) -> Option<&'a config::Source> {
if let Some(bucket) = &record.s3.bucket.name { if let Some(bucket) = &record.s3.bucket.name {
for source in &conf.sources { for source in &conf.sources {
if bucket != &source.bucket { if bucket != &source.bucket {
@ -80,7 +77,7 @@ fn match_record<'a>(
/* If there are no partitions expected, then we've already matched /* If there are no partitions expected, then we've already matched
*/ */
if expected_partitions == 0 { if expected_partitions == 0 {
return Some(source); return Some(MatchedFile::new(source, key, vec![]));
} }
/* /*
@ -88,20 +85,24 @@ fn match_record<'a>(
* that the data is partitioned according to the configuration * that the data is partitioned according to the configuration
*/ */
let partitions = Partition::from_path_str(key); let partitions = Partition::from_path_str(key);
info!("parts: {:?}", partitions); debug!(
"Partitions identified from record: {} -- {:?}",
key, partitions
);
// No match if the discovered partitions don't match the count // No match if the discovered partitions don't match the count
if partitions.len() != source.partitions.len() { if partitions.len() != expected_partitions {
continue; continue;
} }
for index in 0..source.partitions.len() { for index in 0..expected_partitions {
// The partition at the index doesn't match what we're looking for // The partition at the index doesn't match what we're looking for
if partitions[index].name != source.partitions[index] { if partitions[index].name != source.partitions[index] {
return None; return None;
} }
} }
return Some(source);
return Some(MatchedFile::new(source, key, partitions));
} }
} }
} }
@ -109,6 +110,34 @@ fn match_record<'a>(
} }
/** /**
* A MatchedFile will contain all the necessary information for reading the object and allowing a
* Delta write to occur
*/
struct MatchedFile {
/// The source bucket containing the file to read
bucket: String,
/// The source object to read, expected to be JSON
object: String,
/// Partitions to include in the Delta write
partitions: Vec<Partition>,
/// The output path for the Delta writer to use
output_path: String,
}
impl MatchedFile {
fn new(source: &config::Source, key: &str, partitions: Vec<Partition>) -> Self {
MatchedFile {
bucket: source.bucket.clone(),
object: key.to_string(),
partitions: partitions,
output_path: source.tablepath.clone(),
}
}
}
/**
* A Partition is a simple struct to carry values from paths like:
* just/a/path/year=2021/mydata.json
*/ */
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct Partition { struct Partition {