QLOG: JSON Encoder: Design

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Neil Horman <nhorman@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/22037)
This commit is contained in:
Hugo Landau 2023-09-08 11:13:39 +01:00
parent 89dd87e1e8
commit 8a123d4342
2 changed files with 316 additions and 0 deletions

View File

@ -0,0 +1,91 @@
JSON Encoder
============
Approach
--------
The JSON encoder exists to support QLOG implementation. There is no intention to
implement a decoder at this time. The encoder is intended to support
zero-allocation automation using immediate calls without the use of an
intermediate syntax tree representation. This enables highly efficient
serialization when called from QUIC code without dynamic memory allocation.
An example usage is as follows:
```c
/*
 * Example: serialise a small JSON object to the BIO b.
 *
 * Returns 1 if the output was generated and flushed successfully, and 0
 * otherwise.
 */
int generate_json(BIO *b)
{
    int ret = 1;
    JSON_ENC z;

    if (!ossl_json_init(&z, b, JSON_FLAG_NONE))
        return 0;

    ossl_json_object_begin(&z);
    {
        ossl_json_key(&z, "key");
        ossl_json_str(&z, "value");

        ossl_json_key(&z, "key2");
        ossl_json_u64(&z, 42);

        ossl_json_key(&z, "key3");
        ossl_json_array_begin(&z);
        {
            ossl_json_null(&z);
            ossl_json_f64(&z, 42.0);
            ossl_json_str(&z, "string");
        }
        ossl_json_array_end(&z);
    }
    ossl_json_object_end(&z);

    /*
     * Error handling is deferred: the builder calls above return void, so
     * the error state is checked once after all of them have been made.
     */
    if (ossl_json_in_error(&z))
        ret = 0;

    /*
     * Output may still be held in the encoder's write buffer. Flush it to
     * the BIO before destroying the encoder; the encoder is cleaned up
     * regardless of whether the flush succeeds.
     */
    if (!ossl_json_flush_cleanup(&z))
        ret = 0;

    return ret;
}
```
The zero-allocation, immediate-output design means that most API calls
correspond directly to immediately generated output; however there is some
minimal state tracking. The API guarantees that it will never generate invalid
JSON, with two exceptions:
- it is the caller's responsibility to avoid generating duplicate keys;
- it is the caller's responsibility to provide valid UTF-8 strings.
Since the JSON encoder is for internal use only, its structure is defined in
headers and can be incorporated into other objects without a heap allocation.
The JSON encoder maintains an internal write buffer and a small state tracking
stack (1 bit per level of depth in a JSON hierarchy).
JSON-SEQ
--------
The encoder supports JSON-SEQ (RFC 7464), as this is an optimal format for
outputting QLOG for our purposes.
Number Handling
---------------
It is an unfortunate reality that many JSON implementations are not able to
handle integers outside `[-2**53 + 1, 2**53 - 1]`. This leads to the I-JSON
specification, RFC 7493, which recommends that values outside this range are
encoded as strings.
An optional I-JSON mode is offered, in which case integers outside the range
`[-2**53 + 1, 2**53 - 1]` are automatically serialized as strings instead.
Error Handling
--------------
Error handling is deferred to improve ergonomics. If any call to a JSON encoder
fails, all future calls also fail and the caller is expected to ascertain that
the encoding process failed by calling `ossl_json_in_error`.
API
---
The API is documented in `include/internal/json_enc.h`.

225
include/internal/json_enc.h Normal file
View File

@ -0,0 +1,225 @@
/*
* Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#ifndef JSON_ENC_H
# define JSON_ENC_H
# include <openssl/bio.h>
/*
* JSON Encoder
* ============
*
* This JSON encoder is used for QLOG. It supports ordinary JSON (RFC 7159),
* JSON-SEQ (RFC 7464) and I-JSON (RFC 7493). It supports only basic ASCII.
*/
/*
 * Write buffer used by the JSON encoder: a BIO together with a character
 * buffer and two size fields describing it.
 */
struct json_write_buf {
    /* BIO associated with this buffer (presumably the output sink). */
    BIO *bio;
    /* Buffer storage. */
    char *buf;
    /* NOTE(review): presumably the allocated size of buf in bytes; confirm. */
    size_t alloc;
    /* NOTE(review): presumably the number of bytes currently held; confirm. */
    size_t cur;
};
/*
 * JSON encoder state. The structure is defined in this header so that it can
 * be embedded directly in other objects or placed on the stack.
 */
typedef struct json_enc_st {
    /* NOTE(review): presumably the JSON_FLAG_* bits given at init; confirm. */
    uint32_t flags;
    /* 1 if an error has occurred. */
    unsigned char error;
    /* Current stack size, bit part: see stack_end_byte. */
    unsigned char stack_end_bit;
    /* Current state. */
    unsigned char state;
    /* The stack stores a bitmap: 0=object, 1=array. */
    unsigned char *stack;
    unsigned char defer_indent;
    /* NOTE(review): looks like inline storage for a small stack; confirm. */
    unsigned char stack_small[16];
    struct json_write_buf wbuf;
    /* Current stack size is stack_end_byte bytes plus stack_end_bit bits. */
    size_t stack_end_byte;
    /* Allocated stack size in bytes. */
    size_t stack_bytes;
} JSON_ENC;
/*
* ossl_json_init
* --------------
*
* Initialises a JSON encoder.
*
* json is the encoder to initialise, bio is the BIO to which output is written
* and flags is JSON_FLAG_NONE or a bitwise OR of the JSON_FLAG_* values below.
*
* If the flag JSON_FLAG_SEQ is passed, the output is in JSON-SEQ. The caller
* should use the encoder as though it is encoding members of a JSON array (but
* without calling ossl_json_array_begin() or ossl_json_array_end()). Each
* top-level JSON item (e.g. JSON object) encoded will be separated correctly as
* per the JSON-SEQ format.
*
* If the flag JSON_FLAG_SEQ is not passed, the output is in JSON format.
* Generally the caller should encode only a single output item (e.g.
* a JSON object).
*
* By default, JSON output is maximally compact. If JSON_FLAG_PRETTY is set,
* JSON/JSON-SEQ output is spaced for optimal human readability.
*
* If JSON_FLAG_IJSON is set, integers outside the range `[-2**53 + 1, 2**53 -
* 1]` are automatically converted to decimal strings before serialization.
*
* NOTE(review): the return convention is not stated here; presumably 1 on
* success and 0 on failure, as for the other int-returning calls -- confirm.
*/
/* No options: compact, ordinary JSON output. */
#define JSON_FLAG_NONE 0
/* Emit JSON-SEQ (RFC 7464) rather than ordinary JSON. */
#define JSON_FLAG_SEQ (1U << 0)
/* Space the output for human readability. */
#define JSON_FLAG_PRETTY (1U << 1)
/* I-JSON (RFC 7493) mode: out-of-range integers are written as strings. */
#define JSON_FLAG_IJSON (1U << 2)
int ossl_json_init(JSON_ENC *json, BIO *bio, uint32_t flags);
/*
* ossl_json_cleanup
* -----------------
*
* Destroys a JSON encoder.
*
* NOTE(review): nothing here states that pending output is flushed on
* cleanup; callers that need the output delivered should presumably use
* ossl_json_flush() or ossl_json_flush_cleanup() -- confirm.
*/
void ossl_json_cleanup(JSON_ENC *json);
/*
* ossl_json_reset
* ---------------
*
* Resets a JSON encoder, as though it has just been initialised, allowing it
* to be used again for new output syntactically unrelated to any previous
* output. This is similar to calling ossl_json_cleanup() followed by
* ossl_json_init() but may allow internal buffers, etc. to be reused.
*
* If the JSON encoder has entered an error state, this function MAY allow
* recovery from this error state, in which case it will return 1. If this
* function returns 0, the JSON encoder is unrecoverable and
* ossl_json_cleanup() must be called.
*
* Automatically calls ossl_json_flush().
*/
int ossl_json_reset(JSON_ENC *json);
/*
* ossl_json_flush
* ---------------
*
* Flushes the JSON encoder, ensuring that any residual bytes in internal
* buffers are written to the provided sink BIO. Flushing may also happen
* autonomously as buffers are filled, but the caller must use this function
* to guarantee all data has been flushed.
*
* NOTE(review): the return convention is not stated here; presumably 1 on
* success and 0 on failure -- confirm.
*/
int ossl_json_flush(JSON_ENC *json);
/*
* ossl_json_flush_cleanup
* -----------------------
*
* Tries to flush as in a call to ossl_json_flush(), and then calls
* ossl_json_cleanup() regardless of the result. The result of the flush call
* is returned. The encoder must not be used after this call unless it is
* initialised again.
*/
int ossl_json_flush_cleanup(JSON_ENC *json);
/*
* ossl_json_set_sink
* ------------------
*
* Changes the sink used by the JSON encoder. bio is the new BIO to which
* output is written.
*
* NOTE(review): whether output still buffered for the previous sink is
* flushed first, and the return convention (presumably 1 on success and 0 on
* failure), are not stated here -- confirm.
*/
int ossl_json_set_sink(JSON_ENC *json, BIO *bio);
/*
* ossl_json_in_error
* ------------------
*
* To enhance the ergonomics of the JSON API, the JSON object uses an implicit
* error tracking model. When a JSON API call fails (for example due to caller
* error, such as trying to close an array which was not opened), the JSON
* object enters an error state and all further calls are silently ignored.
*
* The caller can detect this condition after it is finished making builder
* calls to the JSON object by calling this function. This function returns 1
* if an error occurred (and presumably 0 otherwise). Once an error has
* occurred, the caller's only recourse is to call ossl_json_reset() or
* ossl_json_cleanup().
*
* Note that partial (i.e., invalid) output may still have been sent to the BIO
* in this case. Since the amount of output which can potentially be produced
* by a JSON object is unbounded, it is impractical to buffer it all before
* flushing. It is expected that errors will ordinarily be either caller errors
* (programming errors) or BIO errors.
*/
int ossl_json_in_error(JSON_ENC *json);
/*
* JSON Builder Calls
* ==================
*
* These functions are used to build JSON output. The functions which have
* begin and end function pairs must be called in correctly nested sequence.
* When writing an object, ossl_json_key() must be called exactly once before
* each call to write a JSON item.
*
* The JSON library takes responsibility for enforcing correct usage patterns.
* If a call is made that does not correspond to the JSON syntax, the JSON
* object enters the error state and all subsequent calls are ignored.
*
* In JSON-SEQ mode, the caller should act as though the library implicitly
* places all calls between an ossl_json_array_begin() and
* ossl_json_array_end() pair; for example, the normal usage pattern would be
* to call ossl_json_object_begin() followed by ossl_json_object_end(), in
* repeated sequence.
*
* The library does not enforce non-generation of duplicate keys. Avoiding this
* is the caller's responsibility. It is also the caller's responsibility to
* pass valid UTF-8 strings. All other forms of invalid output will cause an
* error. Note that due to the immediate nature of the API, partial output may
* have already been generated in such a case.
*/
/* Begin a new JSON object. Must be closed with ossl_json_object_end(). */
void ossl_json_object_begin(JSON_ENC *json);
/* End a JSON object. Must be matched with a call to ossl_json_object_begin(). */
void ossl_json_object_end(JSON_ENC *json);
/* Begin a new JSON array. Must be closed with ossl_json_array_end(). */
void ossl_json_array_begin(JSON_ENC *json);
/* End a JSON array. Must be matched with a call to ossl_json_array_begin(). */
void ossl_json_array_end(JSON_ENC *json);
/*
* Encode a JSON key within an object. Pass a zero-terminated string, which can
* be freed immediately following the call to this function. Must be called
* exactly once before each item written inside an object.
*/
void ossl_json_key(JSON_ENC *json, const char *key);
/* Encode a JSON 'null' value. */
void ossl_json_null(JSON_ENC *json);
/*
* Encode a JSON boolean value.
* NOTE(review): presumably any non-zero value encodes 'true' -- confirm.
*/
void ossl_json_bool(JSON_ENC *json, int value);
/*
* Encode a JSON integer from a uint64_t. If JSON_FLAG_IJSON is set and the
* value lies outside `[-2**53 + 1, 2**53 - 1]`, it is written as a decimal
* string instead.
*/
void ossl_json_u64(JSON_ENC *json, uint64_t value);
/*
* Encode a JSON integer from an int64_t. If JSON_FLAG_IJSON is set and the
* value lies outside `[-2**53 + 1, 2**53 - 1]`, it is written as a decimal
* string instead.
*/
void ossl_json_i64(JSON_ENC *json, int64_t value);
/*
* Encode a JSON number from a 64-bit floating point value.
* NOTE(review): handling of NaN and infinities, which JSON cannot represent,
* is not documented here -- confirm.
*/
void ossl_json_f64(JSON_ENC *json, double value);
/*
* Encode a JSON UTF-8 string from a zero-terminated string. The string passed
* can be freed immediately following the call to this function. The caller is
* responsible for passing valid UTF-8.
*/
void ossl_json_str(JSON_ENC *json, const char *str);
/*
* Encode a JSON UTF-8 string from a string with the given length. The string
* passed can be freed immediately following the call to this function. The
* caller is responsible for passing valid UTF-8.
*/
void ossl_json_str_len(JSON_ENC *json, const char *str, size_t str_len);
/*
* Encode binary data as a lowercase hex string. data_len is the data length in
* bytes.
*/
void ossl_json_str_hex(JSON_ENC *json, const void *data, size_t data_len);
#endif