From 5ca7787459de8c38f6d718457207b5cf89b8455f Mon Sep 17 00:00:00 2001 From: Extrems Date: Tue, 31 Dec 2024 21:15:08 -0500 Subject: [PATCH] - Update XZ Embedded. --- cube/packer/source/xz/xz.h | 22 ++-- cube/packer/source/xz/xz_config.h | 26 ++-- cube/packer/source/xz/xz_dec_lzma2.c | 18 +-- cube/packer/source/xz/xz_dec_stream.c | 58 +++++++- cube/packer/source/xz/xz_private.h | 26 +++- cube/packer/source/xz/xz_sha256.c | 182 ++++++++++++++++++++++++++ 6 files changed, 292 insertions(+), 40 deletions(-) create mode 100644 cube/packer/source/xz/xz_sha256.c diff --git a/cube/packer/source/xz/xz.h b/cube/packer/source/xz/xz.h index d7628bad..c317c494 100644 --- a/cube/packer/source/xz/xz.h +++ b/cube/packer/source/xz/xz.h @@ -22,7 +22,7 @@ extern "C" { #endif -/* In Linux, this is used to make extern functions static when needed. */ +/* "#define XZ_EXTERN static" can be used to make extern functions static. */ #ifndef XZ_EXTERN # define XZ_EXTERN extern #endif @@ -290,10 +290,6 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream * marker must not be used. The unused values are reserved for future use. - * - * These functions aren't used or available in preboot code and thus aren't - * marked with XZ_EXTERN. This avoids warnings about static functions that - * are never defined. */ /* @@ -318,8 +314,8 @@ struct xz_dec_microlzma; * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. */ -extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size); +XZ_EXTERN struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state @@ -335,9 +331,9 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ -extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, - uint32_t comp_size, uint32_t uncomp_size, - int uncomp_size_is_exact); +XZ_EXTERN void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, + uint32_t comp_size, uint32_t uncomp_size, + int uncomp_size_is_exact); /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder @@ -375,15 +371,15 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, * may be changed normally like with XZ_PREALLOC. This way input data can be * provided from non-contiguous memory. */ -extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, - struct xz_buf *b); +XZ_EXTERN enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, + struct xz_buf *b); /** * xz_dec_microlzma_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_microlzma_alloc(). * If s is NULL, this function does nothing. */ -extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); +XZ_EXTERN void xz_dec_microlzma_end(struct xz_dec_microlzma *s); /* * Standalone build (userspace build or in-kernel build for boot time use) diff --git a/cube/packer/source/xz/xz_config.h b/cube/packer/source/xz/xz_config.h index eeabe689..db220801 100644 --- a/cube/packer/source/xz/xz_config.h +++ b/cube/packer/source/xz/xz_config.h @@ -15,10 +15,6 @@ /* Uncomment to enable CRC64 support. 
*/ /* #define XZ_USE_CRC64 */ -#define XZ_DEC_SINGLE -/* #define XZ_DEC_PREALLOC */ -/* #define XZ_DEC_DYNALLOC */ - /* Uncomment as needed to enable BCJ filter decoders. */ /* #define XZ_DEC_X86 */ /* #define XZ_DEC_ARM */ @@ -29,6 +25,10 @@ /* #define XZ_DEC_IA64 */ /* #define XZ_DEC_SPARC */ +#define XZ_DEC_SINGLE +/* #define XZ_DEC_PREALLOC */ +/* #define XZ_DEC_DYNALLOC */ + /* * Visual Studio 2013 update 2 supports only __inline, not inline. * MSVC v19.0 / VS 2015 and newer support both. @@ -58,9 +58,10 @@ void memzero(void *buf, size_t size); #define min_t(type, x, y) min(x, y) #ifndef fallthrough -# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000 +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311 # define fallthrough [[fallthrough]] -# elif defined(__GNUC__) && __GNUC__ >= 7 +# elif (defined(__GNUC__) && __GNUC__ >= 7) \ + || (defined(__clang_major__) && __clang_major__ >= 10) # define fallthrough __attribute__((__fallthrough__)) # else # define fallthrough do {} while (0) @@ -100,7 +101,7 @@ static inline uint32_t get_unaligned_le32(const uint8_t *buf) #ifndef get_unaligned_be32 static inline uint32_t get_unaligned_be32(const uint8_t *buf) { - return (uint32_t)(buf[0] << 24) + return (uint32_t)((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]; @@ -128,12 +129,15 @@ static inline void put_unaligned_be32(uint32_t val, uint8_t *buf) #endif /* - * Use get_unaligned_le32() also for aligned access for simplicity. On - * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr)) - * could save a few bytes in code size. + * To keep things simpler, use the generic unaligned methods also for + * aligned access. The only place where performance could matter is + * SHA-256 but files using SHA-256 aren't common. */ #ifndef get_le32 -# define get_le32(ptr) __builtin_bswap32(*(const uint32_t *)(ptr)) +# define get_le32 get_unaligned_le32 +#endif +#ifndef get_be32 +# define get_be32 get_unaligned_be32 #endif #endif diff --git a/cube/packer/source/xz/xz_dec_lzma2.c b/cube/packer/source/xz/xz_dec_lzma2.c index 613939f5..475c378e 100644 --- a/cube/packer/source/xz/xz_dec_lzma2.c +++ b/cube/packer/source/xz/xz_dec_lzma2.c @@ -189,7 +189,7 @@ struct lzma_dec { uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; /* - * Probility trees for additional bits for match distance + * Probability trees for additional bits for match distance * when the distance is in the range [4, 127]. 
 */
 	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
@@ -1211,8 +1211,8 @@ struct xz_dec_microlzma {
 	struct xz_dec_lzma2 s;
 };
 
-enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
-				 struct xz_buf *b)
+XZ_EXTERN enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
+					   struct xz_buf *b)
 {
 	struct xz_dec_lzma2 *s = &s_ptr->s;
 
@@ -1289,8 +1289,8 @@ enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
 	}
 }
 
-struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
-						uint32_t dict_size)
+XZ_EXTERN struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
+							   uint32_t dict_size)
 {
 	struct xz_dec_microlzma *s;
 
@@ -1318,8 +1318,10 @@ struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
 	return s;
 }
 
-void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
-			    uint32_t uncomp_size, int uncomp_size_is_exact)
+XZ_EXTERN void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
+				      uint32_t comp_size,
+				      uint32_t uncomp_size,
+				      int uncomp_size_is_exact)
 {
 	/*
 	 * comp_size is validated in xz_dec_microlzma_run().
@@ -1333,7 +1335,7 @@ void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
 	s->s.temp.size = 0;
 }
 
-void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
+XZ_EXTERN void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
 {
 	if (DEC_IS_MULTI(s->s.dict.mode))
 		vfree(s->s.dict.buf);
diff --git a/cube/packer/source/xz/xz_dec_stream.c b/cube/packer/source/xz/xz_dec_stream.c
index 2d5fbe1e..33927e8e 100644
--- a/cube/packer/source/xz/xz_dec_stream.c
+++ b/cube/packer/source/xz/xz_dec_stream.c
@@ -15,6 +15,12 @@
 # define IS_CRC64(check_type) false
 #endif
 
+#ifdef XZ_USE_SHA256
+# define IS_SHA256(check_type) ((check_type) == XZ_CHECK_SHA256)
+#else
+# define IS_SHA256(check_type) false
+#endif
+
 /* Hash used to validate the Index field */
 struct xz_dec_hash {
 	vli_type unpadded;
@@ -145,9 +151,23 @@ struct xz_dec {
 	struct xz_dec_bcj *bcj;
 	bool bcj_active;
 #endif
+
+#ifdef XZ_USE_SHA256
+	/*
+	 * SHA-256 value in Block
+	 *
+	 * struct xz_sha256 is over a hundred bytes and it's only accessed
+	 * from a few places. Putting the SHA-256 state near the end
+	 * of struct xz_dec (somewhere after the "index" member) reduces
+	 * code size at least on x86 and RISC-V. It's because the first bytes
+	 * of the struct can be accessed with smaller instructions; the
+	 * members that are accessed from many places should be at the top.
+	 */
+	struct xz_sha256 sha256;
+#endif
 };
 
-#ifdef XZ_DEC_ANY_CHECK
+#if defined(XZ_DEC_ANY_CHECK) || defined(XZ_USE_SHA256)
 /* Sizes of the Check field with different Check IDs */
 static const uint8_t check_sizes[16] = {
 	0,
@@ -161,9 +181,9 @@ static const uint8_t check_sizes[16] = {
 
 /*
  * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
- * must have set s->temp.pos to indicate how much data we are supposed
- * to copy into s->temp.buf. Return true once s->temp.pos has reached
- * s->temp.size.
+ * must have set s->temp.pos and s->temp.size to indicate how much data
+ * we are supposed to copy into s->temp.buf. Return true once s->temp.pos
+ * has reached s->temp.size.
 */
 static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
 {
@@ -260,6 +280,11 @@ static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
 		s->crc = xz_crc64(b->out + s->out_start,
 				b->out_pos - s->out_start, s->crc);
 #endif
+#ifdef XZ_USE_SHA256
+	else if (s->check_type == XZ_CHECK_SHA256)
+		xz_sha256_update(b->out + s->out_start,
+				b->out_pos - s->out_start, &s->sha256);
+#endif
 
 	if (ret == XZ_STREAM_END) {
 		if (s->block_header.compressed != VLI_UNKNOWN
@@ -275,7 +300,7 @@ static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
 		s->block.hash.unpadded += s->block_header.size
 				+ s->block.compressed;
 
-#ifdef XZ_DEC_ANY_CHECK
+#if defined(XZ_DEC_ANY_CHECK) || defined(XZ_USE_SHA256)
 		s->block.hash.unpadded += check_sizes[s->check_type];
 #else
 		if (s->check_type == XZ_CHECK_CRC32)
@@ -428,13 +453,14 @@ static enum xz_ret dec_stream_header(struct xz_dec *s)
 
 	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
 
+	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)
+			&& !IS_SHA256(s->check_type)) {
 #ifdef XZ_DEC_ANY_CHECK
-	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
 		return XZ_UNSUPPORTED_CHECK;
 #else
-	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
 		return XZ_OPTIONS_ERROR;
 #endif
+	}
 
 	return XZ_OK;
 }
@@ -639,6 +665,11 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
 			if (ret != XZ_OK)
 				return ret;
 
+#ifdef XZ_USE_SHA256
+			if (s->check_type == XZ_CHECK_SHA256)
+				xz_sha256_reset(&s->sha256);
+#endif
+
 			s->sequence = SEQ_BLOCK_UNCOMPRESS;
 			fallthrough;
 
@@ -685,6 +716,19 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
 				if (ret != XZ_STREAM_END)
 					return ret;
 			}
+#ifdef XZ_USE_SHA256
+			else if (s->check_type == XZ_CHECK_SHA256) {
+				s->temp.size = 32;
+				if (!fill_temp(s, b))
+					return XZ_OK;
+
+				if (!xz_sha256_validate(s->temp.buf,
+						&s->sha256))
+					return XZ_DATA_ERROR;
+
+				s->pos = 0;
+			}
+#endif
 #ifdef XZ_DEC_ANY_CHECK
 			else if (!check_skip(s, b)) {
 				return XZ_OK;
diff --git a/cube/packer/source/xz/xz_private.h b/cube/packer/source/xz/xz_private.h
index a8b1cbe8..7387401d 100644
--- a/cube/packer/source/xz/xz_private.h
+++ b/cube/packer/source/xz/xz_private.h
@@ -12,7 +12,7 @@
 #ifdef __KERNEL__
 # include <linux/xz.h>
 # include <linux/kernel.h>
-# include <asm/unaligned.h>
+# include <linux/unaligned.h>
 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
 # ifndef XZ_PREBOOT
 # include <linux/slab.h>
@@ -111,6 +111,30 @@
 # endif
 #endif
 
+struct xz_sha256 {
+	/* Buffered input data */
+	uint8_t data[64];
+
+	/* Internal state and the final hash value */
+	uint32_t state[8];
+
+	/* Size of the input data */
+	uint64_t size;
+};
+
+/* Reset the SHA-256 state to prepare for a new calculation. */
+XZ_EXTERN void xz_sha256_reset(struct xz_sha256 *s);
+
+/* Update the SHA-256 state with new data. */
+XZ_EXTERN void xz_sha256_update(const uint8_t *buf, size_t size,
+				struct xz_sha256 *s);
+
+/*
+ * Finish the SHA-256 calculation. Compare the result with the first 32 bytes
+ * from buf. Return true if the values are equal and false if they aren't.
+ */
+XZ_EXTERN bool xz_sha256_validate(const uint8_t *buf, struct xz_sha256 *s);
+
 /*
  * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
  * before calling xz_dec_lzma2_run().
diff --git a/cube/packer/source/xz/xz_sha256.c b/cube/packer/source/xz/xz_sha256.c
new file mode 100644
index 00000000..078cad2c
--- /dev/null
+++ b/cube/packer/source/xz/xz_sha256.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: 0BSD
+
+/*
+ * SHA-256
+ *
+ * This is based on the XZ Utils version which is based on public domain code
+ * from Crypto++ Library 5.5.1 released in 2007: https://www.cryptopp.com/
+ *
+ * Authors: Wei Dai
+ *          Lasse Collin
+ */
+
+#include "xz_private.h"
+
+static inline uint32_t
+rotr_32(uint32_t num, unsigned amount)
+{
+	return (num >> amount) | (num << (32 - amount));
+}
+
+#define blk0(i) (W[i] = get_be32(&data[4 * i]))
+#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
+		+ s0(W[(i - 15) & 15]))
+
+#define Ch(x, y, z) (z ^ (x & (y ^ z)))
+#define Maj(x, y, z) ((x & (y ^ z)) + (y & z))
+
+#define a(i) T[(0 - i) & 7]
+#define b(i) T[(1 - i) & 7]
+#define c(i) T[(2 - i) & 7]
+#define d(i) T[(3 - i) & 7]
+#define e(i) T[(4 - i) & 7]
+#define f(i) T[(5 - i) & 7]
+#define g(i) T[(6 - i) & 7]
+#define h(i) T[(7 - i) & 7]
+
+#define R(i, j, blk) \
+	h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
+	d(i) += h(i); \
+	h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
+#define R0(i) R(i, 0, blk0(i))
+#define R2(i) R(i, j, blk2(i))
+
+#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
+#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
+#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
+#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))
+
+static const uint32_t SHA256_K[64] = {
+	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+static void
+transform(uint32_t state[8], const uint8_t data[64])
+{
+	uint32_t W[16];
+	uint32_t T[8];
+	unsigned int j;
+
+	/* Copy state[] to working vars. */
+	memcpy(T, state, sizeof(T));
+
+	/* The first 16 operations unrolled */
+	R0( 0); R0( 1); R0( 2); R0( 3);
+	R0( 4); R0( 5); R0( 6); R0( 7);
+	R0( 8); R0( 9); R0(10); R0(11);
+	R0(12); R0(13); R0(14); R0(15);
+
+	/* The remaining 48 operations partially unrolled */
+	for (j = 16; j < 64; j += 16) {
+		R2( 0); R2( 1); R2( 2); R2( 3);
+		R2( 4); R2( 5); R2( 6); R2( 7);
+		R2( 8); R2( 9); R2(10); R2(11);
+		R2(12); R2(13); R2(14); R2(15);
+	}
+
+	/* Add the working vars back into state[].
*/ + state[0] += a(0); + state[1] += b(0); + state[2] += c(0); + state[3] += d(0); + state[4] += e(0); + state[5] += f(0); + state[6] += g(0); + state[7] += h(0); +} + +XZ_EXTERN void xz_sha256_reset(struct xz_sha256 *s) +{ + static const uint32_t initial_state[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 + }; + + memcpy(s->state, initial_state, sizeof(initial_state)); + s->size = 0; +} + +XZ_EXTERN void xz_sha256_update(const uint8_t *buf, size_t size, + struct xz_sha256 *s) +{ + size_t copy_start; + size_t copy_size; + + /* + * Copy the input data into a properly aligned temporary buffer. + * This way we can be called with arbitrarily sized buffers + * (no need to be a multiple of 64 bytes). + * + * Full 64-byte chunks could be processed directly from buf with + * unaligned access. It seemed to make very little difference in + * speed on x86-64 though. Thus it was omitted. + */ + while (size > 0) { + copy_start = s->size & 0x3F; + copy_size = 64 - copy_start; + if (copy_size > size) + copy_size = size; + + memcpy(s->data + copy_start, buf, copy_size); + + buf += copy_size; + size -= copy_size; + s->size += copy_size; + + if ((s->size & 0x3F) == 0) + transform(s->state, s->data); + } +} + +XZ_EXTERN bool xz_sha256_validate(const uint8_t *buf, struct xz_sha256 *s) +{ + /* + * Add padding as described in RFC 3174 (it describes SHA-1 but + * the same padding style is used for SHA-256 too). + */ + size_t i = s->size & 0x3F; + s->data[i++] = 0x80; + + while (i != 64 - 8) { + if (i == 64) { + transform(s->state, s->data); + i = 0; + } + + s->data[i++] = 0x00; + } + + /* Convert the message size from bytes to bits. */ + s->size *= 8; + + /* + * Store the message size in big endian byte order and + * calculate the final hash value. + */ + for (i = 0; i < 8; ++i) + s->data[64 - 8 + i] = (uint8_t)(s->size >> ((7 - i) * 8)); + + transform(s->state, s->data); + + /* Compare if the hash value matches the first 32 bytes in buf. */ + for (i = 0; i < 8; ++i) + if (get_unaligned_be32(buf + 4 * i) != s->state[i]) + return false; + + return true; +}
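
Note (outside the patch): nothing in the public xz.h API changes with this update. The SHA-256 handling happens entirely inside xz_dec_run(), so a caller only has to define XZ_USE_SHA256 in xz_config.h for streams whose Check field is SHA-256 to be verified; without it such streams are still rejected at the stream header (XZ_OPTIONS_ERROR here, or XZ_UNSUPPORTED_CHECK if XZ_DEC_ANY_CHECK were enabled). The sketch below shows the assumed single-call usage pattern under the XZ_DEC_SINGLE configuration selected in xz_config.h above; the function name and buffer handling are illustrative only, not code from this repository.

#include <stdbool.h>
#include "xz.h"

/* Hypothetical caller: decode one complete .xz image in a single call. */
static bool unpack_xz(const uint8_t *in, size_t in_size,
		      uint8_t *out, size_t out_size)
{
	struct xz_buf b;
	struct xz_dec *s;
	enum xz_ret ret;

	/* The internal CRC32 table must be initialized once per process. */
	xz_crc32_init();

	/* dict_max is not used in single-call (XZ_SINGLE) mode. */
	s = xz_dec_init(XZ_SINGLE, 0);
	if (s == NULL)
		return false;

	b.in = in;
	b.in_pos = 0;
	b.in_size = in_size;
	b.out = out;
	b.out_pos = 0;
	b.out_size = out_size;

	/* The Block check (CRC32, and CRC64/SHA-256 when enabled) is verified here. */
	ret = xz_dec_run(s, &b);
	xz_dec_end(s);

	return ret == XZ_STREAM_END;
}

In the multi-call modes (XZ_PREALLOC, XZ_DYNALLOC) the same struct xz_buf would be reused across repeated xz_dec_run() calls, refilling b.in and draining b.out between calls until XZ_STREAM_END is returned.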