sha256.c   sha256.c 
/////////////////////////////////////////////////////////////////////////// //// /////////////////////////////////////////////////////////////////////////// ////
// //
/// \file sha256.c /// \file sha256.c
/// \brief SHA-256 /// \brief SHA-256
/// ///
/// \todo Crypto++ has x86 ASM optimizations. They use SSE so if they /// \todo Crypto++ has x86 ASM optimizations. They use SSE so if they
/// are imported to liblzma, SSE instructions need to be used /// are imported to liblzma, SSE instructions need to be used
/// conditionally to keep the code working on older boxes. /// conditionally to keep the code working on older boxes.
/// We could also support using some external library for SHA-256.
// //
// This code is based on the code found from 7-Zip, which has a modified // This code is based on the code found from 7-Zip, which has a modified
// version of the SHA-256 found from Crypto++ <http://www.cryptopp.com/>. // version of the SHA-256 found from Crypto++ <http://www.cryptopp.com/>.
// The code was modified a little to fit into liblzma. // The code was modified a little to fit into liblzma.
// //
// Authors: Kevin Springle // Authors: Kevin Springle
// Wei Dai // Wei Dai
// Igor Pavlov // Igor Pavlov
// Lasse Collin // Lasse Collin
// //
// This file has been put into the public domain. // This file has been put into the public domain.
// You can do whatever you want with this file. // You can do whatever you want with this file.
// //
/////////////////////////////////////////////////////////////////////////// //// /////////////////////////////////////////////////////////////////////////// ////
#include "check.h" #include "check.h"
// Avoid bogus warnings in transform(). // Rotate a uint32_t. GCC can optimize this to a rotate instruction
#if TUKLIB_GNUC_REQ(4, 2) // at least on x86.
# pragma GCC diagnostic ignored "-Wuninitialized" static inline uint32_t
#endif rotr_32(uint32_t num, unsigned amount)
{
// At least on x86, GCC is able to optimize this to a rotate instruction. return (num >> amount) | (num << (32 - amount));
#define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount))) }
#define blk0(i) (W[i] = data[i]) #define blk0(i) (W[i] = conv32be(data[i]))
#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \ #define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
+ s0(W[(i - 15) & 15])) + s0(W[(i - 15) & 15]))
#define Ch(x, y, z) (z ^ (x & (y ^ z))) #define Ch(x, y, z) (z ^ (x & (y ^ z)))
#define Maj(x, y, z) ((x & y) | (z & (x | y))) #define Maj(x, y, z) ((x & (y ^ z)) + (y & z))
#define a(i) T[(0 - i) & 7] #define a(i) T[(0 - i) & 7]
#define b(i) T[(1 - i) & 7] #define b(i) T[(1 - i) & 7]
#define c(i) T[(2 - i) & 7] #define c(i) T[(2 - i) & 7]
#define d(i) T[(3 - i) & 7] #define d(i) T[(3 - i) & 7]
#define e(i) T[(4 - i) & 7] #define e(i) T[(4 - i) & 7]
#define f(i) T[(5 - i) & 7] #define f(i) T[(5 - i) & 7]
#define g(i) T[(6 - i) & 7] #define g(i) T[(6 - i) & 7]
#define h(i) T[(7 - i) & 7] #define h(i) T[(7 - i) & 7]
#define R(i) \ #define R(i, j, blk) \
h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \ h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
+ (j ? blk2(i) : blk0(i)); \
d(i) += h(i); \ d(i) += h(i); \
h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
#define R0(i) R(i, 0, blk0(i))
#define R2(i) R(i, j, blk2(i))
#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22)) #define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25)) #define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3)) #define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10)) #define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))
static const uint32_t SHA256_K[64] = { static const uint32_t SHA256_K[64] = {
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
skipping to change at line 82 skipping to change at line 82
0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
}; };
static void static void
transform(uint32_t state[static 8], const uint32_t data[static 16]) transform(uint32_t state[8], const uint32_t data[16])
{ {
uint32_t W[16]; uint32_t W[16];
uint32_t T[8]; uint32_t T[8];
// Copy state[] to working vars. // Copy state[] to working vars.
memcpy(T, state, sizeof(T)); memcpy(T, state, sizeof(T));
// 64 operations, partially loop unrolled // The first 16 operations unrolled
for (unsigned int j = 0; j < 64; j += 16) { R0( 0); R0( 1); R0( 2); R0( 3);
R( 0); R( 1); R( 2); R( 3); R0( 4); R0( 5); R0( 6); R0( 7);
R( 4); R( 5); R( 6); R( 7); R0( 8); R0( 9); R0(10); R0(11);
R( 8); R( 9); R(10); R(11); R0(12); R0(13); R0(14); R0(15);
R(12); R(13); R(14); R(15);
// The remaining 48 operations partially unrolled
for (unsigned int j = 16; j < 64; j += 16) {
R2( 0); R2( 1); R2( 2); R2( 3);
R2( 4); R2( 5); R2( 6); R2( 7);
R2( 8); R2( 9); R2(10); R2(11);
R2(12); R2(13); R2(14); R2(15);
} }
// Add the working vars back into state[]. // Add the working vars back into state[].
state[0] += a(0); state[0] += a(0);
state[1] += b(0); state[1] += b(0);
state[2] += c(0); state[2] += c(0);
state[3] += d(0); state[3] += d(0);
state[4] += e(0); state[4] += e(0);
state[5] += f(0); state[5] += f(0);
state[6] += g(0); state[6] += g(0);
state[7] += h(0); state[7] += h(0);
} }
static void static void
process(lzma_check_state *check) process(lzma_check_state *check)
{ {
#ifdef WORDS_BIGENDIAN
transform(check->state.sha256.state, check->buffer.u32); transform(check->state.sha256.state, check->buffer.u32);
#else
uint32_t data[16];
for (size_t i = 0; i < 16; ++i)
data[i] = bswap32(check->buffer.u32[i]);
transform(check->state.sha256.state, data);
#endif
return; return;
} }
extern void extern void
lzma_sha256_init(lzma_check_state *check) lzma_sha256_init(lzma_check_state *check)
{ {
static const uint32_t s[8] = { static const uint32_t s[8] = {
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
}; };
 End of changes. 11 change blocks. 
35 lines changed or deleted 30 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/