v23.8

2025-09-17 23:44:27 +00:00 · 2023-11-11 16:48:57 -05:00
parent e043698442
commit 26b9429589
44 changed files with 4125 additions and 15298 deletions
--- a/algo/sha/sha1-hash.c
+++ b/algo/sha/sha1-hash.c
@@ -0,0 +1,390 @@
+#include "simd-utils.h"
+#include <stdint.h>
+#include "sha1-hash.h"
+
+#if defined(__x86_64__) && defined(__SHA__)
+
+/* One SHA-1 compression (80 rounds) of a 64-byte block using x86 SHA-NI.
+ * state_in / state_out point to five uint32_t words (A, B, C, D, E); the
+ * state is fully read before anything is stored, so they may alias, and the
+ * state pointers need not be 16-byte aligned (unaligned load/store is used).
+ * data is read through load_msg( data, i ), i = 0..3, which the expanding
+ * function must define to yield words 4i..4i+3 with word 4i in the top lane. */
+#define sha1_opt_rounds( state_out, data, state_in ) \
+{ \
+    __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; \
+    __m128i MSG0, MSG1, MSG2, MSG3; \
+\
+    ABCD = _mm_loadu_si128( (const __m128i*)(state_in) ); \
+    E0 = _mm_set_epi32( (state_in)[4], 0, 0, 0 ); \
+    ABCD = _mm_shuffle_epi32( ABCD, 0x1B );   /* reverse lanes: A on top */ \
+\
+    ABCD_SAVE = ABCD; \
+    E0_SAVE = E0; \
+\
+    /* Rounds 0-3 */ \
+    MSG0 = load_msg( data, 0 ); \
+    E0 = _mm_add_epi32( E0, MSG0 ); \
+    E1 = ABCD; \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \
+\
+    /* Rounds 4-7 */ \
+    MSG1 = load_msg( data, 1 ); \
+    E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \
+    E0 = ABCD; \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 0 ); \
+    MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \
+\
+    /* Rounds 8-11 */ \
+    MSG2 = load_msg( data, 2 ); \
+    E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \
+    E1 = ABCD; \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \
+    MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \
+    MSG0 = _mm_xor_si128( MSG0, MSG2 ); \
+\
+    /* Rounds 12-15 */ \
+    MSG3 = load_msg( data, 3 ); \
+    E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \
+    E0 = ABCD; \
+    MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 0 ); \
+    MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \
+    MSG1 = _mm_xor_si128( MSG1, MSG3 ); \
+\
+    /* Rounds 16-19 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \
+    E1 = ABCD; \
+    MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \
+    MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \
+    MSG2 = _mm_xor_si128( MSG2, MSG0 ); \
+\
+    /* Rounds 20-23 (function selector 1 for rounds 20-39) */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \
+    E0 = ABCD; \
+    MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \
+    MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \
+    MSG3 = _mm_xor_si128( MSG3, MSG1 ); \
+\
+    /* Rounds 24-27 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \
+    E1 = ABCD; \
+    MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 1 ); \
+    MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \
+    MSG0 = _mm_xor_si128( MSG0, MSG2 ); \
+\
+    /* Rounds 28-31 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \
+    E0 = ABCD; \
+    MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \
+    MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \
+    MSG1 = _mm_xor_si128( MSG1, MSG3 ); \
+\
+    /* Rounds 32-35 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \
+    E1 = ABCD; \
+    MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 1 ); \
+    MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \
+    MSG2 = _mm_xor_si128( MSG2, MSG0 ); \
+\
+    /* Rounds 36-39 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \
+    E0 = ABCD; \
+    MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \
+    MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \
+    MSG3 = _mm_xor_si128( MSG3, MSG1 ); \
+\
+    /* Rounds 40-43 (function selector 2 for rounds 40-59) */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \
+    E1 = ABCD; \
+    MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \
+    MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \
+    MSG0 = _mm_xor_si128( MSG0, MSG2 ); \
+\
+    /* Rounds 44-47 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \
+    E0 = ABCD; \
+    MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 2 ); \
+    MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \
+    MSG1 = _mm_xor_si128( MSG1, MSG3 ); \
+\
+    /* Rounds 48-51 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \
+    E1 = ABCD; \
+    MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \
+    MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \
+    MSG2 = _mm_xor_si128( MSG2, MSG0 ); \
+\
+    /* Rounds 52-55 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \
+    E0 = ABCD; \
+    MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 2 ); \
+    MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \
+    MSG3 = _mm_xor_si128( MSG3, MSG1 ); \
+\
+    /* Rounds 56-59 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \
+    E1 = ABCD; \
+    MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \
+    MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \
+    MSG0 = _mm_xor_si128( MSG0, MSG2 ); \
+\
+    /* Rounds 60-63 (function selector 3 for rounds 60-79) */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \
+    E0 = ABCD; \
+    MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \
+    MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \
+    MSG1 = _mm_xor_si128( MSG1, MSG3 ); \
+\
+    /* Rounds 64-67 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \
+    E1 = ABCD; \
+    MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 3 ); \
+    MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \
+    MSG2 = _mm_xor_si128( MSG2, MSG0 ); \
+\
+    /* Rounds 68-71 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \
+    E0 = ABCD; \
+    MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \
+    MSG3 = _mm_xor_si128( MSG3, MSG1 ); \
+\
+    /* Rounds 72-75 */ \
+    E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \
+    E1 = ABCD; \
+    MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 3 ); \
+\
+    /* Rounds 76-79 */ \
+    E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \
+    E0 = ABCD; \
+    ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \
+\
+    /* Combine state */ \
+    E0 = _mm_sha1nexte_epu32( E0, E0_SAVE ); \
+    ABCD = _mm_add_epi32( ABCD, ABCD_SAVE ); \
+\
+    /* Save state: restore A..D to memory order, E comes from the top lane */ \
+    ABCD = _mm_shuffle_epi32( ABCD, 0x1B ); \
+    _mm_storeu_si128( (__m128i*)(state_out), ABCD ); \
+    (state_out)[4] = _mm_extract_epi32( E0, 3 ); \
+}
+/* SHA-1 compression of one block given as 16 host-order 32-bit words.  SHA-NI
+ * wants the first word of each group in the top lane, so reverse the lanes. */
+void sha1_x86_sha_transform_le( uint32_t *state_out, const void *input,
+                                const uint32_t *state_in )
+{
+#define load_msg( m, i ) _mm_shuffle_epi32( casti_v128( m, i ), 0x1B )
+   sha1_opt_rounds( state_out, input, state_in );
+#undef load_msg
+}
+/* SHA-1 compression of one 64-byte block of big-endian message words (SHA-NI). */
+void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,
+                                const uint32_t *state_in )
+{
+   const __m128i MASK = _mm_set_epi64x( 0x0001020304050607ULL,
+                                        0x08090a0b0c0d0e0fULL ); // reverses all 16 bytes
+#define load_msg( m, i ) _mm_shuffle_epi8( casti_v128( m, i ), MASK )
+   sha1_opt_rounds( state_out, input, state_in );
+#undef load_msg
+}
+
+#endif
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
+/* One SHA-1 compression (80 rounds) via ARMv8 SHA1 intrinsics; needs load_msg. */
+#define sha1_neon_rounds( state_out, data, state_in ) \
+{ \
+    uint32x4_t ABCD, ABCD_SAVED; \
+    uint32x4_t TMP0, TMP1; \
+    uint32x4_t MSG0, MSG1, MSG2, MSG3; \
+    uint32_t   E0, E0_SAVED, E1; \
+\
+    /* Load state */ \
+    ABCD = vld1q_u32( &state_in[0] ); \
+    E0 = state_in[4]; \
+\
+    /* Save state */ \
+    ABCD_SAVED = ABCD; \
+    E0_SAVED = E0; \
+    /* Fetch the four 4-word message groups through the caller's load_msg */ \
+    MSG0 = load_msg( data, 0 ); \
+    MSG1 = load_msg( data, 1 ); \
+    MSG2 = load_msg( data, 2 ); \
+    MSG3 = load_msg( data, 3 ); \
+    /* TMP0/TMP1 hold message words with the round constant already added */ \
+    TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x5A827999 ) ); \
+    TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x5A827999 ) ); \
+\
+    /* Rounds 0-3 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x5A827999 ) ); \
+    MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \
+\
+    /* Rounds 4-7 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1cq_u32(ABCD, E1, TMP1); \
+    TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x5A827999 ) ); \
+    MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \
+    MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \
+\
+    /* Rounds 8-11 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x5A827999 ) ); \
+    MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \
+    MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \
+\
+    /* Rounds 12-15 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1cq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x6ED9EBA1 ) ); /* consumed in rounds 20-23 */ \
+    MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \
+    MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \
+\
+    /* Rounds 16-19 */\
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x6ED9EBA1 ) ); \
+    MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \
+    MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \
+\
+    /* Rounds 20-23 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); /* vsha1pq from here through round 39 */ \
+    TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x6ED9EBA1 ) ); \
+    MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \
+    MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \
+\
+    /* Rounds 24-27 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x6ED9EBA1 ) ); \
+    MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \
+    MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \
+\
+    /* Rounds 28-31 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x6ED9EBA1 ) ); \
+    MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \
+    MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \
+\
+    /* Rounds 32-35 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x8F1BBCDC ) ); /* consumed in rounds 40-43 */ \
+    MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \
+    MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \
+\
+    /* Rounds 36-39 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x8F1BBCDC ) ); \
+    MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \
+    MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \
+\
+    /* Rounds 40-43 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); /* vsha1mq from here through round 59 */ \
+    TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x8F1BBCDC ) ); \
+    MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \
+    MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \
+\
+    /* Rounds 44-47 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1mq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x8F1BBCDC ) ); \
+    MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \
+    MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \
+\
+    /* Rounds 48-51 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x8F1BBCDC ) ); \
+    MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \
+    MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \
+\
+    /* Rounds 52-55 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1mq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0xCA62C1D6 ) ); /* consumed in rounds 60-63 */ \
+    MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \
+    MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \
+\
+    /* Rounds 56-59 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0xCA62C1D6 ) ); \
+    MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \
+    MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \
+\
+    /* Rounds 60-63 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); /* vsha1pq again through round 79 */ \
+    TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0xCA62C1D6 ) ); \
+    MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \
+    MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \
+\
+    /* Rounds 64-67 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \
+    TMP0 = vaddq_u32(MSG2, vdupq_n_u32( 0xCA62C1D6 ) ); \
+    MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \
+    MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \
+\
+    /* Rounds 68-71 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \
+    TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0xCA62C1D6 ) ); \
+    MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \
+\
+    /* Rounds 72-75 */ \
+    E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \
+\
+    /* Rounds 76-79 */ \
+    E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \
+    ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \
+\
+    /* Combine state: add the saved input state back in */ \
+    E0 += E0_SAVED; \
+    ABCD = vaddq_u32( ABCD_SAVED, ABCD ); \
+\
+    /* Save state (stored last, after state_in has been fully read) */ \
+    vst1q_u32( &state_out[0], ABCD ); \
+    state_out[4] = E0; \
+}
+/* SHA-1 compression of one block; each word is byte-swapped on load (NEON). */
+void sha1_neon_sha_transform_be( uint32_t *state_out, const void *input,
+                                 const uint32_t *state_in )
+{
+#define load_msg( m, i )  v128_bswap32( casti_v128( m, i ) )
+   sha1_neon_rounds( state_out, input, state_in );
+#undef load_msg
+}
+/* SHA-1 compression of one block whose words are used as loaded (NEON). */
+void sha1_neon_sha_transform_le( uint32_t *state_out, const void *input,
+                                 const uint32_t *state_in )
+{
+#define load_msg( m, i )  casti_v128( m, i )
+   sha1_neon_rounds( state_out, input, state_in );
+#undef load_msg
+}
+
+#endif      
--- a/algo/sha/sha1-hash.h
+++ b/algo/sha/sha1-hash.h
@@ -0,0 +1,40 @@
+#ifndef SHA1_HASH_H__
+#define SHA1_HASH_H__ 1
+
+#include <stddef.h>
+#include "simd-utils.h"
+#include "cpuminer-config.h"
+#include "sph_sha1.h"
+
+// SHA hooks for sha1, automatically substituted in SPH
+#if defined(__x86_64__) && defined(__SHA__)
+
+void sha1_x86_sha_transform_le( uint32_t *state_out, const void *input,
+                                const uint32_t *state_in );
+
+void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,
+                                const uint32_t *state_in );
+
+#define sha1_transform_le        sha1_x86_sha_transform_le
+#define sha1_transform_be        sha1_x86_sha_transform_be
+
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
+// Same guard as the NEON definitions in sha1-hash.c and the dispatch in sha1.c.
+void sha1_neon_sha_transform_be( uint32_t *state_out, const void *input,
+                                 const uint32_t *state_in );
+void sha1_neon_sha_transform_le( uint32_t *state_out, const void *input,
+                                 const uint32_t *state_in );
+
+#define sha1_transform_le        sha1_neon_sha_transform_le
+#define sha1_transform_be        sha1_neon_sha_transform_be
+
+#else
+// NOTE(review): sph_sha1_transform_le/_be are not declared in sph_sha1.h -- confirm.
+#define sha1_transform_le        sph_sha1_transform_le
+#define sha1_transform_be        sph_sha1_transform_be
+
+#endif
+
+#define sha1_full                sph_sha1_full   // one-shot init/update/close
+
+#endif
--- a/algo/sha/sha1.c
+++ b/algo/sha/sha1.c
@@ -0,0 +1,400 @@
+/* $Id: sha1.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * SHA-1 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include "simd-utils.h"
+#include "sha1-hash.h"
+
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))   /* choice: B ? C : D, bitwise */
+#define G(B, C, D)     ((B) ^ (C) ^ (D))   /* parity */
+#define H(B, C, D)     (((D) & (C)) | (((D) | (C)) & (B)))   /* majority */
+#define I(B, C, D)     G(B, C, D)   /* parity again, used with K4 */
+
+#define ROTL    rol32
+//#define ROTL    SPH_ROTL32
+
+#define K1     SPH_C32(0x5A827999)   /* added in the F steps */
+#define K2     SPH_C32(0x6ED9EBA1)   /* added in the G steps */
+#define K3     SPH_C32(0x8F1BBCDC)   /* added in the H steps */
+#define K4     SPH_C32(0xCA62C1D6)   /* added in the I steps */
+
+static const sph_u32 IV[5] = {   /* initial state, copied by sph_sha1_init() */
+	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
+	SPH_C32(0x98BADCFE), SPH_C32(0x10325476),
+	SPH_C32(0xC3D2E1F0)
+};
+
+/*
+ * Body of the SHA-1 compression function (all 80 steps, fully unrolled).
+ * "in" is a macro or function name: in(0) .. in(15) are each evaluated once,
+ * in ascending order, and must yield the 16 already-decoded 32-bit message
+ * words. "r" is an array or pointer expression designating the 5 state
+ * words; it is read on entry and updated in place on exit, and is expanded
+ * several times, so it must be free of side effects.
+ */
+
+#define SHA1_ROUND_BODY(in, r)   do { \
+		sph_u32 A, B, C, D, E; \
+		sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
+		sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
+ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+		D = (r)[3]; \
+		E = (r)[4]; \
+		/* Steps 0-19: F with K1; steps 0-15 consume in(0) .. in(15). */ \
+		W00 = in(0); \
+		E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W00 + K1); \
+		B = ROTL(B, 30); \
+		W01 = in(1); \
+		D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W01 + K1); \
+		A = ROTL(A, 30); \
+		W02 = in(2); \
+		C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W02 + K1); \
+		E = ROTL(E, 30); \
+		W03 = in(3); \
+		B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W03 + K1); \
+		D = ROTL(D, 30); \
+		W04 = in(4); \
+		A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W04 + K1); \
+		C = ROTL(C, 30); \
+		W05 = in(5); \
+		E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W05 + K1); \
+		B = ROTL(B, 30); \
+		W06 = in(6); \
+		D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W06 + K1); \
+		A = ROTL(A, 30); \
+		W07 = in(7); \
+		C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W07 + K1); \
+		E = ROTL(E, 30); \
+		W08 = in(8); \
+		B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W08 + K1); \
+		D = ROTL(D, 30); \
+		W09 = in(9); \
+		A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W09 + K1); \
+		C = ROTL(C, 30); \
+		W10 = in(10); \
+		E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W10 + K1); \
+		B = ROTL(B, 30); \
+		W11 = in(11); \
+		D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W11 + K1); \
+		A = ROTL(A, 30); \
+		W12 = in(12); \
+		C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W12 + K1); \
+		E = ROTL(E, 30); \
+		W13 = in(13); \
+		B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W13 + K1); \
+		D = ROTL(D, 30); \
+		W14 = in(14); \
+		A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W14 + K1); \
+		C = ROTL(C, 30); \
+		W15 = in(15); \
+		E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W15 + K1); \
+		B = ROTL(B, 30); \
+		W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); /* schedule: rolling 16-word window */ \
+		D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W00 + K1); \
+		A = ROTL(A, 30); \
+		W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \
+		C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W01 + K1); \
+		E = ROTL(E, 30); \
+		W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \
+		B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W02 + K1); \
+		D = ROTL(D, 30); \
+		W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \
+		A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W03 + K1); \
+		C = ROTL(C, 30); \
+		W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); /* steps 20-39: G with K2 */ \
+		E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W04 + K2); \
+		B = ROTL(B, 30); \
+		W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \
+		D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W05 + K2); \
+		A = ROTL(A, 30); \
+		W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \
+		C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W06 + K2); \
+		E = ROTL(E, 30); \
+		W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \
+		B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W07 + K2); \
+		D = ROTL(D, 30); \
+		W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \
+		A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W08 + K2); \
+		C = ROTL(C, 30); \
+		W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \
+		E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W09 + K2); \
+		B = ROTL(B, 30); \
+		W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \
+		D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W10 + K2); \
+		A = ROTL(A, 30); \
+		W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \
+		C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W11 + K2); \
+		E = ROTL(E, 30); \
+		W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \
+		B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W12 + K2); \
+		D = ROTL(D, 30); \
+		W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \
+		A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W13 + K2); \
+		C = ROTL(C, 30); \
+		W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \
+		E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W14 + K2); \
+		B = ROTL(B, 30); \
+		W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \
+		D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W15 + K2); \
+		A = ROTL(A, 30); \
+		W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \
+		C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W00 + K2); \
+		E = ROTL(E, 30); \
+		W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \
+		B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W01 + K2); \
+		D = ROTL(D, 30); \
+		W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \
+		A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W02 + K2); \
+		C = ROTL(C, 30); \
+		W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \
+		E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W03 + K2); \
+		B = ROTL(B, 30); \
+		W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \
+		D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W04 + K2); \
+		A = ROTL(A, 30); \
+		W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \
+		C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W05 + K2); \
+		E = ROTL(E, 30); \
+		W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \
+		B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W06 + K2); \
+		D = ROTL(D, 30); \
+		W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \
+		A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W07 + K2); \
+		C = ROTL(C, 30); \
+		W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); /* steps 40-59: H with K3 */ \
+		E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W08 + K3); \
+		B = ROTL(B, 30); \
+		W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \
+		D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W09 + K3); \
+		A = ROTL(A, 30); \
+		W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \
+		C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W10 + K3); \
+		E = ROTL(E, 30); \
+		W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \
+		B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W11 + K3); \
+		D = ROTL(D, 30); \
+		W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \
+		A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W12 + K3); \
+		C = ROTL(C, 30); \
+		W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \
+		E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W13 + K3); \
+		B = ROTL(B, 30); \
+		W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \
+		D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W14 + K3); \
+		A = ROTL(A, 30); \
+		W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \
+		C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W15 + K3); \
+		E = ROTL(E, 30); \
+		W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \
+		B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W00 + K3); \
+		D = ROTL(D, 30); \
+		W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \
+		A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W01 + K3); \
+		C = ROTL(C, 30); \
+		W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \
+		E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W02 + K3); \
+		B = ROTL(B, 30); \
+		W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \
+		D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W03 + K3); \
+		A = ROTL(A, 30); \
+		W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \
+		C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W04 + K3); \
+		E = ROTL(E, 30); \
+		W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \
+		B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W05 + K3); \
+		D = ROTL(D, 30); \
+		W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \
+		A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W06 + K3); \
+		C = ROTL(C, 30); \
+		W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \
+		E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W07 + K3); \
+		B = ROTL(B, 30); \
+		W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \
+		D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W08 + K3); \
+		A = ROTL(A, 30); \
+		W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \
+		C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W09 + K3); \
+		E = ROTL(E, 30); \
+		W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \
+		B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W10 + K3); \
+		D = ROTL(D, 30); \
+		W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \
+		A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W11 + K3); \
+		C = ROTL(C, 30); \
+		W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); /* steps 60-79: I with K4 */ \
+		E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W12 + K4); \
+		B = ROTL(B, 30); \
+		W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \
+		D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W13 + K4); \
+		A = ROTL(A, 30); \
+		W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \
+		C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W14 + K4); \
+		E = ROTL(E, 30); \
+		W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \
+		B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W15 + K4); \
+		D = ROTL(D, 30); \
+		W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \
+		A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W00 + K4); \
+		C = ROTL(C, 30); \
+		W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \
+		E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W01 + K4); \
+		B = ROTL(B, 30); \
+		W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \
+		D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W02 + K4); \
+		A = ROTL(A, 30); \
+		W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \
+		C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W03 + K4); \
+		E = ROTL(E, 30); \
+		W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \
+		B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W04 + K4); \
+		D = ROTL(D, 30); \
+		W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \
+		A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W05 + K4); \
+		C = ROTL(C, 30); \
+		W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \
+		E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W06 + K4); \
+		B = ROTL(B, 30); \
+		W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \
+		D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W07 + K4); \
+		A = ROTL(A, 30); \
+		W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \
+		C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W08 + K4); \
+		E = ROTL(E, 30); \
+		W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \
+		B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W09 + K4); \
+		D = ROTL(D, 30); \
+		W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \
+		A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W10 + K4); \
+		C = ROTL(C, 30); \
+		W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \
+		E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W11 + K4); \
+		B = ROTL(B, 30); \
+		W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \
+		D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W12 + K4); \
+		A = ROTL(A, 30); \
+		W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \
+		C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W13 + K4); \
+		E = ROTL(E, 30); \
+		W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \
+		B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W14 + K4); \
+		D = ROTL(D, 30); \
+		W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \
+		A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W15 + K4); \
+		C = ROTL(C, 30); \
+		/* Feed-forward: add the working variables into the state. */ \
+		(r)[0] = SPH_T32((r)[0] + A); \
+		(r)[1] = SPH_T32((r)[1] + B); \
+		(r)[2] = SPH_T32((r)[2] + C); \
+		(r)[3] = SPH_T32((r)[3] + D); \
+		(r)[4] = SPH_T32((r)[4] + E); \
+	} while (0)
+
+/*
+ * Compress one 64-byte block at "data" into the 5-word state "r", in place.
+ * The data must be aligned for 32-bit access.
+ */
+#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__aarch64__) && defined(__ARM_FEATURE_SHA2) )
+
+/* Hardware path: hand the block to the selected sha1_transform_be hook. */
+static void
+sha1_round( const unsigned char *data, sph_u32 r[5] )
+{
+   uint32_t *state = (uint32_t*)r;
+   sha1_transform_be( state, data, state );
+}
+#else
+/* Portable path: decode big-endian words and run the unrolled body. */
+static void
+sha1_round( const unsigned char *data, sph_u32 r[5] )
+{
+#define SHA1_IN(x)   sph_dec32be_aligned(data + (4 * (x)))
+	SHA1_ROUND_BODY(SHA1_IN, r);
+#undef SHA1_IN
+}
+
+/* see sph_sha1.h: load the IV into the state and zero the count field(s) */
+void
+sph_sha1_init(void *cc)
+{
+	sph_sha1_context *const ctx = cc;
+
+	memcpy(ctx->val, IV, sizeof IV);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+#define RFUN   sha1_round   /* block compression function defined above */
+#define HASH   sha1   /* presumably the name prefix: sha1_close() etc. are used below */
+#define BE32   1   /* NOTE(review): meaning is defined by md_helper.c, not visible here */
+#include "md_helper.c"
+
+/* see sph_sha1.h: finish the hash into dst, then reinitialize the context */
+void
+sph_sha1_close(void *cc, void *dst)
+{
+	sha1_close(cc, dst, 5);   /* 5: presumably the output size in 32-bit words */
+	sph_sha1_init(cc);
+}
+
+/* see sph_sha1.h: append n extra bits from ub, finish into dst, then reinit */
+void
+sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	sha1_addbits_and_close(cc, ub, n, dst, 5);   /* 5: presumably output words */
+	sph_sha1_init(cc);
+}
+
+/* see sph_sha1.h: portable compression on 16 already-decoded words, in place */
+void
+sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5])
+{
+#define SHA1_IN(x)   msg[x]   /* words are used as given, no byte decoding */
+	SHA1_ROUND_BODY(SHA1_IN, val);
+#undef SHA1_IN
+}
+
+/* One-shot SHA-1: hash len bytes at msg and write the digest to hash. */
+void sph_sha1_full( void *hash, const void *msg, size_t len )
+{
+   sph_sha1_context ctx;
+   sph_sha1_init( &ctx );
+   sph_sha1( &ctx, msg, len );
+   sph_sha1_close( &ctx, hash );
+}
--- a/algo/sha/sph_sha1.h
+++ b/algo/sha/sph_sha1.h
@@ -0,0 +1,133 @@
+/* $Id: sph_sha1.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * SHA-1 interface.
+ *
+ * SHA-1 is described in FIPS 180-1 (now superseded by FIPS 180-2, but the
+ * description of SHA-1 is still included and has not changed). FIPS
+ * standards can be found at: http://csrc.nist.gov/publications/fips/
+ *
+ * @warning   A theoretical collision attack against SHA-1, with work
+ * factor 2^63, has been published. SHA-1 should not be used in new
+ * protocol designs.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_sha1.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_SHA1_H__
+#define SPH_SHA1_H__
+
+#include <stddef.h>
+#include "compat/sph_types.h"
+
+/**
+ * Output size (in bits) for SHA-1.
+ */
+#define SPH_SIZE_sha1   160
+
+/**
+ * This structure is a context for SHA-1 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a SHA-1 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running SHA-1 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment; one 64-byte block */
+	sph_u32 val[5];           /* state words; set to the IV by sph_sha1_init() */
+#if SPH_64
+	sph_u64 count;            /* zeroed by sph_sha1_init(); presumably input length -- confirm */
+#else
+	sph_u32 count_high, count_low;   /* presumably the same counter in two halves */
+#endif
+#endif
+} sph_sha1_context;
+
+/**
+ * Initialize a SHA-1 context. This process performs no memory allocation.
+ *
+ * @param cc   the SHA-1 context (pointer to a <code>sph_sha1_context</code>)
+ */
+void sph_sha1_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the SHA-1 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_sha1(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current SHA-1 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (20 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the SHA-1 context
+ * @param dst   the destination buffer
+ */
+void sph_sha1_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (20 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the SHA-1 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Apply the SHA-1 compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the big-endian decoding). The
+ * <code>val</code> parameter contains the 5 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 160-bit input and output
+ */
+void sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5]);
+/** One-shot helper: init, process len bytes of msg, close into hash (20 bytes). */
+void sph_sha1_full( void *hash, const void *msg, size_t len );
+
+#endif