34 #include <openssl/sha.h>
36 #include <emmintrin.h>
/*
 * xor_salsa8_sse2(B, Bx):
 * XOR the 64-byte block Bx into the 64-byte block B, run eight Salsa20
 * rounds (four column/row double-rounds) over the result, and add the
 * mixed state back into B word by word.  B is updated in place; Bx is
 * only read.
 *
 * Each __m128i holds four 32-bit words.  The arithmetic below only lines
 * up with the Salsa20 quarter-rounds if lane i of the 16-word block holds
 * standard Salsa20 word (5 * i) % 16, i.e. X0 = {x0,x5,x10,x15},
 * X1 = {x4,x9,x14,x3}, X2 = {x8,x13,x2,x7}, X3 = {x12,x1,x6,x11}.
 * The caller is responsible for supplying data in that order.
 */
static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
{
	__m128i X0, X1, X2, X3;
	__m128i T;
	int i;

	/* B ^= Bx, and keep a working copy of the result in X0..X3. */
	X0 = B[0] = _mm_xor_si128(B[0], Bx[0]);
	X1 = B[1] = _mm_xor_si128(B[1], Bx[1]);
	X2 = B[2] = _mm_xor_si128(B[2], Bx[2]);
	X3 = B[3] = _mm_xor_si128(B[3], Bx[3]);

	/* Two rounds per iteration; 8 rounds in total. */
	for (i = 0; i < 8; i += 2) {
		/*
		 * First round.  SSE2 has no 32-bit rotate, so each
		 * "x ^= rotl(t, n)" is done as two XORs with the left- and
		 * right-shifted halves (shift counts sum to 32).
		 */
		T = _mm_add_epi32(X0, X3);
		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
		T = _mm_add_epi32(X1, X0);
		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
		T = _mm_add_epi32(X2, X1);
		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
		T = _mm_add_epi32(X3, X2);
		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));

		/*
		 * Rotate the lanes of X1, X2, X3 by one, two and three
		 * positions so the second round can reuse the same
		 * vertical add/xor pattern.
		 */
		X1 = _mm_shuffle_epi32(X1, 0x93);
		X2 = _mm_shuffle_epi32(X2, 0x4E);
		X3 = _mm_shuffle_epi32(X3, 0x39);

		/* Second round: same pattern with X1 and X3 swapped. */
		T = _mm_add_epi32(X0, X1);
		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
		T = _mm_add_epi32(X3, X0);
		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
		T = _mm_add_epi32(X2, X3);
		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
		T = _mm_add_epi32(X1, X2);
		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));

		/* Undo the lane rotation (inverse shuffles of the above). */
		X1 = _mm_shuffle_epi32(X1, 0x39);
		X2 = _mm_shuffle_epi32(X2, 0x4E);
		X3 = _mm_shuffle_epi32(X3, 0x93);
	}

	/* Feed-forward: B += mixed state (32-bit wrapping adds). */
	B[0] = _mm_add_epi32(B[0], X0);
	B[1] = _mm_add_epi32(B[1], X1);
	B[2] = _mm_add_epi32(B[2], X2);
	B[3] = _mm_add_epi32(B[3], X3);
}
/*
 * NOTE(review): this is a fragment of a function body; the function header,
 * the declarations of B, X, V, i, j, k and any statements on the listing
 * lines that are missing here are not visible in this excerpt.  Presumably
 * this is the body of scrypt_1024_1_1_256_sp_sse2() -- confirm against the
 * full file.
 */
/* Round scratchpad up to the next 64-byte boundary and use it as V. */
105 V = (__m128i *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
/*
 * Fill the 32 words of X.u32 from B via le32dec (presumably a little-endian
 * 32-bit decode -- helper not shown).  Within each 16-word half, destination
 * word i is read from source word (i * 5 % 16).
 */
109 for (k = 0; k < 2; k++) {
110 for (i = 0; i < 16; i++) {
111 X.u32[k * 16 + i] = le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
/*
 * First pass, 1024 iterations: save the eight vectors of X into
 * V[i * 8 .. i * 8 + 7], then mix the two four-vector halves of X into each
 * other with xor_salsa8_sse2 (second half into first, then first into
 * second).
 */
115 for (i = 0; i < 1024; i++) {
116 for (k = 0; k < 8; k++)
117 V[i * 8 + k] =
X.i128[k];
118 xor_salsa8_sse2(&
X.i128[0], &
X.i128[4]);
119 xor_salsa8_sse2(&
X.i128[4], &
X.i128[0]);
/*
 * Second pass, 1024 iterations: pick one of the 1024 saved entries using the
 * low 10 bits of X.u32[16], XOR its eight vectors into X, then mix the
 * halves again as above.  NOTE(review): X.u32 and X.i128 look like two views
 * of the same storage (a union), which would make u32[16] the first word of
 * i128[4] -- the declaration is not in this excerpt, confirm.
 */
121 for (i = 0; i < 1024; i++) {
122 j = 8 * (
X.u32[16] & 1023);
123 for (k = 0; k < 8; k++)
124 X.i128[k] = _mm_xor_si128(
X.i128[k], V[j + k]);
125 xor_salsa8_sse2(&
X.i128[0], &
X.i128[4]);
126 xor_salsa8_sse2(&
X.i128[4], &
X.i128[0]);
/*
 * Write the 32 words of X.u32 back into B via le32enc, using the same
 * (i * 5 % 16) index mapping as the load above so each word returns to the
 * position it was read from.
 */
129 for (k = 0; k < 2; k++) {
130 for (i = 0; i < 16; i++) {
131 le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
X.u32[k * 16 + i]);
void PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen)
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and write the output to buf.
void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad)