前几天做 mcmc 遇到一个魔改的 Chacha20算法,顺便学一下,抄的这份博客

Chacha20 加密算法

算法简介

ChaCha20-Poly1305 是一种 认证加密 算法。 ChaCha20-Poly1305加密时无需硬件加速,而且加密速度通常比AES-GCM更快,所以某些 移动设备中会优先采用ChaCha20-Poly1305加密算法。ChaCha20-Poly1305由两部分组成,分别是Poly1305和ChaCha20。 —— 摘自【维基百科】

初始化矩阵

ChaCha20 加密的初始状态包括

  1. 一个128位常量(Constant),常量的内容为 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
  2. 一个256位密钥(Key)
  3. 一个64位计数(Counter)
  4. 一个64位随机数(Nonce)

以上共 64 字节,排列成 4 × 4 的 32 位字矩阵,如下所示(实际运算按小端序)。注意:下文代码实现的是 RFC 7539 的变体,采用 32 位计数器加 96 位 Nonce,即最后一行为 Counter Nonce Nonce Nonce:

1 2 3 4
Constant Constant Constant Constant
Key Key Key Key
Key Key Key Key
Pos Pos Nonce Nonce

1/4 轮操作

在ChaCha20算法当中,一个基础的操作即为1/4轮运算,它主要操作4个32位的无符号整数,具体操作如下:

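(原图缺失)1/4 轮运算对 a、b、c、d 四个 32 位字依次执行以下四步(加法模 $2^{32}$,$\oplus$ 为按位异或,$\lll$ 为循环左移):

$$a \leftarrow a + b,\quad d \leftarrow (d \oplus a) \lll 16$$
$$c \leftarrow c + d,\quad b \leftarrow (b \oplus c) \lll 12$$
$$a \leftarrow a + b,\quad d \leftarrow (d \oplus a) \lll 8$$
$$c \leftarrow c + d,\quad b \leftarrow (b \oplus c) \lll 7$$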

代码如下:

1
2
3
4
5
6
7
8
9
10
11
/*
 * ChaCha quarter round on the four state words x[a], x[b], x[c], x[d].
 *
 * Two add/xor/rotate passes are run back to back; they differ only in the
 * rotation amounts: (16, 12) in the first pass and (8, 7) in the second.
 * The words are updated in place.
 */
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d)
{
    static const int rot_d[2] = {16, 8};  /* rotation applied to x[d] in each pass */
    static const int rot_b[2] = {12, 7};  /* rotation applied to x[b] in each pass */
    int pass;

    for (pass = 0; pass < 2; pass++) {
        x[a] += x[b];
        x[d] ^= x[a];
        x[d] = rotl32(x[d], rot_d[pass]);

        x[c] += x[d];
        x[b] ^= x[c];
        x[b] = rotl32(x[b], rot_b[pass]);
    }
}

块函数

块函数的输入是之前生成的状态矩阵,最终输出 64 字节(而非 64 bit)的“随机化”字节块。块函数中的轮运算分两种情况:轮数为奇数时做列轮(column round),即对原矩阵的每一列做一次 1/4 轮运算(下表右侧把每一列写成了一行),如下:

Original Matrix Row Transform(Odd round)
x[0], x[1], x[2], x[3] x[0], x[4], x[8], x[C]
x[4], x[5], x[6], x[7] x[1], x[5], x[9], x[D]
x[8], x[9], x[A], x[B] x[2], x[6], x[A], x[E]
x[C], x[D], x[E], x[F] x[3], x[7], x[B], x[F]

轮数为偶数时做对角线轮(diagonal round),即对每条对角线上的四个字做一次 1/4 轮运算,如下:

Original Matrix Row Transform(Odd round) Diagonal Transform(even round)
x[0], x[1], x[2], x[3] x[0], x[4], x[8], x[C] x[0], x[5], x[A], x[F]
x[4], x[5], x[6], x[7] x[1], x[5], x[9], x[D] x[1], x[6], x[B], x[C]
x[8], x[9], x[A], x[B] x[2], x[6], x[A], x[E] x[2], x[7], x[8], x[D]
x[C], x[D], x[E], x[F] x[3], x[7], x[B], x[F] x[3], x[4], x[9], x[E]

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * ChaCha block function: turn the 16-word state `in` into 64 output bytes.
 *
 * in          input state; it is read but never modified.
 * out         receives the 64 serialized bytes.
 * num_rounds  number of rounds, typically 20. Each loop iteration performs
 *             two rounds (one column round and one diagonal round) and
 *             decrements the counter by 2.
 */
static void chacha20_block(uint32_t in[16], uint8_t out[64], int num_rounds)
{
    uint32_t work[16];
    int remaining;
    int w;

    /* Work on a private copy so the input can be added back afterwards. */
    for (w = 0; w < 16; w++) {
        work[w] = in[w];
    }

    remaining = num_rounds;
    while (remaining > 0) {
        /* odd round: quarter round down each column */
        chacha20_quarterround(work, 0, 4, 8, 12);
        chacha20_quarterround(work, 1, 5, 9, 13);
        chacha20_quarterround(work, 2, 6, 10, 14);
        chacha20_quarterround(work, 3, 7, 11, 15);
        /* even round: quarter round along each diagonal */
        chacha20_quarterround(work, 0, 5, 10, 15);
        chacha20_quarterround(work, 1, 6, 11, 12);
        chacha20_quarterround(work, 2, 7, 8, 13);
        chacha20_quarterround(work, 3, 4, 9, 14);
        remaining -= 2;
    }

    /* Feed-forward: add the original state word by word (mod 2^32). */
    for (w = 0; w < 16; w++) {
        work[w] += in[w];
    }

    chacha20_serialize(work, out);
}

再嫖一个完整的轮子

C语言实现

Chacha20.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include <stdint.h>
#include <string.h>
#include "chacha20.h"

// Store the 32-bit word v into p[0..3], least-significant byte first
// (little-endian).
static inline void u32t8le(uint32_t v, uint8_t p[4]) {
    int k;

    for (k = 0; k < 4; k++) {
        p[k] = (uint8_t)((v >> (8 * k)) & 0xff);
    }
}

// Assemble a 32-bit word from the four bytes p[0..3], where p[0] is the
// least-significant byte (little-endian).
static inline uint32_t u8t32le(uint8_t p[4]) {
    uint32_t word = 0;
    int k;

    // Fold from the most-significant byte down so p[0] ends up in the low bits.
    for (k = 3; k >= 0; k--) {
        word = (word << 8) | p[k];
    }

    return word;
}

// Rotate the 32-bit value x left by n bits.
// The right-shift count is computed as (-n & 31) so that n == 0 does not
// produce a shift by 32; see http://blog.regehr.org/archives/1063
static inline uint32_t rotl32(uint32_t x, int n) {
    const uint32_t shifted_out = x >> (-n & 31);  // bits that wrap around to the bottom
    const uint32_t shifted_up = x << n;           // bits that stay, moved up

    return shifted_up | shifted_out;
}

// https://tools.ietf.org/html/rfc7539#section-2.1
// One ChaCha quarter round: mixes the state words x[a], x[b], x[c], x[d] in
// place using four add / xor / rotate-left steps (rotations 16, 12, 8, 7).
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    uint32_t *const wa = &x[a];
    uint32_t *const wb = &x[b];
    uint32_t *const wc = &x[c];
    uint32_t *const wd = &x[d];

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 16);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 12);

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 8);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 7);
}

// Write the 16 state words of `in` to `output` as 64 bytes: word k occupies
// output[4k .. 4k+3], each stored via u32t8le.
static void chacha20_serialize(uint32_t in[16], uint8_t output[64]) {
    uint8_t *dst = output;
    int word;

    for (word = 0; word < 16; word++) {
        u32t8le(in[word], dst);
        dst += 4;
    }
}

// ChaCha block function: derive 64 output bytes from the 16-word state `in`.
//
// in          input state; read only, never modified.
// out         receives the 64 serialized bytes.
// num_rounds  number of rounds, typically 20; each loop iteration runs two
//             rounds (4 column quarter rounds, then 4 diagonal quarter rounds).
static void chacha20_block(uint32_t in[16], uint8_t out[64], int num_rounds) {
    // Word indices fed to each quarter round of one double round, in order.
    static const int lanes[8][4] = {
        // odd round: columns
        {0, 4, 8, 12}, {1, 5, 9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
        // even round: diagonals
        {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7, 8, 13}, {3, 4, 9, 14},
    };
    uint32_t x[16];
    int left;
    int q;
    int w;

    memcpy(x, in, sizeof x);

    for (left = num_rounds; left > 0; left -= 2) {
        for (q = 0; q < 8; q++) {
            chacha20_quarterround(x, lanes[q][0], lanes[q][1], lanes[q][2], lanes[q][3]);
        }
    }

    // Feed-forward: add the untouched input state back in (mod 2^32).
    for (w = 0; w < 16; w++) {
        x[w] += in[w];
    }

    chacha20_serialize(x, out);
}

// https://tools.ietf.org/html/rfc7539#section-2.3
// Fill the 16-word state `s`:
//   s[0..3]   fixed constants
//   s[4..11]  the 32-byte key, read as eight little-endian words
//   s[12]     the block counter
//   s[13..15] the 12-byte nonce, read as three little-endian words
static void chacha20_init_state(uint32_t s[16], uint8_t key[32], uint32_t counter, uint8_t nonce[12]) {
    // refer: https://dxr.mozilla.org/mozilla-beta/source/security/nss/lib/freebl/chacha20.c
    // These magic numbers are the string "expand 32-byte k" read as
    // little-endian 32-bit words.
    static const uint32_t sigma[4] = {
        0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
    };
    int w;

    for (w = 0; w < 4; w++) {
        s[w] = sigma[w];
    }

    for (w = 0; w < 8; w++) {
        s[4 + w] = u8t32le(key + 4 * w);
    }

    s[12] = counter;

    for (w = 0; w < 3; w++) {
        s[13 + w] = u8t32le(nonce + 4 * w);
    }
}

// XOR `inlen` bytes of `in` with the ChaCha20 keystream and store the result
// in `out`. Running the same call over the output restores the input, so this
// one function serves as both encryption and decryption.
//
// key      32-byte key.
// counter  block counter used for the first 64-byte keystream block; it is
//          incremented by one for every further block.
// nonce    12-byte nonce.
// in, out  input and output buffers, each at least `inlen` bytes.
// inlen    number of bytes to process; values <= 0 process nothing.
//
// Offsets are tracked as size_t: the previous `int` loop variables overflowed
// (undefined behaviour) on `i += 64` / `i + 64` once inlen came within 63 of
// INT_MAX.
void ChaCha20XOR(uint8_t key[32], uint32_t counter, uint8_t nonce[12], uint8_t *in, uint8_t *out, int inlen) {
    uint32_t s[16];
    uint8_t block[64];
    size_t total;
    size_t done;

    if (inlen <= 0) {
        return;
    }
    total = (size_t)inlen;

    chacha20_init_state(s, key, counter, nonce);

    for (done = 0; done < total; done += 64) {
        size_t chunk = total - done;  // bytes left; the final block may be partial
        size_t k;

        if (chunk > 64) {
            chunk = 64;
        }

        chacha20_block(s, block, 20);
        s[12]++;  // advance the block counter for the next keystream block

        for (k = 0; k < chunk; k++) {
            out[done + k] = in[done + k] ^ block[k];
        }
    }
}

Chacha20.h

1
2
3
4
5
6
7
8
#ifndef CHACHA20_H
#define CHACHA20_H
#include <stdint.h>

/*
 * XOR `inputlen` bytes of `input` with the ChaCha20 keystream derived from
 * `key` (32 bytes), the starting block `counter` and `nonce` (12 bytes), and
 * write the result to `output`. Applying the same call to the output yields
 * the original input again, so it is used for both encryption and decryption.
 */
void ChaCha20XOR(uint8_t key[32], uint32_t counter, uint8_t nonce[12], uint8_t *input, uint8_t *output, int inputlen);

#endif /* CHACHA20_H */

main.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#include <stdio.h>
#include "chacha20.h"

/*
 * Print `label`, then `len` bytes of `buf` as two-digit lowercase hex values,
 * starting a new line before every 16th byte (including the first).
 */
static void dump_hex(const char *label, const uint8_t *buf, int len) {
    int k;

    printf("%s", label);
    for (k = 0; k < len; k++) {
        if (k % 16 == 0) {
            printf("\n");
        }
        printf("%02x ", buf[k]);
    }
}

/*
 * Demo: encrypt a 114-byte message with ChaCha20XOR, decrypt it again with
 * the same key / counter / nonce, and hex-dump every buffer involved.
 */
int main(int argc, char **argv) {
    enum { MSG_LEN = 114 };
    int i;

    uint8_t key[] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
    };

    /* nonce */
    uint8_t nonce[] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x00
    };

    uint8_t input[MSG_LEN] = {
        0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
        0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
        0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
        0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
        0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
        0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
        0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
        0x74, 0x2e
    };

    uint8_t encrypt[MSG_LEN];
    uint8_t decrypt[MSG_LEN];

    (void)argc;
    (void)argv;

    /* The literal 1 is the initial block counter. */
    ChaCha20XOR(key, 1, nonce, input, encrypt, MSG_LEN);
    ChaCha20XOR(key, 1, nonce, encrypt, decrypt, MSG_LEN);

    dump_hex("\nkey:", key, 32);

    printf("\n\nnonce:\n");
    for (i = 0; i < 12; i++) {
        printf("%02x ", nonce[i]);
    }

    dump_hex("\n\nplaintext:", input, MSG_LEN);
    dump_hex("\n\nencrypted:", encrypt, MSG_LEN);
    dump_hex("\n\ndecrypted:", decrypt, MSG_LEN);

    printf("\n");
    return 0;
}

python 实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def main():
    """Entry point: run the self-test routine."""
    runtests()

def chacha20_decrypt(key, counter, nonce, ciphertext):
    """Decrypt ``ciphertext``.

    Decryption is the same keystream XOR as encryption, so this simply
    delegates to ``chacha20_encrypt`` with identical arguments.
    """
    recovered = chacha20_encrypt(key, counter, nonce, ciphertext)
    return recovered

def chacha20_encrypt(key, counter, nonce, plaintext):
    """XOR ``plaintext`` with the ChaCha20 keystream and return the result as bytes.

    The message is processed in 64-byte chunks. Chunk number ``k`` (starting
    at 0) uses the keystream block generated with block counter
    ``counter + k``. A trailing chunk shorter than 64 bytes only consumes the
    keystream bytes it needs.
    """
    pieces = []
    for block_index, offset in enumerate(range(0, len(plaintext), 64)):
        chunk = plaintext[offset:offset + 64]
        key_stream = serialize(chacha20_block(key, counter + block_index, nonce))
        # zip stops at the shorter operand, which handles the partial last chunk.
        pieces.append(bytes(p ^ k for p, k in zip(chunk, key_stream)))

    return b''.join(pieces)

def chacha20_block(key, counter, nonce):
    """Run the ChaCha20 block function and return a list of 16 32-bit unsigned integers.

    ``key`` and ``nonce`` are lists of 32-bit words (they are concatenated
    with the constants and ``[counter]`` to form the state). Ten iterations
    of ``inner_block`` are applied to a copy of that state, after which the
    initial state is added back word by word modulo 2**32.
    """
    constants = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]
    initial = constants + key + [counter] + nonce
    working = list(initial)

    for _ in range(10):
        inner_block(working)

    working[:16] = [add_32(working[i], initial[i]) for i in range(16)]
    return working

def inner_block(state):
    """Apply one double round to ``state`` in place.

    Four quarter rounds run down the columns, followed by four along the
    diagonals.
    """
    columns = ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15))
    diagonals = ((0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14))

    for i1, i2, i3, i4 in columns + diagonals:
        quarterround(state, i1, i2, i3, i4)

def xor_32(x, y):
    """Return ``x XOR y`` truncated to the low 32 bits."""
    mask32 = 0xffffffff
    combined = x ^ y
    return combined & mask32

def add_32(x, y):
    """Return ``x + y`` reduced modulo 2**32 by masking to the low 32 bits."""
    mask32 = 0xffffffff
    total = x + y
    return total & mask32

def rot_l32(x, n):
    """Rotate the 32-bit value ``x`` left by ``n`` bits and return the 32-bit result."""
    mask32 = 0xffffffff
    moved_up = x << n
    wrapped = x >> (32 - n)
    return (moved_up | wrapped) & mask32

def quarterround(state, i1, i2, i3, i4):
    """Apply the ChaCha quarter round to four words of ``state`` in place.

    The words at indices ``i1``..``i4`` are read into locals, mixed with two
    add / xor / rotate passes (rotations 16 and 12, then 8 and 7), and
    written back.
    """
    a, b, c, d = state[i1], state[i2], state[i3], state[i4]

    for d_shift, b_shift in ((16, 12), (8, 7)):
        a = add_32(a, b)
        d = rot_l32(xor_32(d, a), d_shift)
        c = add_32(c, d)
        b = rot_l32(xor_32(b, c), b_shift)

    state[i1], state[i2], state[i3], state[i4] = a, b, c, d

def serialize(block):
    """Concatenate the words of ``block`` as 4-byte little-endian values and return bytes."""
    out = bytearray()
    for word in block:
        out += word.to_bytes(4, 'little')
    return bytes(out)

# Round-trip self-test.
# NOTE(review): the original header here read "Test Vectors from RFC 8439",
# but no expected ciphertext is compared below -- only an encrypt/decrypt
# round trip is asserted, and the values do not look like the RFC's
# published vector. Confirm their origin before relying on that label.
def runtests():
    """Encrypt a fixed message, print the ciphertext as hex, and assert that decrypting restores it."""
    key = [0x2519EB0A, 0x909CE82E, 0xD6C085EC, 0x545ACF07, 0x24124049, 0x1E1353E7, 0x14AD4F2F, 0xE98FF6DE]
    plaintext = b"\x8e\x91\x9e\xbe\x6a\x6c\x64\xc1\x02\x02\xf8\xda\xc4\xc8\xd6\x14\xa0\xa3\x9c\x0e\x62\x64\x70\x6d\x02\x02\x0c\x9d\xd2\xd6\xc6\xa8"
    nonce = [0x7369C667, 0xEC4AFF51, 0xABBACD29]
    init_counter = 0x00000001

    ciphertext = chacha20_encrypt(key, init_counter, nonce, plaintext)
    for byte in ciphertext:
        # hex digits without the "0x" prefix, separated by spaces
        print(format(byte, 'x'), end=" ")

    recovered = chacha20_decrypt(key, init_counter, nonce, ciphertext)
    assert recovered == plaintext

    print("All tests passed!")

# Run the self-test when this file is executed.
main()

Salsa20 加密算法

其实 Salsa20 和 ChaCha20 非常相似:ChaCha20 是在 Salsa20 的基础上稍作调整得到的,数据 bit 扩散更快。每一个 1/4 轮会修改每个字两次,每一个输入字也都会影响到输出字。两种加密算法只有四分之一轮操作有一点点不同:

ChaCha20是这样的:

1
2
3
4
5
6
7
8
9
10
11
12
// Rotate the 32-bit value x left by n bits.
// The right-shift count is reduced modulo 32 so that n == 0 does not turn
// into a shift by 32; see http://blog.regehr.org/archives/1063
static inline uint32_t rotl32(uint32_t x, int n) {
    const int back = (32 - n) & 31;  // same value as (-n & 31)

    return (x >> back) | (x << n);
}

// https://tools.ietf.org/html/rfc7539#section-2.1
// ChaCha quarter round. Each step is "add, xor, rotate left": a word is added
// into its neighbour, the sum is xored into a third word, and that word is
// rotated. The four steps use rotations 16, 12, 8 and 7.
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl32(x[d], 16);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl32(x[b], 12);

    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl32(x[d], 8);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl32(x[b], 7);
}

Salsa20则是这样的:

1
2
3
4
5
6
/*
 * Salsa20 quarter round. Each step adds two words, rotates the sum left, and
 * xors it into a third word; the rotations are 7, 9, 13 and 18.
 * The arguments must be lvalues, and each one is evaluated several times.
 * Relies on a rotate-left macro R(x, n) being defined.
 */
#define quarter(a, b, c, d)           \
    do {                              \
        (b) ^= R((d) + (a), 7);       \
        (c) ^= R((a) + (b), 9);       \
        (d) ^= R((b) + (c), 13);      \
        (a) ^= R((c) + (d), 18);      \
    } while (0)

代码实现

C语言实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <stdio.h>
#include <stdint.h> // we use 32-bit words

// Rotate x left by n bits: the bits pushed past the left edge come back in on
// the right. Written for 32-bit unsigned operands with 0 < n < 32; both
// arguments are evaluated twice.
#define R(x, n) \
    (((x) >> (32 - (n))) | ((x) << (n)))

// Salsa20 quarter round on four 32-bit words.
// The additions wrap modulo 2^32. According to the spec, the choice of the
// rotation amounts 7, 9, 13, 18 "doesn't seem very important".
// The arguments must be lvalues, and each one is evaluated several times.
#define quarter(a, b, c, d)           \
    do {                              \
        (b) ^= R((d) + (a), 7);       \
        (c) ^= R((a) + (b), 9);       \
        (d) ^= R((b) + (c), 13);      \
        (a) ^= R((c) + (d), 18);      \
    } while (0)

// Salsa20 core on 16 words: run 10 double rounds over a copy of `in`, then
// add `in` back word by word and store the 16 results in `out`.
//
// The working state w[] is the 4x4 matrix stored row-major, i.e. element
// (row, col) lives at w[4*row + col].
//
// NOTE(review): the original carried a commented-out list of the ChaCha20
// quarter-round index calls here, with an author's remark that the two
// permutations are the same; that remark is not verified by this code.
void salsa20_words(uint32_t *out, uint32_t in[16]) {
    uint32_t w[16];
    int i;
    int pass;

    for (i = 0; i < 16; ++i) {
        w[i] = in[i];
    }

    for (pass = 0; pass < 10; ++pass) { // 10 double rounds = 20 rounds
        // column round: one quarter round per column, starting at the
        // diagonal element of that column and wrapping around
        quarter(w[0],  w[4],  w[8],  w[12]);
        quarter(w[5],  w[9],  w[13], w[1]);
        quarter(w[10], w[14], w[2],  w[6]);
        quarter(w[15], w[3],  w[7],  w[11]);
        // row round: one quarter round per row, starting at the diagonal
        // element of that row and wrapping around
        quarter(w[0],  w[1],  w[2],  w[3]);
        quarter(w[5],  w[6],  w[7],  w[4]);
        quarter(w[10], w[11], w[8],  w[9]);
        quarter(w[15], w[12], w[13], w[14]);
    }

    for (i = 0; i < 16; ++i) {
        out[i] = w[i] + in[i];
    }
}

// Read the four bytes at p as a little-endian 32-bit word.
// Every byte is widened to uint32_t before it is shifted. The previous LE()
// macro shifted a promoted int, so a top byte >= 0x80 was shifted into the
// sign bit, which is undefined behaviour.
static uint32_t salsa20_load_le32(const uint8_t *p) {
    return (uint32_t)p[0]
         | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16)
         | ((uint32_t)p[3] << 24);
}

// Produce one 64-byte keystream block.
//
// The 16-word input to salsa20_words is assembled from the constant string
// "expand 32-byte k", the 32-byte key, the 64-bit message nonce and the
// 64-bit block index, in this order:
//   c0 k0 k1 k2 | k3 c1 n0 n1 | i0 i1 c2 k4 | k5 k6 k7 c3
// The 16 result words are then written to `out` in little-endian byte order.
//
// out    receives 64 bytes.
// key    32-byte key.
// nonce  per-message nonce (low word first in the state).
// index  keystream block index (low word first in the state).
void salsa20_block(uint8_t *out, uint8_t key[32], uint64_t nonce, uint64_t index) {
    // "expand 32-byte k" -- arbitrary constant, stored without a terminator
    static const uint8_t c[16] = {
        'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
        '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
    };
    uint32_t in[16] = {
        salsa20_load_le32(c),        salsa20_load_le32(key),
        salsa20_load_le32(key + 4),  salsa20_load_le32(key + 8),
        salsa20_load_le32(key + 12), salsa20_load_le32(c + 4),
        (uint32_t)(nonce & 0xffffffff), (uint32_t)(nonce >> 32),
        (uint32_t)(index & 0xffffffff), (uint32_t)(index >> 32),
        salsa20_load_le32(c + 8),    salsa20_load_le32(key + 16),
        salsa20_load_le32(key + 20), salsa20_load_le32(key + 24),
        salsa20_load_le32(key + 28), salsa20_load_le32(c + 12)
    };
    uint32_t wordout[16];
    int i;

    salsa20_words(wordout, in);

    for (i = 0; i < 64; ++i) {
        out[i] = (uint8_t)(0xff & (wordout[i / 4] >> (8 * (i % 4))));
    }
}

// Encrypt or decrypt in place: XOR `mlen` bytes of `message` with the Salsa20
// keystream for `key` and the per-message `nonce`. A fresh 64-byte keystream
// block is generated at every 64-byte boundary, using the block index
// (offset / 64). Applying the function twice restores the original message.
//
// The offset is a uint64_t to match `mlen`; the previous `int` counter
// overflowed (undefined behaviour) for messages longer than INT_MAX bytes.
void salsa20(uint8_t *message, uint64_t mlen, uint8_t key[32], uint64_t nonce) {
    uint8_t block[64];
    uint64_t i;

    for (i = 0; i < mlen; i++) {
        if (i % 64 == 0) {
            salsa20_block(block, key, nonce, i / 64);
        }
        message[i] ^= block[i % 64];
    }
}

// Reference keystream quoted for the inputs used below (all-zero key, all-zero IV):
//Set 2, vector# 0:
// key = 00000000000000000000000000000000
// 00000000000000000000000000000000
// IV = 0000000000000000
// stream[0..63] = 9A97F65B9B4C721B960A672145FCA8D4
// E32E67F9111EA979CE9C4826806AEEE6
// 3DE9C0DA2BD7F91EBCB2639BF989C625
// 1B29BF38D39A9BDCE7C55F4B2AC12A39

// Demo: process a 64-byte all-zero message with an all-zero key and nonce,
// print the resulting bytes as uppercase hex, then print how many were shown.
int main () {
    uint8_t key[32] = {0};
    uint64_t nonce = 0;
    uint8_t msg[64] = {0}; // the message is all zeros, so after the XOR it holds the raw keystream

    salsa20(msg, sizeof(msg), key, nonce);

    int shown = 0;
    while (shown < (int)sizeof(msg)) {
        printf("%02X ", msg[shown]);
        ++shown;
    }
    printf("\n");

    printf("\n%d\n", shown);
    return 0;
}

python 实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
class Salsa:
    """Salsa20 block function operating on a 16-word state.

    An instance is configured with a round count and is then called with a
    32-byte key, an 8-byte nonce and an 8-byte block counter (each a sequence
    of byte values). The call returns the 16 output words as a list of
    32-bit integers.
    """

    # (target, operand, operand, rotation) for the 16 add-rotate-xor steps of
    # one round, in execution order: four quarter rounds of four steps each.
    _STEPS = (
        # quarterround 1
        (4, 0, 12, 7), (8, 0, 4, 9), (12, 4, 8, 13), (0, 8, 12, 18),
        # quarterround 2
        (9, 1, 5, 7), (13, 5, 9, 9), (1, 9, 13, 13), (5, 1, 13, 18),
        # quarterround 3
        (14, 6, 10, 7), (2, 10, 14, 9), (6, 2, 14, 13), (10, 2, 6, 18),
        # quarterround 4
        (3, 11, 15, 7), (7, 3, 15, 9), (11, 3, 7, 13), (15, 7, 11, 18),
    )

    def __init__(self, r=20):
        """Store the number of rounds ``r`` (must be >= 0) and the 32-bit mask."""
        assert r >= 0
        self._r = r              # number of rounds
        self._mask = 0xffffffff  # 32-bit mask

    def __call__(self, key=[0]*32, nonce=[0]*8, block_counter=[0]*8):
        """Build the initial state, run ``r`` rounds, add the initial state back, and return the words."""
        assert len(key) == 32
        assert len(nonce) == 8
        assert len(block_counter) == 8

        # Split the byte sequences into little-endian 32-bit words.
        k = [self._littleendian(key[pos:pos + 4]) for pos in range(0, 32, 4)]
        n = [self._littleendian(nonce[pos:pos + 4]) for pos in range(0, 8, 4)]
        b = [self._littleendian(block_counter[pos:pos + 4]) for pos in range(0, 8, 4)]
        c = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]

        initial = [c[0], k[0], k[1], k[2],
                   k[3], c[1], n[0], n[1],
                   b[0], b[1], c[2], k[4],
                   k[5], k[6], k[7], c[3]]

        # the working state
        self._s = list(initial)

        for _ in range(self._r):
            self._round()

        # add initial state to the final one
        self._s = [(final + start) & self._mask
                   for final, start in zip(self._s, initial)]

        return self._s

    def _littleendian(self, b):
        """Combine four byte values (least-significant first) into one integer."""
        assert len(b) == 4
        value = 0
        for position, byte in enumerate(b):
            value ^= byte << (8 * position)
        return value

    def _round(self):
        """Run one round on ``self._s``: four quarter rounds, then a transpose of the 4x4 state."""
        s = self._s
        for target, lhs, rhs, shift in self._STEPS:
            s[target] ^= self._rotl32((s[lhs] + s[rhs]) & self._mask, shift)

        # transpose: element (row, col) moves to (col, row)
        self._s = [s[4 * (i % 4) + i // 4] for i in range(16)]

    def _rotl32(self, w, r):
        """Rotate ``w`` left by ``r`` bits within 32 bits."""
        upper = (w << r) & self._mask
        lower = w >> (32 - r)
        return upper | lower




if __name__ == '__main__':
    salsa20 = Salsa()

    # Each entry is [key bytes, nonce bytes, block-counter bytes]; these are
    # the parameters passed to the cipher below.
    #
    # An alternative vector that was left commented out, with its listed
    # expected output words as a fourth element:
    # vectors = [
    #     [ [0]*32, [3,1,4,1,5,9,2,6], [7,0,0,0,0,0,0,0],
    #       [ 0xb9a205a3,0x0695e150,0xaa94881a,0xadb7b12c,
    #         0x798942d4,0x26107016,0x64edb1a4,0x2d27173f,
    #         0xb1c7f1fa,0x62066edc,0xe035fa23,0xc4496f04,
    #         0x2131e6b3,0x810bde28,0xf62cb407,0x6bdede3d ] ] ]
    vectors = [
        [[0]*32, [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]]]

    for v in vectors:
        print(f"v[0] => {v[0]}")
        print(f"v[1] => {v[1]}")
        print(f"v[2] => {v[2]}")
        s = salsa20(v[0], v[1], v[2])

        # Split every output word into four bytes, least-significant first.
        stream_key = []
        for word in s:
            for shift in (0, 8, 16, 24):
                stream_key.append((word >> shift) & 0xff)

        print()
        for byte in stream_key:
            print(hex(byte), end=" ")  # the resulting bytes are the keystream