salsa20 Algorithm

The Salsa20 algorithm is a stream cipher designed by cryptographer Daniel J. Bernstein in 2005. It is known for its speed, simplicity, and strong security properties. The algorithm is based on a pseudorandom function that generates a keystream, which is then XORed with the plaintext to produce the ciphertext. It utilizes a 256-bit key, a 64-bit nonce (number used once), and a 64-bit block counter to create a unique keystream for each message. Salsa20 operates in 20 rounds, which provide a good trade-off between security and performance. The core of the algorithm is a set of simple operations, such as addition, bitwise exclusive OR (XOR), and constant-distance rotation, which are applied to a 4x4 matrix of 32-bit words. One of the notable features of Salsa20 is that it is resistant to known cryptographic attacks, such as differential and linear cryptanalysis, due to its large number of rounds and the diffusion properties of its operations. It has been extensively analyzed and is considered to be secure for use in various cryptographic applications, including encryption, authentication, and random number generation. Salsa20 has also inspired the development of other popular stream ciphers, such as ChaCha20, which is a variant of Salsa20 with a slightly modified round function and increased diffusion. Both Salsa20 and ChaCha20 are widely used in modern cryptography and have been adopted in several cryptographic libraries and protocols, including the Noise protocol framework and the Tor anonymity network.
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use buffer::{BufferResult, RefReadBuffer, RefWriteBuffer};
use symmetriccipher::{Encryptor, Decryptor, SynchronousStreamCipher, SymmetricCipherError};
use cryptoutil::{read_u32_le, symm_enc_or_dec, write_u32_le, xor_keystream};
use simd::u32x4;

use std::cmp;

#[derive(Clone, Copy)]
struct SalsaState {
  a: u32x4,
  b: u32x4,
  c: u32x4,
  d: u32x4
}

#[derive(Copy)]
pub struct Salsa20 {
    state: SalsaState,
    output: [u8; 64],
    offset: usize,
}

impl Clone for Salsa20 { fn clone(&self) -> Salsa20 { *self } }

const S7:u32x4 = u32x4(7, 7, 7, 7);
const S9:u32x4 = u32x4(9, 9, 9, 9);
const S13:u32x4 = u32x4(13, 13, 13, 13);
const S18:u32x4 = u32x4(18, 18, 18, 18);
const S32:u32x4 = u32x4(32, 32, 32, 32);

macro_rules! prepare_rowround {
    ($a: expr, $b: expr, $c: expr) => {{
        let u32x4(a10, a11, a12, a13) = $a;
        $a = u32x4(a13, a10, a11, a12);
        let u32x4(b10, b11, b12, b13) = $b;
        $b = u32x4(b12, b13, b10, b11);
        let u32x4(c10, c11, c12, c13) = $c;
        $c = u32x4(c11, c12, c13, c10);
    }}
}

macro_rules! prepare_columnround {
    ($a: expr, $b: expr, $c: expr) => {{
        let u32x4(a13, a10, a11, a12) = $a;
        $a = u32x4(a10, a11, a12, a13);
        let u32x4(b12, b13, b10, b11) = $b;
        $b = u32x4(b10, b11, b12, b13);
        let u32x4(c11, c12, c13, c10) = $c;
        $c = u32x4(c10, c11, c12, c13);
    }}
}

macro_rules! add_rotate_xor {
    ($dst: expr, $a: expr, $b: expr, $shift: expr) => {{
        let v = $a + $b;
        let r = S32 - $shift;
        let right = v >> r;
        $dst = $dst ^ (v << $shift) ^ right
    }}
}

fn columnround(state: &mut SalsaState) -> () {
    add_rotate_xor!(state.a, state.d, state.c, S7);
    add_rotate_xor!(state.b, state.a, state.d, S9);
    add_rotate_xor!(state.c, state.b, state.a, S13);
    add_rotate_xor!(state.d, state.c, state.b, S18);
}

fn rowround(state: &mut SalsaState) -> () {
    add_rotate_xor!(state.c, state.d, state.a, S7);
    add_rotate_xor!(state.b, state.c, state.d, S9);
    add_rotate_xor!(state.a, state.c, state.b, S13);
    add_rotate_xor!(state.d, state.a, state.b, S18);
}

impl Salsa20 {
    pub fn new(key: &[u8], nonce: &[u8]) -> Salsa20 {
        assert!(key.len() == 16 || key.len() == 32);
        assert!(nonce.len() == 8);
        Salsa20 { state: Salsa20::expand(key, nonce), output: [0; 64], offset: 64 }
    }

    pub fn new_xsalsa20(key: &[u8], nonce: &[u8]) -> Salsa20 {
        assert!(key.len() == 32);
        assert!(nonce.len() == 24);
        let mut xsalsa20 = Salsa20 { state: Salsa20::expand(key, &nonce[0..16]), output: [0; 64], offset: 64 };

        let mut new_key = [0; 32];
        xsalsa20.hsalsa20_hash(&mut new_key);
        xsalsa20.state = Salsa20::expand(&new_key, &nonce[16..24]);

        xsalsa20
    }

    fn expand(key: &[u8], nonce: &[u8]) -> SalsaState {
        let constant = match key.len() {
            16 => b"expand 16-byte k",
            32 => b"expand 32-byte k",
            _  => unreachable!(),
        };

        // The state vectors are laid out to facilitate SIMD operation,
        // instead of the natural matrix ordering.
        //
        //  * Constant (x0, x5, x10, x15)
        //  * Key (x1, x2, x3, x4, x11, x12, x13, x14)
        //  * Input (x6, x7, x8, x9)

        let key_tail; // (x11, x12, x13, x14)
        if key.len() == 16 {
            key_tail = key;
        } else {
            key_tail = &key[16..32];
        }

        let x8; let x9; // (x8, x9)
        if nonce.len() == 16 {
            // HSalsa uses the full 16 byte nonce.
            x8 = read_u32_le(&nonce[8..12]);
            x9 = read_u32_le(&nonce[12..16]);
        } else {
            x8 = 0;
            x9 = 0;
        }

        SalsaState {
            a: u32x4(
                read_u32_le(&key[12..16]),      // x4
                x9,                             // x9
                read_u32_le(&key_tail[12..16]), // x14
                read_u32_le(&key[8..12]),       // x3
            ),
            b: u32x4(
                x8,                             // x8
                read_u32_le(&key_tail[8..12]),  // x13
                read_u32_le(&key[4..8]),        // x2
                read_u32_le(&nonce[4..8])       // x7
            ),
            c: u32x4(
                read_u32_le(&key_tail[4..8]),   // x12
                read_u32_le(&key[0..4]),        // x1
                read_u32_le(&nonce[0..4]),      // x6
                read_u32_le(&key_tail[0..4])    // x11
            ),
            d: u32x4(
                read_u32_le(&constant[0..4]),   // x0
                read_u32_le(&constant[4..8]),   // x5
                read_u32_le(&constant[8..12]),  // x10
                read_u32_le(&constant[12..16]), // x15
            )
        }
    }

    fn hash(&mut self) {
        let mut state = self.state;
        for _ in 0..10 {
            columnround(&mut state);
            prepare_rowround!(state.a, state.b, state.c);
            rowround(&mut state);
            prepare_columnround!(state.a, state.b, state.c);
        }
        let u32x4(x4, x9, x14, x3) = self.state.a + state.a;
        let u32x4(x8, x13, x2, x7) = self.state.b + state.b;
        let u32x4(x12, x1, x6, x11) = self.state.c + state.c;
        let u32x4(x0, x5, x10, x15) = self.state.d + state.d;
        let lens = [
             x0,  x1,  x2,  x3,
             x4,  x5,  x6,  x7,
             x8,  x9, x10, x11,
            x12, x13, x14, x15
        ];
        for i in 0..lens.len() {
            write_u32_le(&mut self.output[i*4..(i+1)*4], lens[i]);
        }

        self.state.b = self.state.b + u32x4(1, 0, 0, 0);
        let u32x4(_, _, _, ctr_lo) = self.state.b;
        if ctr_lo == 0 {
            self.state.a = self.state.a + u32x4(0, 1, 0, 0);
        }

        self.offset = 0;
    }

    fn hsalsa20_hash(&mut self, out: &mut [u8]) {
        let mut state = self.state;
        for _ in 0..10 {
            columnround(&mut state);
            prepare_rowround!(state.a, state.b, state.c);
            rowround(&mut state);
            prepare_columnround!(state.a, state.b, state.c);
        }
        let u32x4(_, x9, _, _) = state.a;
        let u32x4(x8, _, _, x7) = state.b;
        let u32x4(_, _, x6, _) = state.c;
        let u32x4(x0, x5, x10, x15) = state.d;
        let lens = [
            x0, x5, x10, x15,
            x6, x7, x8, x9
        ];
        for i in 0..lens.len() {
            write_u32_le(&mut out[i*4..(i+1)*4], lens[i]);
        }
    }
}

impl SynchronousStreamCipher for Salsa20 {
    fn process(&mut self, input: &[u8], output: &mut [u8]) {
        assert!(input.len() == output.len());
        let len = input.len();
        let mut i = 0;
        while i < len {
            // If there is no keystream available in the output buffer,
            // generate the next block.
            if self.offset == 64 {
                self.hash();
            }

            // Process the min(available keystream, remaining input length).
            let count = cmp::min(64 - self.offset, len - i);
            xor_keystream(&mut output[i..i+count], &input[i..i+count], &self.output[self.offset..]);
            i += count;
            self.offset += count;
        }
    }
}

impl Encryptor for Salsa20 {
    fn encrypt(&mut self, input: &mut RefReadBuffer, output: &mut RefWriteBuffer, _: bool)
            -> Result<BufferResult, SymmetricCipherError> {
        symm_enc_or_dec(self, input, output)
    }
}

impl Decryptor for Salsa20 {
    fn decrypt(&mut self, input: &mut RefReadBuffer, output: &mut RefWriteBuffer, _: bool)
            -> Result<BufferResult, SymmetricCipherError> {
        symm_enc_or_dec(self, input, output)
    }
}

pub fn hsalsa20(key: &[u8], nonce: &[u8], out: &mut [u8]) {
    assert!(key.len() == 32);
    assert!(nonce.len() == 16);
    let mut h = Salsa20 { state: Salsa20::expand(key, nonce), output: [0; 64], offset: 64 };
    h.hsalsa20_hash(out);
}

#[cfg(test)]
mod test {
    use std::iter::repeat;

    use salsa20::Salsa20;
    use symmetriccipher::SynchronousStreamCipher;

    use digest::Digest;
    use sha2::Sha256;

    #[test]
    fn test_salsa20_128bit_ecrypt_set_1_vector_0() {
        let key = [128u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        let nonce = [0u8; 8];
        let input = [0u8; 64];
        let mut stream = [0u8; 64];
        let result =
            [0x4D, 0xFA, 0x5E, 0x48, 0x1D, 0xA2, 0x3E, 0xA0,
             0x9A, 0x31, 0x02, 0x20, 0x50, 0x85, 0x99, 0x36,
             0xDA, 0x52, 0xFC, 0xEE, 0x21, 0x80, 0x05, 0x16,
             0x4F, 0x26, 0x7C, 0xB6, 0x5F, 0x5C, 0xFD, 0x7F,
             0x2B, 0x4F, 0x97, 0xE0, 0xFF, 0x16, 0x92, 0x4A,
             0x52, 0xDF, 0x26, 0x95, 0x15, 0x11, 0x0A, 0x07,
             0xF9, 0xE4, 0x60, 0xBC, 0x65, 0xEF, 0x95, 0xDA,
             0x58, 0xF7, 0x40, 0xB7, 0xD1, 0xDB, 0xB0, 0xAA];

        let mut salsa20 = Salsa20::new(&key, &nonce);
        salsa20.process(&input, &mut stream);
        assert!(stream[..] == result[..]);
    }

    #[test]
    fn test_salsa20_256bit_ecrypt_set_1_vector_0() {
        let key =
            [128u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        let nonce = [0u8; 8];
        let input = [0u8; 64];
        let mut stream = [0u8; 64];
        let result =
            [0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3,
             0xEA, 0x8E, 0xF9, 0x47, 0x5B, 0x29, 0xA6, 0xE7,
             0x00, 0x39, 0x51, 0xE1, 0x09, 0x7A, 0x5C, 0x38,
             0xD2, 0x3B, 0x7A, 0x5F, 0xAD, 0x9F, 0x68, 0x44,
             0xB2, 0x2C, 0x97, 0x55, 0x9E, 0x27, 0x23, 0xC7,
             0xCB, 0xBD, 0x3F, 0xE4, 0xFC, 0x8D, 0x9A, 0x07,
             0x44, 0x65, 0x2A, 0x83, 0xE7, 0x2A, 0x9C, 0x46,
             0x18, 0x76, 0xAF, 0x4D, 0x7E, 0xF1, 0xA1, 0x17];

        let mut salsa20 = Salsa20::new(&key, &nonce);
        salsa20.process(&input, &mut stream);
        assert!(stream[..] == result[..]);
    }

    #[test]
    fn test_salsa20_256bit_nacl_vector_2() {
        let key = [
            0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9,
            0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88,
            0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9,
            0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
        ];
        let nonce = [
            0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
        ];
        let input: Vec<u8> = repeat(0).take(4194304).collect();
        let mut stream: Vec<u8> = repeat(0).take(input.len()).collect();
        let output_str = "662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2";

        let mut salsa20 = Salsa20::new(&key, &nonce);
        salsa20.process(input.as_ref(), &mut stream);

        let mut sh = Sha256::new();
        sh.input(stream.as_ref());
        let out_str = sh.result_str();
        assert!(&out_str[..] == output_str);
    }

    #[test]
    fn test_xsalsa20_cryptopp() {
        let key =
            [0x1b, 0x27, 0x55, 0x64, 0x73, 0xe9, 0x85, 0xd4,
             0x62, 0xcd, 0x51, 0x19, 0x7a, 0x9a, 0x46, 0xc7,
             0x60, 0x09, 0x54, 0x9e, 0xac, 0x64, 0x74, 0xf2,
             0x06, 0xc4, 0xee, 0x08, 0x44, 0xf6, 0x83, 0x89];
        let nonce =
            [0x69, 0x69, 0x6e, 0xe9, 0x55, 0xb6, 0x2b, 0x73,
             0xcd, 0x62, 0xbd, 0xa8, 0x75, 0xfc, 0x73, 0xd6,
             0x82, 0x19, 0xe0, 0x03, 0x6b, 0x7a, 0x0b, 0x37];
        let input = [0u8; 139];
        let mut stream = [0u8; 139];
        let result =
            [0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
             0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
             0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
             0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80,
             0x30, 0x9e, 0x64, 0x5a, 0x74, 0xe9, 0xe0, 0xa6,
             0x0d, 0x82, 0x43, 0xac, 0xd9, 0x17, 0x7a, 0xb5,
             0x1a, 0x1b, 0xeb, 0x8d, 0x5a, 0x2f, 0x5d, 0x70,
             0x0c, 0x09, 0x3c, 0x5e, 0x55, 0x85, 0x57, 0x96,
             0x25, 0x33, 0x7b, 0xd3, 0xab, 0x61, 0x9d, 0x61,
             0x57, 0x60, 0xd8, 0xc5, 0xb2, 0x24, 0xa8, 0x5b,
             0x1d, 0x0e, 0xfe, 0x0e, 0xb8, 0xa7, 0xee, 0x16,
             0x3a, 0xbb, 0x03, 0x76, 0x52, 0x9f, 0xcc, 0x09,
             0xba, 0xb5, 0x06, 0xc6, 0x18, 0xe1, 0x3c, 0xe7,
             0x77, 0xd8, 0x2c, 0x3a, 0xe9, 0xd1, 0xa6, 0xf9,
             0x72, 0xd4, 0x16, 0x02, 0x87, 0xcb, 0xfe, 0x60,
             0xbf, 0x21, 0x30, 0xfc, 0x0a, 0x6f, 0xf6, 0x04,
             0x9d, 0x0a, 0x5c, 0x8a, 0x82, 0xf4, 0x29, 0x23,
             0x1f, 0x00, 0x80];

        let mut xsalsa20 = Salsa20::new_xsalsa20(&key, &nonce);
        xsalsa20.process(&input, &mut stream);
        assert!(stream[..] == result[..]);
    }
}

#[cfg(all(test, feature = "with-bench"))]
mod bench {
    use test::Bencher;
    use symmetriccipher::SynchronousStreamCipher;
    use salsa20::Salsa20;

    #[bench]
    pub fn salsa20_10(bh: & mut Bencher) {
        let mut salsa20 = Salsa20::new(&[0; 32], &[0; 8]);
        let input = [1u8; 10];
        let mut output = [0u8; 10];
        bh.iter( || {
            salsa20.process(&input, &mut output);
        });
        bh.bytes = input.len() as u64;
    }

    #[bench]
    pub fn salsa20_1k(bh: & mut Bencher) {
        let mut salsa20 = Salsa20::new(&[0; 32], &[0; 8]);
        let input = [1u8; 1024];
        let mut output = [0u8; 1024];
        bh.iter( || {
            salsa20.process(&input, &mut output);
        });
        bh.bytes = input.len() as u64;
    }

    #[bench]
    pub fn salsa20_64k(bh: & mut Bencher) {
        let mut salsa20 = Salsa20::new(&[0; 32], &[0; 8]);
        let input = [1u8; 65536];
        let mut output = [0u8; 65536];
        bh.iter( || {
            salsa20.process(&input, &mut output);
        });
        bh.bytes = input.len() as u64;
    }
}

LANGUAGE:

DARK MODE: