#include <stdint.h>
#include "speck.h"
#ifndef ARM
#include <stdio.h>
#endif

/*
 * One encryption round step on a pair of 16-bit words, in place.
 *
 *   *x is rotated right by 7, has *y added to it, then is XORed with k;
 *   *y is rotated left by 2, then XORed with the updated *x.
 *
 * Each step reads and writes through the pointers, so the order of memory
 * accesses matters if x and y were ever to alias.
 */
void FuncER16(u16 *x, u16 *y, u16 k)
{
    enum { WORD_BITS = 16, ROT_RIGHT = 7, ROT_LEFT = 2 };

    /* Rotate *x right by ROT_RIGHT bits. */
    *x = ((*x) >> ROT_RIGHT) | ((*x) << (WORD_BITS - ROT_RIGHT));

    /* Mix in *y (the sum wraps on assignment to the 16-bit word), then the key. */
    *x = *x + *y;
    *x ^= k;

    /* Rotate *y left by ROT_LEFT bits, then fold in the new *x. */
    *y = ((*y) << ROT_LEFT) | ((*y) >> (WORD_BITS - ROT_LEFT));
    *y ^= *x;
}

#ifdef ARM
/*
 * ARM inline-assembly counterpart of FuncER16: performs, in place,
 *
 *   *x = rotate_right16(*x, 7);  *x += *y;  *x ^= k;
 *   *y = rotate_left16(*y, 2);   *y ^= *x;
 *
 * The pointers and the key are passed to the assembly as named input
 * operands, so the code does not depend on where the compiler keeps the
 * parameters (the earlier version read them from fixed frame-pointer slots,
 * which is only valid for one particular unoptimised stack layout).
 * r2/r3 are used as scratch and declared as clobbers together with the
 * condition flags and memory, since *x and *y are written by the assembly.
 *
 * Every halfword is stored with strh, which keeps only the low 16 bits, so
 * bits shifted or carried above bit 15 in the 32-bit scratch registers are
 * discarded exactly like the implicit truncation in the C version.
 */
void FuncER16_ASM(u16 *x, u16 *y, u16 k)
{

    asm volatile (
        /* *x = (*x >> 7) | (*x << 9) */
        "ldrh	r3, [%[xp]]\n\t"
        "lsr	r2, r3, #7\n\t"
        "lsl	r3, r3, #9\n\t"
        "orr	r3, r3, r2\n\t"
        "strh	r3, [%[xp]]\n\t"
        /* *x += *y */
        "ldrh	r3, [%[xp]]\n\t"
        "ldrh	r2, [%[yp]]\n\t"
        "add	r3, r3, r2\n\t"
        "strh	r3, [%[xp]]\n\t"
        /* *x ^= k */
        "ldrh	r3, [%[xp]]\n\t"
        "eor	r3, r3, %[key]\n\t"
        "strh	r3, [%[xp]]\n\t"
        /* *y = (*y << 2) | (*y >> 14) */
        "ldrh	r3, [%[yp]]\n\t"
        "lsl	r2, r3, #2\n\t"
        "lsr	r3, r3, #14\n\t"
        "orr	r3, r3, r2\n\t"
        "strh	r3, [%[yp]]\n\t"
        /* *y ^= *x */
        "ldrh	r3, [%[yp]]\n\t"
        "ldrh	r2, [%[xp]]\n\t"
        "eor	r3, r3, r2\n\t"
        "strh	r3, [%[yp]]\n\t"
        : /* no register outputs; results are written through xp / yp */
        : [xp] "l" (x), [yp] "l" (y), [key] "l" ((unsigned int)k)
        : "r2", "r3", "cc", "memory"
    );

}
#endif


/*
 * Serialises numwords 16-bit words into 2*numwords bytes, low byte first.
 *
 * words    - input words
 * bytes    - output buffer; must have room for 2*numwords bytes
 * numwords - number of words to convert (nothing is written if <= 0)
 */
void Words16ToBytes(u16 words[],u8 bytes[],int numwords)
{
    u8 *out = bytes;

    for (int w = 0; w < numwords; ++w) {
        *out++ = (u8)words[w];          /* low 8 bits  */
        *out++ = (u8)(words[w] >> 8);   /* high 8 bits */
    }
}

/*
 * Packs pairs of bytes into 16-bit words, first byte of each pair in the
 * low half of the word.
 *
 * bytes    - input buffer, at least numbytes bytes are read (in pairs)
 * words    - output buffer; receives numbytes/2 words
 * numbytes - number of input bytes; an odd trailing byte is ignored
 */
void BytesToWords16(u8 bytes[],u16 words[],int numbytes)
{
    const int word_count = numbytes / 2;

    for (int w = 0; w < word_count; ++w) {
        const u16 low = bytes[2 * w];
        const u16 high = bytes[2 * w + 1];

        words[w] = low | (high << 8);
    }
}

/*
 * Expands the four key words K[0..3] into the 22 round keys rk[0..21].
 *
 * K  - four 16-bit key words
 * rk - output; exactly 22 entries (rk[0]..rk[21]) are written
 *
 * The previous loop advanced the round counter three times per pass and only
 * tested the bound at the top, so its last pass also stored to rk[22] and
 * rk[23]. Those entries are not round keys, and writing them overruns a
 * buffer sized for the 22 rounds. This version produces identical values for
 * rk[0..21] and writes nothing beyond them.
 *
 * ER16 is defined outside this file (presumably in speck.h); it is applied
 * to the key words K[1], K[2], K[3] in rotation, with the round index as
 * its third argument, exactly as before.
 */
void Speck3264KeySchedule(u16 K[],u16 rk[])
{
    enum { ROUNDS = 22, MIX_WORDS = 3 };

    u16 mix[MIX_WORDS] = { K[1], K[2], K[3] };
    u16 A = K[0];
    u16 i;

    for (i = 0; i < ROUNDS; i++) {
        rk[i] = A;
        /* Round i updates A together with mix word i mod 3. */
        ER16(mix[i % MIX_WORDS], A, i);
    }
}


/*
 * Encrypts one block of two 16-bit words with 22 rounds.
 *
 * Pt - two plaintext words (read only)
 * Ct - two output words; initialised from Pt and then updated in place
 * rk - round keys; rk[0]..rk[21] are read
 *
 * On ARM builds the inline-assembly round function is used. Otherwise the
 * C round function is used and the state after each round is printed
 * together with the round key that was just applied. (The trace used to
 * index rk after the counter had already been incremented, so it showed the
 * next round's key and read rk[22] on the final round.)
 */
void Speck3264Encrypt(u16 Pt[],u16 Ct[],u16 rk[])
{
    u16 i;
    Ct[0]=Pt[0]; Ct[1]=Pt[1];

    // full 22 rounds
    for (i = 0; i < 22; i++) {
#ifdef ARM
        FuncER16_ASM(&Ct[1], &Ct[0], rk[i]);
#else
        FuncER16(&Ct[1], &Ct[0], rk[i]);
        printf("( c1=0x%x, c0=0x%x, k=0x%x )\n",
               (unsigned)Ct[1], (unsigned)Ct[0], (unsigned)rk[i]);
#endif
    }
}


/*
 * Decrypts one block of two 16-bit words by applying DR16 with the round
 * keys in reverse order, rk[21] down to rk[0].
 *
 * Pt - two output words; initialised from Ct and then updated in place
 * Ct - two ciphertext words (read only)
 * rk - round keys; rk[0]..rk[21] are read
 *
 * DR16 is defined outside this file (presumably in speck.h).
 */
void Speck3264Decrypt(u16 Pt[],u16 Ct[],u16 rk[])
{
    int round = 21;

    Pt[0] = Ct[0];
    Pt[1] = Ct[1];

    while (round >= 0) {
        DR16(Pt[1], Pt[0], rk[round]);
        round--;
    }
}


/*
 * Byte-oriented wrapper: encrypts one 4-byte block under an 8-byte key.
 *
 * pt - 4 plaintext bytes
 * k  - 8 key bytes
 * ct - receives 4 ciphertext bytes
 *
 * The byte counts passed to BytesToWords16 are taken from the sizes of the
 * destination word arrays (2 words = 4 bytes, 4 words = 8 bytes). The
 * previous counts of 8 and 16 made the conversion write twice as many words
 * as Pt and K can hold and read past the end of the caller's 4-byte block
 * and 8-byte key.
 *
 * On non-ARM builds the first 16 round keys are printed as a debug trace.
 */
void Speck3264_EncryptBlock(u8 pt[], u8 k[], u8 ct[]) {

    uint16_t Pt[2] = {0};
    uint16_t K[4] = {0};
    uint16_t rk[34] = {0};  /* larger than the 22 round keys that are used */
    uint16_t Ct[2] = {0};

    BytesToWords16(pt, Pt, (int)sizeof Pt);
    BytesToWords16(k, K, (int)sizeof K);

    Speck3264KeySchedule(K,rk);
#ifndef ARM
    for (int i=0; i < 16; i++)
    {
        printf("Key: 0x%x\n", rk[i]);
    }
#endif

    Speck3264Encrypt(Pt,Ct,rk);
    Words16ToBytes(Ct, ct, 2);

}

/*
 * Demo entry point: encrypts one fixed 4-byte block under a fixed 8-byte
 * key and, on non-ARM builds, prints the resulting ciphertext bytes.
 */
int main() {
    u8 plaintext[4] = {0x4c, 0x69, 0x74, 0x65};
    u8 cipher_key[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
    u8 ciphertext[4] = {0x0};

    Speck3264_EncryptBlock(plaintext, cipher_key, ciphertext);

#ifndef ARM
    printf("[[ Speck 32/64  ]]\n");
    printf("The output: \n");

    /* One line per ciphertext byte, in buffer order. */
    for (const u8 *byte = ciphertext; byte != ciphertext + 4; ++byte) {
        printf("- %08x\n", *byte);
    }
#endif

    return 0;
}