// PureNoise CryptoLib (c) 1997-2004, PureNoise Ltd Vaduz #ifndef _crypto_h_ #define _crypto_h_ #if defined(__MACH__) && defined(__APPLE__) #include #endif #ifndef ECC_BITS //! Strength of Elliptic Curve algorithms is currently about a half the size of its modulus. //! Therefore for 256-bit symmetric keys 512-bit ECC keys are required. For 128-bit symmetric keys 512-bit ECC keys are required. //! \brief Elliptic Curve key size in bits (normally should be defined with -DECC_BITS=n compiler option, minimum 256) #define ECC_BITS 512 #endif //! \brief it's extern for C and it's extern "C" for C++ #ifndef EXTERN #ifdef __cplusplus #define EXTERN extern "C" #else // __cplusplus #define EXTERN extern #endif // __cplusplus #endif // EXTERN #if defined(_M_ALPHA) || defined(__alpha) || defined(_M_IX86) || defined(__i386__) || defined(__vax__) #ifndef LITTLE_ENDIAN #undef BIG_ENDIAN //! \brief defined for Intel/Alpha/Vax (Least Significant First byte order) #define LITTLE_ENDIAN #endif #else #ifndef BIG_ENDIAN #undef LITTLE_ENDIAN //! \brief defined for weird processors like Sparc etc. (Most Significant First byte order) #define BIG_ENDIAN #endif #endif // BIG/LITTLE ENDIAN // basic types and platform-dependant fast rotation and byte swapping functions. I wish there was a bit count operation too... 
// Compiler shims: give GCC the MSVC-style sized integer keywords and calling-convention / inline spellings
// used throughout this header, and pick the inline-assembly keyword (ASM) per compiler.
#if defined(__GNUC__)
#ifndef __int8
#define __int8 char
#endif
#ifndef __int16
#define __int16 short
#endif
#ifndef __int32
#if defined(__LP64__) || defined(_LP64)
#define __int32 int // long is 64 bits wide on LP64 targets, int is the 32-bit type there
#else
#define __int32 long
#endif
#endif
#ifndef __int64
#define __int64 long long
#endif
#ifndef __cdecl
#define __cdecl __attribute__((cdecl))
#endif
#ifndef __fastcall
#define __fastcall __attribute__((fastcall))
#endif
#ifndef __inline
#define __inline inline
#endif
#ifndef __forceinline
#define __forceinline __inline__
#endif
#ifndef ASM
#define ASM __asm__
#endif
#elif defined(_MSC_VER)
// NOTE(review): the header name was missing from the source text as received; <stdlib.h> is assumed because
// it is the header that declares _lrotr/_lrotl, which the #pragma below turns into intrinsics - confirm.
#include <stdlib.h>
#ifndef ASM
#define ASM __asm
#endif
#pragma intrinsic (_lrotr, _lrotl)
#elif defined(__MACH__) && defined(__APPLE__)
#ifndef __cdecl
#define __cdecl
#endif
#ifndef __fastcall
#define __fastcall
#endif
#ifndef __forceinline
#define __forceinline __inline
#endif
#endif

// slow basic rotations and byte swapping, useful for constants or if there's no other choice
// The 32-bit rotations keep the unsigned long result type but confine the value to the low 32 bits, so they
// stay correct on targets where unsigned long is wider than 32 bits. The count is taken modulo the width.

//! \brief Multiple shifts right 32-bit rotation operation for constants
//! \returns x rotated right by n bit
#define ROTR32(x, n) ((((((unsigned long) (x)) & 0xFFFFFFFFUL) >> ((n) & 31)) | (((unsigned long) (x)) << ((0-(n)) & 31))) & 0xFFFFFFFFUL)

//! \brief Multiple shifts left 32-bit rotation operation for constants
//! \returns x rotated left by n bit
#define ROTL32(x, n) (((((unsigned long) (x)) << ((n) & 31)) | ((((unsigned long) (x)) & 0xFFFFFFFFUL) >> ((0-(n)) & 31))) & 0xFFFFFFFFUL)

//! \brief Multiple shifts right 64-bit rotation operation for constants
//! \returns x rotated right by n bit
#define ROTR64(x, n) ((((unsigned __int64) (x)) >> ((n) & 63)) | (((unsigned __int64) (x)) << ((0-(n)) & 63)))

//! \brief Multiple shifts left 64-bit rotation operation for constants
//! \returns x rotated left by n bit
#define ROTL64(x, n) ((((unsigned __int64) (x)) << ((n) & 63)) | (((unsigned __int64) (x)) >> ((0-(n)) & 63)))

//! \brief Multiple shifts 32-bit byte swapping operation for constants
//! \returns x in the opposite byte order
#define BSWAP32(x) ((ROTL32 ((unsigned long) (x), 8) & 0x00FF00FFU) | (ROTR32 ((unsigned long) (x), 8) & 0xFF00FF00U))
//! \brief Multiple shifts 64-bit byte swapping operation for constants
//! \returns x in the opposite byte order
// Each rotation lines two of the eight bytes up with their mirrored position; the mask keeps just those two.
#if defined (__GNUG__) || defined (__APPLE__)
#define BSWAP64(x) ((ROTL64 ((unsigned __int64) (x), 8) & 0x000000ff000000ffLL) | \
                    (ROTL64 ((unsigned __int64) (x), 24) & 0x0000ff000000ff00LL) | \
                    (ROTR64 ((unsigned __int64) (x), 24) & 0x00ff000000ff0000LL) | \
                    (ROTR64 ((unsigned __int64) (x), 8) & 0xff000000ff000000LL))
#else
#define BSWAP64(x) ((ROTL64 ((unsigned __int64) (x), 8) & 0x000000ff000000ffUL) | \
                    (ROTL64 ((unsigned __int64) (x), 24) & 0x0000ff000000ff00UL) | \
                    (ROTR64 ((unsigned __int64) (x), 24) & 0x00ff000000ff0000UL) | \
                    (ROTR64 ((unsigned __int64) (x), 8) & 0xff000000ff000000UL))
#endif

// _lrotr/_lrotl exist only where this header provides them: as MSVC intrinsics, or as the inline functions
// defined for GCC on 32-bit Intel. Every other target falls back to the shift-based macros so that rotr32,
// rotl32 and everything built on them still resolve.
#if defined(_MSC_VER) || (defined(__GNUC__) && (defined(_M_IX86) || defined(__i386__) || defined(i386)))
//! \brief Faster function-based right 32-bit rotation (should not be used for constants)
//! \returns x rotated right by n bit
#define rotr32(x, n) _lrotr (x, n) // using ROR x,n on Intel
//! \brief Faster function-based left 32-bit rotation (should not be used for constants)
//! \returns x rotated left by n bit
#define rotl32(x, n) _lrotl (x, n) // using ROL x,n on Intel
#else
//! \brief Right 32-bit rotation for variables (no rotation intrinsic on this target, plain shifts are used)
//! \returns x rotated right by n bit
#define rotr32(x, n) ROTR32 (x, n)
//! \brief Left 32-bit rotation for variables (no rotation intrinsic on this target, plain shifts are used)
//! \returns x rotated left by n bit
#define rotl32(x, n) ROTL32 (x, n)
#endif

//! \brief Faster function-based right 64-bit rotation (should not be used for constants)
//! \returns x rotated right by n bit
#define rotr64(x, n) ROTR64 (x, n) // I haven't seen built-in 64-bit rotation functions yet, sticking with slower shifts for now

//! \brief Faster function-based left 64-bit rotation (should not be used for constants)
//! \returns x rotated left by n bit
#define rotl64(x, n) ROTL64 (x, n) // I haven't seen built-in 64-bit rotation functions yet, sticking with slower shifts for now

//! \brief Faster function-based 64-bit byte swapping (should not be used for constants)
\returns x in the opposite byte order #if defined (__GNUG__) || defined (__APPLE__) #define bswap64(x) ((rotl64 ((unsigned __int64) (x), 8) & 0x000000ff000000ffLL) | (rotl64 ((unsigned __int64) (x), 24) & 0x0000ff000000ff00LL) | (rotr64 ((unsigned __int64) (x), 24) & 0x00ff000000ff0000LL) | (rotr64 ((unsigned __int64) (x), 8) & 0xff000000ff000000LL)) #else #define bswap64(x) ((rotl64 ((unsigned __int64) (x), 8) & 0x000000ff000000ffUL) | (rotl64 ((unsigned __int64) (x), 24) & 0x0000ff000000ff00UL) | (rotr64 ((unsigned __int64) (x), 24) & 0x00ff000000ff0000UL) | (rotr64 ((unsigned __int64) (x), 8) & 0xff000000ff000000UL)) #endif #include // is needed by all platforms for memset, memcpy etc //! \def hex_sleep \brief 32-bit sleep function (high 16-bit word represents the number of seconds, low 16-bit word represents the fraction of a second) #if defined (WIN32) || defined (_WIN32) || defined (WIN32_WINNT) || defined (_WIN32_WINNT) || defined (__WIN32__) || defined (WINDOWS) || defined (_WINDOWS) #include #if (_WIN32_WINNT < 0x0400) EXTERN unsigned long __stdcall SwitchToThread (void); #endif #define hex_sleep(n) SleepEx (((n) + 63) >> 6, 1) #define thread_yield() SwitchToThread () #define bzero(a,b) memset (a, 0, b) #define flockfile(x) #define funlockfile(x) #else #include #include #include #ifndef SOCKET #define SOCKET int #endif #ifndef closesocket #define closesocket(a) close(a) #endif #ifndef recv #define recv(a,b,c,d) read(a,b,c) #endif #ifndef _rmtmp #define _rmtmp() #endif #ifndef send #define send(a,b,c,d) write(a,b,c) #endif #ifndef _snprintf #define _snprintf snprintf #endif #ifndef thread_yield #define thread_yield() sched_yield() #endif static __forceinline unsigned long hex_sleep (unsigned long n) { struct timespec req = {n >> 16, (n & 0xFFFFU) * 0x5F5U + 1}; struct timespec rem; nanosleep (&req, &rem); return (rem.tv_sec << 16) + (rem.tv_nsec + 0x5F4U) / 0x5F5U; } #endif //! 
\brief The most important union for optimal byte/word/dword/qword manipulations #ifndef _OCTET_ #define _OCTET_ typedef union _OCTET { unsigned __int64 Q[1]; signed __int64 O[1]; unsigned long D[2]; signed long L[2]; unsigned short W[4]; signed short S[4]; unsigned char B[8]; signed char C[8]; } OCTET; #endif #define BITCOUNT_TYPE unsigned long #define BIT(C) (((BITCOUNT_TYPE)1)<<((C)&(sizeof(BITCOUNT_TYPE)*8-1))) #define BIT_MASK(C) (((BITCOUNT_TYPE)-1)/(BIT(BIT(C))+1)) #define BIT_COUNT(x,C) (((x)&BIT_MASK(C))+(((x)>>C)&BIT_MASK(C))) static __forceinline BITCOUNT_TYPE bit_count (BITCOUNT_TYPE n) { n = BIT_COUNT (n, 0); n = BIT_COUNT (n, 1); n = BIT_COUNT (n, 2); n = BIT_COUNT (n, 3); n = BIT_COUNT (n, 4); return n; } //! \def bswap32 \brief 32-bit byte swapping for variables (should not be used for constants) //! \def bswap32 \returns x in the opposite byte order //! \def clock_counter \brief the most sensitive time/clock counter available, the best source of randomness //! \def clock_counter \returns processor clock counter #if (defined(_MSC_VER) || defined (__GNUC__)) && (defined (_M_IX86) || defined (__i386__) || defined (i386)) #if defined(_MSC_VER) #define CRYPTO_INLINE_ASM 3 #pragma warning (push) #pragma warning (disable:4035) static __forceinline unsigned long bswap32 (unsigned long x) {ASM {mov eax,x} ASM {bswap eax}} // a faster function implementation for variables static __forceinline unsigned __int64 clock_counter (void) { ASM {_emit 0x0F} ASM {_emit 0x31} } #pragma warning (pop) #else // GCC #define CRYPTO_INLINE_ASM 4 static __forceinline unsigned long _lrotl (unsigned long x, unsigned long r) { return (x << r) | (x >> (32-r)); } static __forceinline unsigned long _lrotr (unsigned long x, unsigned long r) { return (x >> r) | (x << (32-r)); } static __forceinline unsigned long bswap32 (unsigned long x) { ASM ("bswapl %0" : "=r" (x) : "0" (x)); return x; } static __forceinline unsigned __int64 clock_counter (void) { register OCTET r; ASM __volatile__ 
(".byte 0x0F, 0x31" : "=a" (r.D[0]), "=d" (r.D[1])); return r.Q[0];} #endif #elif defined (__GNUC__) && (defined (sparc) || defined (__sparc) || defined (sun) || defined (__sun)) #define CRYPTO_INLINE_ASM 5 #ifndef bswap32 // need a faster function implementation for variables #define bswap32(x) ((rotl32 ((unsigned long)(x), 8) & 0x00FF00FFU) | (rotr32 ((unsigned long)(x), 8) & 0xFF00FF00U)) #endif #ifdef __sparc_v9__ extern unsigned char clock_counter_type; static __forceinline unsigned __int64 clock_counter (void) { register unsigned long x, y; if (clock_counter_type == 1) { ASM __volatile__ ("rd %%tick, %0; clruw %0, %1; srlx %0, 32, %0" : "=r" (x), "=r" (y) : "0" (x), "1" (y)); return ((unsigned __int64) x << 32) | y; } return gethrtime (); } #else #define clock_counter gethrtime #endif // __sparc_v9__ static __forceinline unsigned long _beginthread (void (__cdecl *proc) (void *), unsigned long stack_size, void *arg) { unsigned int tid = 0xFFFFFFFFU; thr_create (0, stack_size, &proc, arg, 0, &tid); return tid; } #else #ifndef bswap32 /* should be a faster function implementation for variables */ #define bswap32(x) ((rotl32 ((unsigned long)(x), 8) & 0x00FF00FFU) | (rotr32 ((unsigned long)(x), 8) & 0xFF00FF00U)) #endif #if defined(__MACH__) && defined(__APPLE__) // TODO: get the code below fixed #include //#include //extern void clock_get_system_nanotime( // uint32_t *secs, // uint32_t *nanosecs); static __forceinline unsigned __int64 clock_counter (void) { //uint32_t s, ns; //clock_get_system_nanotime( &s, &ns ); //return (((unsigned __int64) s) << 32) | ns; return time(0); } static __forceinline unsigned long _beginthread (void * (*func) (void *), unsigned long stackSize, void *arg) { pthread_t thread; return pthread_create (&thread, 0, func, arg); } #else EXTERN unsigned __int64 clock_counter (void); #include #include extern void clock_get_system_nanotime( uint32_t *secs, uint32_t *nanosecs); static __forceinline unsigned __int64 clock_counter (void) { 
uint32_t s, ns; clock_get_system_nanotime( &s, &ns ); return (((unsigned __int64) s) << 32) | ns; } static __forceinline unsigned long _beginthread (void * (*func) (void *), unsigned long stackSize, void *arg) { pthread_t thread; return pthread_create (&thread, 0, func, arg); } #endif /* APPLE */ #endif #ifdef CRYPTO_INLINE_ASM #ifdef CRYPTO_NOASM #undef CRYPTO_NOASM #endif #else #ifndef CRYPTO_NOASM #define CRYPTO_NOASM #endif #endif //! \def LSF16 \brief slow 16-bit processor dependant byte ordering for constants //! \def LSF16 \returns unchanged 16 bit x for Intel and byte swapped x for Sparc //! \def LSF32 \brief slow 32-bit processor dependant byte ordering for constants //! \def LSF32 \returns unchanged 32 bit x for Intel and byte swapped x for Sparc //! \def LSF64 \brief slow 64-bit processor dependant byte ordering for constants //! \def LSF64 \returns unchanged 64 bit x for Intel and byte swapped x for Sparc //! \def LSF64D \brief slow 64-bit processor dependant dword ordering for constants //! \def LSF64D \returns unchanged 64 bit x for Intel and 32-bit word swapped x for Sparc //! \def MSF16 \brief slow 16-bit processor dependant byte ordering for constants //! \def MSF16 \returns unchanged 16 bit x for Sparc and byte swapped x for Intel //! \def MSF32 \brief slow 32-bit processor dependant byte ordering for constants //! \def MSF32 \returns unchanged 32 bit x for Sparc and byte swapped x for Intel //! \def MSF64 \brief slow 64-bit processor dependant byte ordering for constants //! \def MSF64 \returns unchanged 64 bit x for Sparc and byte swapped x for Intel //! \def lsf16 \brief fast 16-bit processor dependant byte ordering for variables //! \def lsf16 \returns unchanged 16 bit x for Intel and byte swapped x for Sparc //! \def lsf32 \brief fast 32-bit processor dependant byte ordering for variables //! \def lsf32 \returns unchanged 32 bit x for Intel and byte swapped x for Sparc //! 
//! \def lsf64 \brief fast 64-bit processor dependent byte ordering for variables
//! \def lsf64 \returns unchanged 64 bit x for Intel and byte swapped x for Sparc
//! \def msf16 \brief fast 16-bit processor dependent byte ordering for variables
//! \def msf16 \returns unchanged 16 bit x for Sparc and byte swapped x for Intel
//! \def msf32 \brief fast 32-bit processor dependent byte ordering for variables
//! \def msf32 \returns unchanged 32 bit x for Sparc and byte swapped x for Intel
//! \def msf64 \brief fast 64-bit processor dependent byte ordering for variables
//! \def msf64 \returns unchanged 64 bit x for Sparc and byte swapped x for Intel
//! \def ord2 \brief pair reordering ensuring the same order on any processor
//! \def ord2 \returns unchanged index x for Intel and x with the lowest bit in the opposite order for Sparc
//! \def ord4 \brief 4 element reordering ensuring the same order on any processor
//! \def ord4 \returns unchanged index x for Intel and x with the lowest 2 bits in the opposite order for Sparc
//! \def ord8 \brief 8 element reordering ensuring the same order on any processor
//! \def ord8 \returns unchanged index x for Intel and x with the lowest 3 bits in the opposite order for Sparc
//! \def make_LSF \brief ensures LSF byte order for an array of n 32-bit values
//! \def make_LSF \retval x remains unchanged on Intel and n elements of x are byte swapped on Sparc
//! \def make_MSF \brief ensures MSF byte order for an array of n 32-bit values
//! \def make_MSF \retval x remains unchanged on Sparc and n elements of x are byte swapped on Intel
#ifdef LITTLE_ENDIAN
#define LSF16(x) (x)
#define LSF32(x) (x)
#define LSF64(x) (x)
#define LSF64D(x) (x)
#define MSF16(x) ((((x) & 0xFF) << 8) | (((x) >> 8) & 0xFF))
#define MSF32(x) (BSWAP32 (x))
#define MSF64(x) (BSWAP64 (x))
#define lsf16(x) (x)
#define lsf32(x) (x)
#define lsf64(x) (x)
#define msf16(x) ((((x) & 0xFF) << 8) | (((x) >> 8) & 0xFF))
#define msf32(x) (bswap32 (x))
#define msf64(x) (bswap64 (x))
#define ord2(x) (x)
#define ord4(x) (x)
#define ord8(x) (x)
// load32: y is a union object, x a pointer to one; copies 32-bit word j of *x into word i of y
#define load32(y, x, i, j) ((y).D[i] = (x)->D[j])
// save32: y is a pointer to a union object, x an object; copies 32-bit word j of x into word i of *y
#define save32(y, x, i, j) ((y)->D[i] = (x).D[j])
#define make_LSF(x, n)
static __forceinline void make_MSF (unsigned long *x, unsigned long n)
{
	for (; n; x++, n--)
		*x = bswap32 (*x);
}
#endif

#ifdef BIG_ENDIAN
static __forceinline void make_LSF (unsigned long *x, unsigned long n)
{
	for (; n; x++, n--)
		*x = bswap32 (*x);
}
#define LSF16(x) ((((x) & 0xFF) << 8) | (((x) >> 8) & 0xFF))
#define LSF32(x) (BSWAP32 (x))
#define LSF64(x) (BSWAP64 (x))
#define LSF64D(x) ((((unsigned __int64) (x)) << 32) | (((unsigned __int64) (x)) >> 32))
#define MSF16(x) (x)
#define MSF32(x) (x)
#define MSF64(x) (x)
#define lsf16(x) ((((x) & 0xFF) << 8) | (((x) >> 8) & 0xFF))
#define lsf32(x) (bswap32 (x))
#define lsf64(x) (bswap64 (x))
#define msf16(x) (x)
#define msf32(x) (x)
#define msf64(x) (x)
#define ord2(x) ((x) ^ 1)
#define ord4(x) ((x) ^ 3)
#define ord8(x) ((x) ^ 7)
// byte-wise variants of load32/save32: the four bytes of the word are copied in reversed order
#define load32(y, x, i, j) ( \
	(y).B[(i)*4+0] = (x)->B[(j)*4+3], \
	(y).B[(i)*4+1] = (x)->B[(j)*4+2], \
	(y).B[(i)*4+2] = (x)->B[(j)*4+1], \
	(y).B[(i)*4+3] = (x)->B[(j)*4+0])
#define save32(y, x, i, j) ( \
	(y)->B[(i)*4+3] = (x).B[(j)*4+0], \
	(y)->B[(i)*4+2] = (x).B[(j)*4+1], \
	(y)->B[(i)*4+1] = (x).B[(j)*4+2], \
	(y)->B[(i)*4+0] = (x).B[(j)*4+3])
// nothing to do on this byte order; expands to nothing exactly like make_LSF does on Intel
#define make_MSF(x, n)
#endif

//! \brief byte n (0 is the least significant) of x
#define byte4(x, n) ((unsigned char) ((x) >> ((n) << 3)))
//! \brief same as byte4 with the byte index passed through ord4
#define byte4ord(x, n) ((unsigned char) ((x) >> (ord4(n) << 3)))

#ifndef __max
#define __max(a,b) (((a) > (b)) ? (a) : (b)) // should be defined in <stdlib.h>
#endif
#ifndef __min
#define __min(a,b) (((a) < (b)) ? (a) : (b)) // should be defined in <stdlib.h>
#endif

//! \brief extremely fast no API concurrency control
//! \pre control has to be global and volatile!
//! \param control is a 32-bit variable or a pointer to one
//! \param timeout is a 32-bit constant, a number of processor cycles we loop before giving up the thread
// this is faster than any other concurrency controlling API
// The expansion is a brace block (not an expression) and works like this: a non-zero stamp is taken from
// clock_counter (); control is claimed by storing the stamp while control reads 0; otherwise the code spins
// until timeout counter ticks have passed since the stamp, tries once more, then calls thread_yield () and
// starts over. Both sides of the spin comparison are unsigned long so the tick difference wraps consistently.
// NOTE(review): the test of control and the store into it are two separate plain accesses, not one atomic
// operation.
#define wait_for_availability(control, timeout) \
{ \
	unsigned long wfa_stamp_; \
	do wfa_stamp_ = (unsigned long) clock_counter (); while (wfa_stamp_ == 0); \
	for (;;) \
	{ \
		if (((control) == 0) && (((control) = wfa_stamp_) == wfa_stamp_)) break; \
		while ((unsigned long) clock_counter () - wfa_stamp_ < (unsigned long) (timeout)); \
		if (((control) == 0) && (((control) = wfa_stamp_) == wfa_stamp_)) break; \
		thread_yield (); \
	} \
}

//! \brief frees control allowing the nearest wait_for_availability() to fall through
#define make_available(control) ((control) = 0)

//! A multiprecision number (big) is an array of words with the first word representing the big's size in words.
//! As Miracl libraries had been optimized for speed, all bigs now require a trailing zero word, therefore for a
//! N-word big, (N + 2) words should be allocated, element [0] should be set to N and element [N+1] should be set
//! to 0. Element [1] of the big is its least significant word and element [N] is its most significant word.
//! For compatibility with processors implementing human-readable byte order (or so-called Network Byte Order),
//! a big number x if stored or transmitted, should be first converted to LSF format using make_LSF (x + 1, x[0])
//! and then if necessary to ASCII using fast bytes2str before its output and back using str2bytes and make_LSF
//! after its input. You also have to convert a big to LSF before calling setkey_big, encrypt_big or decrypt_big.
//! \brief a 32-bit word is the basic multiprecision number element type, currently only 32-bit (unsigned long) is supported.
//! No upgrade to unsigned __int64 is intended any time soon, so we stick with 32-bit words for the time being; defines work better than typedefs on some compilers.
#endif // _crypto_h_