Harkonnen
I recoded my md5
Code :
- %include "common.inc"
- ;%define LOOPS 2097152
- ;%define LOOPS 1000000
- ;%define UNROLL 8
- %define DO_P4 1
- %if DO_P4
- %define USE_PSHUFD 0
- %define USE_PSHUFD_ROL 1 ; On P4s, a PSHUFD is faster than a MOVDQA in some circumstances.
- %define CACHE_LINE_SIZE 64
- %else
- %define USE_PSHUFD 0
- %define USE_PSHUFD_ROL 0
- %define CACHE_LINE_SIZE 32
- %endif ; %if DO_P4
- %define PHASE1 256
- %define PHASE2 PHASE1 + 64
- %define S11 7
- %define S12 12
- %define S13 17
- %define S14 22
- %define S21 5
- %define S22 9
- %define S23 14
- %define S24 20
- %define S31 4
- %define S32 11
- %define S33 16
- %define S34 23
- %define S41 6
- %define S42 10
- %define S43 15
- %define S44 21
- ; the macros we will use assume the triple-blocks are in
- ; XMM0-XMM3 and EAX,EBX,ECX,EDX and the input block is accessed via EBP [i.e. 16 128-bit words and 16 32-bit words]
- ; called as A,B,C,D,a,b,c,d,rot,text_index1,text_index2,sin_index,constant1,constant2,PHASE
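- ; note: the first letter of each macro name gives the round function used in the SSE2 (XMM) lanes, the second the one used in the ALU lane (e.g. Fg = F on XMM0-XMM3, G on EAX..EDX)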
- ;#define F(b, c, d) ((b & c) | (~b & d))
- ;#define F(b, c, d) (d ^ (b & (c ^ d)))
- ; (a) += F ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ;; FF XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX,S11, 0, 0,0d76aa478h
- ;; FF XMM3,XMM0,XMM1,XMM2, EDX,EAX,EBX,ECX,S12, 1, 1,0e8c7b756h
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Ff 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- ;try swapping the text-load PADDD below with the SIN_INDEX PADDD, to spread the memory accesses out more.
- MOVDQA XMM4, C ; load c
- MOV ESI, c ; load c : u32
- mov edi,[EBP + alutext_index1 * 4 + PHASE] ; load x : u32
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + x
- XOR ESI, d ; c ^ d : u32
- add a, edi ; a = a + x
- PXOR XMM4, D ; c ^ d
- AND ESI,b ; b & (c ^ d) : u32
- ADD a, constant1 ; add the constants, sin values, a = a + x + ac : u32
- PAND XMM4, B ; b & (c ^ d)
- XOR ESI, d ; d ^ (b & (c ^ d)) : u32
- ADD a,ESI ; add to a, a = a + F(b, c, d) + x + ac : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- ROL a, alurot1 ; rotate left : u32
- MOV ESI, c2 ; load c : u32
- PXOR XMM4, D ; d ^ (b & (c ^ d))
- ADD a, b ; add b : u32
- mov edi,[EBP + alutext_index2 * 4 + PHASE] ; load x : u32
- PADDD A,XMM4 ; add to a, a = a + F(b,c,d) + x + ac
- XOR ESI, d2 ; c ^ d : u32
- add a2, edi ; a = a + x : u32
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- AND ESI,b2 ; b & (c ^ d) : u32
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- PSLLD A, mmxrot ; shift a left
- XOR ESI, d2 ; d ^ (b & (c ^ d)) : u32
- ADD a2,ESI ; add to a, a = a + F(b, c, d) + x + ac : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- ROL a2, alurot2 ; rotate left : u32
- ADD a2, b2 ; add b : u32
- POR A,XMM6 ; OR together
- PADDD A, B ; add b
- %endmacro
- ; the macros we will use assume the triple-blocks are in
- ; XMM0-XMM3 and EAX,EBX,ECX,EDX and the input block is accessed via EBP [i.e. 16 128-bit words and 16 32-bit words]
- ; called as A,B,C,D,a,b,c,d,rot,text_index1,text_index2,sin_index,constant1,constant2,PHASE
- ;#define F(b, c, d) ((b & c) | (~b & d))
- ;#define F(b, c, d) (d ^ (b & (c ^ d)))
- ; (a) += F ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ;; FF XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX,S11, 0, 0,0d76aa478h
- ;; FF XMM3,XMM0,XMM1,XMM2, EDX,EAX,EBX,ECX,S12, 1, 1,0e8c7b756h
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Fg 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, C ; load c
- mov esi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- MOV EDI, d ; load d : u32
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + X
- add a, esi ; a = a + x
- NOT EDI ; ~d
- PXOR XMM4, D ; c ^ d
- MOV ESI, d ; load d : u32
- AND EDI, c ; ~d & c : u32
- PAND XMM4, B ; b & (c ^ d)
- AND ESI, b ; b & d : u32
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- OR ESI, EDI ; G(b,c,d) = (b & d) | (~d & c)
- ADD a, ESI ; add together, G(b,c,d) + x + ac : u32
- PXOR XMM4, D ; d ^ (b & (c ^ d))
- ROL a, alurot1 ; rotate left : u32
- ADD a, b ; add b : u32
- PADDD A,XMM4 ; add to a, a = a + F(b,c,d) + x + ac
- mov esi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- MOV EDI, d2 ; load d : u32
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- add a2, esi
- NOT EDI ; ~d
- PSLLD A, mmxrot ; shift a left
- MOV ESI, d2 ; load d : u32
- AND EDI, c2 ; ~d & c : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- AND ESI, b2 ; b & d : u32
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- POR A,XMM6 ; OR together
- OR ESI, EDI ; G(b,c,d) = (b & d) | (~d & c)
- ADD a2, ESI ; add together, G(b,c,d) + x + ac : u32
- PADDD A, B ; add b
- ROL a2, alurot2 ; rotate left : u32
- ADD a2, b2 ; add b : u32
- %endmacro
- ;#define G(b, c, d) ((b & d) | (c & ~d))
- ;#define G(b, c, d) (c ^ (d & (b ^ c)))
- ; (a) += G ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Gh 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, D ; load d
- mov edi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- MOVDQA XMM5, D ; load d
- MOV ESI, d ; load d : u32
- ADD a, edi ; a = a + x
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + X
- XOR ESI, c ; d ^ c : u32
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- PXOR XMM5, [_all_one] ; ~d
- XOR ESI, b ; H(b,c,d) = (b ^ c ^ d) : u32
- ADD a, ESI ; add to a, a = a + H(b,c,d) + x + ac : u32
- PAND XMM4, B ; b & d
- ROL a, alurot1 ; rotate left : u32
- PAND XMM5, C ; ~d & c
- ADD a, b ; add b : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- mov edi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- POR XMM4, XMM5 ; (b & d) | (~d & c)
- MOV ESI, d2 ; load d : u32
- ADD a2, edi ; a = a + x : u32
- PADDD A, XMM4 ; add together, G(b,c,d) + x + ac
- XOR ESI, c2 ; d ^ c : u32
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- XOR ESI, b2 ; H(b,c,d) = (b ^ c ^ d) : u32
- ADD a2, ESI ; add to a, a = a + H(b,c,d) + x + ac : u32
- PSLLD A, mmxrot ; shift a left
- ROL a2, alurot2 ; rotate left : u32
- PSRLD XMM6, 32-mmxrot ; shift XMM6 right
- ADD a2, b2 ; add b : u32
- POR A, XMM6 ; OR together
- PADDD A, B ; add b
- %endmacro
- ;#define G(b, c, d) ((b & d) | (c & ~d))
- ;#define G(b, c, d) (c ^ (d & (b ^ c)))
- ; (a) += G ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Gi 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- ;#define I(x, y, z) ((y) ^ ((x) | (~z)))
- ;#define II(a, b, c, d, x, s, ac)
- ;%if 0
- MOVDQA XMM4, D ; load d
- MOV ESI, d ; load d : u32
- mov edi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- MOVDQA XMM5, D ; load d
- NOT ESI ; ~d : u32
- ADD a, edi ; a = a + x
- PADDD A, [_sin_const + sin_index * 16] ; A = A + AC
- OR ESI, b ; b | ~d : u32
- ADD a, constant1 ; a = a + x + ac
- PXOR XMM5, [_all_one] ; ~d
- XOR ESI,c ; I(b,c,d) = c ^ (b | ~d) : u32
- ADD a, ESI ; add to a, a = a + I(b,c,d) + x + ac : u32
- PAND XMM4, B ; b & d
- ROL a, alurot1 ; rotate left : u32
- PAND XMM5, C ; ~d & c
- MOV ESI, d2 ; load d : u32
- ADD a, b ; add b : u32
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + AC + X
- mov edi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- NOT ESI ; ~d : u32
- POR XMM4, XMM5 ; (b & d) | (~d & c)
- ADD a2, edi ; a = a + x
- OR ESI, b2 ; b | ~d : u32
- PADDD A, XMM4 ; add together, G(b,c,d) + x + ac
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- XOR ESI,c2 ; I(b,c,d) = c ^ (b | ~d) : u32
-
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- ADD a2,ESI ; add to a, a = a + I(b,c,d) + x + ac : u32
- PSLLD A, mmxrot ; shift a left
- ROL a2, alurot2 ; rotate left : u32
- PSRLD XMM6, 32-mmxrot ; shift XMM6 right
- ADD a2, b2 ; add b : u32
- POR A, XMM6 ; OR together
- PADDD A, B ; add b
- %endmacro
- ;#define H(x, y, z) ((x) ^ (y) ^ (z))
- ;#define HH(a, b, c, d, x, s, ac) \
- ; (a) += H ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Hf 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, D ; load d
- MOV ESI, c ; load c : u32
- mov edi,[EBP + alutext_index1 * 4 + PHASE] ; load x
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + X
- XOR ESI, d ; c ^ d : u32
- add a, edi ; a = a + x
- PXOR XMM4, C ; d ^ c
- AND ESI,b ; b & (c ^ d) : u32
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- XOR ESI, d ; d ^ (b & (c ^ d)) : u32
- ADD a, ESI ; add to a, a = a + F(b, c, d) + x + ac : u32
- PXOR XMM4, B ; H(b,c,d) = (b ^ c ^ d)
- MOV ESI, c2 ; load c : u32
- ROL a, alurot1 ; rotate left : u32
- PADDD A, XMM4 ; add to a, a = a + H(b,c,d) + x + ac
- mov edi,[EBP + alutext_index2 * 4 + PHASE] ; load x
- ADD a, b ; add b : u32
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- XOR ESI, d2 ; c ^ d : u32
- add a2, edi ; a = a + x : u32
- PSLLD A, mmxrot ; shift a left
- AND ESI,b2 ; b & (c ^ d) : u32
- ADD a2, constant2 ; a = a + x + ac : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- XOR ESI, d2 ; d ^ (b & (c ^ d)) : u32
- ADD a2, ESI ; add to a, a = a + F(b, c, d) + x + ac : u32
- POR A,XMM6 ; OR together
- ROL a2, alurot2 ; rotate left : u32
- PADDD A, B ; a = a + b
- ADD a2, b2 ; add b : u32
- %endmacro
- ;#define H(x, y, z) ((x) ^ (y) ^ (z))
- ;#define HH(a, b, c, d, x, s, ac) \
- ; (a) += H ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Hg 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, D ; load d
- mov esi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- MOV EDI, c ; load c : u32
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + X
- add a, esi ; a = a + x
- XOR EDI, b ; b ^ c
- PXOR XMM4, C ; d ^ c
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- AND EDI, d ; d & (b ^ c)
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- XOR EDI, c ; G(b,c,d) = c ^ (d & (b ^ c))
- mov esi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- PXOR XMM4, B ; H(b,c,d) = (b ^ c ^ d)
- ADD a, EDI ; add together, G(b,c,d) + x + ac : u32
- ROL a, alurot1 ; a = a ROL ?
- PADDD A, XMM4 ; add to a, a = a + H(b,c,d) + x + ac
- MOV EDI, c2 ; load c : u32
- ADD a, b ; a = a + b
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- XOR EDI, b2 ; c ^ b
- add a2, esi ; a = a + x
- PSLLD A, mmxrot ; shift a left
- AND EDI, d2 ; d & (c ^ b)
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- XOR EDI, c2 ; G(b,c,d) = c ^ (d & (b ^ c)) = (b & d) | (~d & c)
- ADD a2, EDI ; add together, G(b,c,d) + x + ac : u32
- POR A,XMM6 ; OR together
- ROL a2, alurot2 ; rotate left : u32
- ADD a2, b2 ; add b : u32
- PADDD A, B ; add b
- %endmacro
- ;#define I(x, y, z) ((y) ^ ((x) | (~z)))
- ;#define II(a, b, c, d, x, s, ac) \
- ; (a) += I ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Ih 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, D ; load d
- mov edi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- MOV ESI, d ; load d : u32
- PXOR XMM4,[_all_one] ; ~d
- ADD a, edi ; a = a + x : u32
- XOR ESI, c ; d ^ c : u32
- PADDD A, [EBP + mmxtext_index * 16] ; A = A + X
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- XOR ESI, b ; H(b,c,d) = (b ^ c ^ d) : u32
- POR XMM4, B ; b | ~d
- ADD a, ESI ; add to a, a = a + H(b,c,d) + x + ac : u32
- ROL a, alurot1 ; rotate left : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- ADD a, b ; add b : u32
- mov edi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- PXOR XMM4,C ; c ^ (b | ~d)
- MOV ESI, d2 ; load d : u32
- ADD a2, edi ; a = a + x : u32
- PADDD A,XMM4 ; add to a, a = a + I(b,c,d) + x + ac
- XOR ESI, c2 ; d ^ c : u32
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- XOR ESI, b2 ; H(b,c,d) = (b ^ c ^ d) : u32
- PSLLD A, mmxrot ; shift a left
- ADD a2, ESI ; add to a, a = a + H(b,c,d) + x + ac : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- ROL a2, alurot2 ; rotate left : u32
- POR A,XMM6 ; OR together
- ADD a2, b2 ; a = a + b
- PADDD A, B ; a = a + b
- %endmacro
- ;#define I(x, y, z) (y ^ (x | ~z))
- ;#define II(a, b, c, d, x, s, ac) \
- ; (a) += I ((b), (c), (d)) + (x) + (uns32)(ac); \
- ; (a) = ROLuns32((a), (s)); \
- ; (a) += (b);
- ; Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, mmxrot,alurot1,alurot2 mmxtext,alutext1,alutext2,sin_text, 0d76aa478h,0e8c7b756h,PHASE
- %macro Ii 18
- %define A %1
- %define B %2
- %define C %3
- %define D %4
- %define a %5
- %define b %6
- %define c %7
- %define d %8
- %define mmxrot %9
- %define alurot1 %10
- %define alurot2 %11
- %define mmxtext_index %12
- %define alutext_index1 %13
- %define alutext_index2 %14
- %define sin_index %15
- %define constant1 %16
- %define constant2 %17
- %define PHASE %18
- %define a2 d
- %define b2 a
- %define c2 b
- %define d2 c
- MOVDQA XMM4, D ; load d
- MOV ESI, d ; load d : u32
- mov edi, [EBP + alutext_index1 * 4 + PHASE] ; load the text, x : u32
- PXOR XMM4,[_all_one] ; ~d
- NOT ESI ; ~d : u32
- ADD a, edi ; a = a + x
- PADDD A, [EBP + mmxtext_index * 16] ; a = a + x
- OR ESI, b ; b | ~d : u32
- ADD a, constant1 ; add the constants, sin values, x + ac : u32
- POR XMM4, B ; b | ~d
- XOR ESI, c ; I(b,c,d) = c ^ (b | ~d) : u32
- ADD a, ESI ; add to a, a = a + I(b,c,d) + x + ac : u32
- PADDD A, [_sin_const + sin_index * 16] ; add the constants, sin values, x + ac
- MOV ESI, d2 ; load d : u32
- ROL a, alurot1 ; rotate left : u32
- PXOR XMM4, C ; I(b,c,d) = c ^ (b | ~d)
- mov edi, [EBP + alutext_index2 * 4 + PHASE] ; load the text, x : u32
- ADD a, b ; add b : u32
- PADDD A,XMM4 ; add to a, a = a + I(b,c,d) + x + ac
- NOT ESI ; ~d : u32
- ADD a2, edi ; a + x
- ; ROT not native to SSE2
- %if USE_PSHUFD_ROL
- PSHUFD XMM6, A, 11100100b
- %else
- MOVDQA XMM6, A ; store a
- %endif ; %if USE_PSHUFD_ROL
- OR ESI, b2 ; b | ~d : u32
- ADD a2, constant2 ; add the constants, sin values, x + ac : u32
- PSLLD A, mmxrot ; shift a left
- XOR ESI, c2 ; I(b,c,d) = c ^ (b | ~d) : u32
- PSRLD XMM6,32-mmxrot ; shift XMM6 right
- ADD a2, ESI ; add to a, a = a + I(b,c,d) + x + ac : u32
- POR A,XMM6 ; OR together
- ROL a2, alurot2 ; rotate left : u32
- PADDD A, B ; add b
- ADD a2, b2 ; add b : u32
- %endmacro
- [bits 32]
- [section .text align=64]
-
- [global _md5_x86_u32sse2_2_transform]
- ; void _md5_x86_u32sse2_2_transform(uns32 *md, const uns32 *msg)
- _md5_x86_u32sse2_2_transform:
- PUSHAD
- MOV EBP,[ESP+36] ; source message
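- ; (PUSHAD pushed 8*4 = 32 bytes; with the 4-byte return address the first stack argument sits at ESP+36)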
- ; load initial block into XMM0-XMM3
- MOVDQA XMM0,[_initial_state+ 0]
- MOVDQA XMM1,[_initial_state+16]
- MOVDQA XMM2,[_initial_state+32]
- MOVDQA XMM3,[_initial_state+48]
- ; load initial block into EAX,EBX,ECX,EDX
- ;the Input Block is at EBP+PHASE1
- MOV EAX,067452301h
- MOV EBX,0efcdab89h
- MOV ECX,098badcfeh
- MOV EDX,010325476h
- ; do FF rounds
- Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S11,S11,S12, 00,00,01, 00, 0d76aa478h,0e8c7b756h, PHASE1
- Ff XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S12,S13,S14, 01,02,03, 01, 0242070dbh,0c1bdceeeh, PHASE1
- Ff XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S13,S11,S12, 02,04,05, 02, 0f57c0fafh,04787c62ah, PHASE1
- Ff XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S14,S13,S14, 03,06,07, 03, 0a8304613h,0fd469501h, PHASE1
- Ff XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S11,S11,S12, 04,08,09, 04, 0698098d8h,08b44f7afh, PHASE1
- Ff XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S12,S13,S14, 05,10,11, 05, 0ffff5bb1h,0895cd7beh, PHASE1
- Ff XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S13,S11,S12, 06,12,13, 06, 06b901122h,0fd987193h, PHASE1
- Ff XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S14,S13,S14, 07,14,15, 07, 0a679438eh,049b40821h, PHASE1
- Fg XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S11,S21,S22, 08,01,06, 08, 0f61e2562h,0c040b340h, PHASE1
- Fg XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S12,S23,S24, 09,11,00, 09, 0265e5a51h,0e9b6c7aah, PHASE1
- Fg XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S13,S21,S22, 10,05,10, 10, 0d62f105dh,002441453h, PHASE1
- Fg XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S14,S23,S24, 11,15,04, 11, 0d8a1e681h,0e7d3fbc8h, PHASE1
- Fg XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S11,S21,S22, 12,09,14, 12, 021e1cde6h,0c33707d6h, PHASE1
- Fg XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S12,S23,S24, 13,03,08, 13, 0f4d50d87h,0455a14edh, PHASE1
- Fg XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S13,S21,S22, 14,13,02, 14, 0a9e3e905h,0fcefa3f8h, PHASE1
- Fg XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S14,S23,S24, 15,07,12, 15, 0676f02d9h,08d2a4c8ah, PHASE1
- ; do GG rounds
- Gh XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S21,S31,S32, 01,05,08, 16, 0fffa3942h,08771f681h, PHASE1
- Gh XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S22,S33,S34, 06,11,14, 17, 06d9d6122h,0fde5380ch, PHASE1
- Gh XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S23,S31,S32, 11,01,04, 18, 0a4beea44h,04bdecfa9h, PHASE1
- Gh XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S24,S33,S34, 00,07,10, 19, 0f6bb4b60h,0bebfbc70h, PHASE1
- Gh XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S21,S31,S32, 05,13,00, 20, 0289b7ec6h,0eaa127fah, PHASE1
- Gh XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S22,S33,S34, 10,03,06, 21, 0d4ef3085h,004881d05h, PHASE1
- Gh XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S23,S31,S32, 15,09,12, 22, 0d9d4d039h,0e6db99e5h, PHASE1
- Gh XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S24,S33,S34, 04,15,02, 23, 01fa27cf8h,0c4ac5665h, PHASE1
- Gi XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S21,S41,S42, 09,00,07, 24, 0f4292244h,0432aff97h, PHASE1
- Gi XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S22,S43,S44, 14,14,05, 25, 0ab9423a7h,0fc93a039h, PHASE1
- Gi XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S23,S41,S42, 03,12,03, 26, 0655b59c3h,08f0ccc92h, PHASE1
- Gi XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S24,S43,S44, 08,10,01, 27, 0ffeff47dh,085845dd1h, PHASE1
- Gi XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S21,S41,S42, 13,08,15, 28, 06fa87e4fh,0fe2ce6e0h, PHASE1
- Gi XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S22,S43,S44, 02,06,13, 29, 0a3014314h,04e0811a1h, PHASE1
- Gi XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S23,S41,S42, 07,04,11, 30, 0f7537e82h,0bd3af235h, PHASE1
- Gi XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S24,S43,S44, 12,02,09, 31, 02ad7d2bbh,0eb86d391h, PHASE1
- ; final addition
- ADD EAX,067452301h ;
- ADD EBX,0efcdab89h ;
- ADD ECX,098badcfeh ;
- ADD EDX,010325476h ;
- MOV [EBP+PHASE1+ 0],EAX ; 4bytes u32
- MOV [EBP+PHASE1+ 4],EBX ; 4bytes u32
- MOV [EBP+PHASE1+ 8],ECX ; 4bytes u32
- MOV [EBP+PHASE1+12],EDX ; 4bytes u32
- ; load initial block into EAX,EBX,ECX,EDX
- ;the Input Block is at EBP+PHASE2
- MOV EAX,067452301h
- MOV EBX,0efcdab89h
- MOV ECX,098badcfeh
- MOV EDX,010325476h
- ; do HH rounds
- Hf XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S31,S11,S12, 05,00,01, 32, 0d76aa478h,0e8c7b756h, PHASE2
- Hf XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S32,S13,S14, 08,02,03, 33, 0242070dbh,0c1bdceeeh, PHASE2
- Hf XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S33,S11,S12, 11,04,05, 34, 0f57c0fafh,04787c62ah, PHASE2
- Hf XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S34,S13,S14, 14,06,07, 35, 0a8304613h,0fd469501h, PHASE2
- Hf XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S31,S11,S12, 01,08,09, 36, 0698098d8h,08b44f7afh, PHASE2
- Hf XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S32,S13,S14, 04,10,11, 37, 0ffff5bb1h,0895cd7beh, PHASE2
- Hf XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S33,S11,S12, 07,12,13, 38, 06b901122h,0fd987193h, PHASE2
- Hf XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S34,S13,S14, 10,14,15, 39, 0a679438eh,049b40821h, PHASE2
- Hg XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S31,S21,S22, 13,01,06, 40, 0f61e2562h,0c040b340h, PHASE2
- Hg XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S32,S23,S24, 00,11,00, 41, 0265e5a51h,0e9b6c7aah, PHASE2
- Hg XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S33,S21,S22, 03,05,10, 42, 0d62f105dh,002441453h, PHASE2
- Hg XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S34,S23,S24, 06,15,04, 43, 0d8a1e681h,0e7d3fbc8h, PHASE2
- Hg XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S31,S21,S22, 09,09,14, 44, 021e1cde6h,0c33707d6h, PHASE2
- Hg XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S32,S23,S24, 12,03,08, 45, 0f4d50d87h,0455a14edh, PHASE2
- Hg XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S33,S21,S22, 15,13,02, 46, 0a9e3e905h,0fcefa3f8h, PHASE2
- Hg XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S34,S23,S24, 02,07,12, 47, 0676f02d9h,08d2a4c8ah, PHASE2
- ; do II rounds
- Ih XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S41,S31,S32, 00,05,08, 48, 0fffa3942h,08771f681h, PHASE2
- Ih XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S42,S33,S34, 07,11,14, 49, 06d9d6122h,0fde5380ch, PHASE2
- Ih XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S43,S31,S32, 14,01,04, 50, 0a4beea44h,04bdecfa9h, PHASE2
- Ih XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S44,S33,S34, 05,07,10, 51, 0f6bb4b60h,0bebfbc70h, PHASE2
- Ih XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S41,S31,S32, 12,13,00, 52, 0289b7ec6h,0eaa127fah, PHASE2
- Ih XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S42,S33,S34, 03,03,06, 53, 0d4ef3085h,004881d05h, PHASE2
- Ih XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S43,S31,S32, 10,09,12, 54, 0d9d4d039h,0e6db99e5h, PHASE2
- Ih XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S44,S33,S34, 01,15,02, 55, 01fa27cf8h,0c4ac5665h, PHASE2
- Ii XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S41,S41,S42, 08,00,07, 56, 0f4292244h,0432aff97h, PHASE2
- Ii XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S42,S43,S44, 15,14,05, 57, 0ab9423a7h,0fc93a039h, PHASE2
- Ii XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S43,S41,S42, 06,12,03, 58, 0655b59c3h,08f0ccc92h, PHASE2
- Ii XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S44,S43,S44, 13,10,01, 59, 0ffeff47dh,085845dd1h, PHASE2
- Ii XMM0,XMM1,XMM2,XMM3, EAX,EBX,ECX,EDX, S41,S41,S42, 04,08,15, 60, 06fa87e4fh,0fe2ce6e0h, PHASE2
- Ii XMM3,XMM0,XMM1,XMM2, ECX,EDX,EAX,EBX, S42,S43,S44, 11,06,13, 61, 0a3014314h,04e0811a1h, PHASE2
- Ii XMM2,XMM3,XMM0,XMM1, EAX,EBX,ECX,EDX, S43,S41,S42, 02,04,11, 62, 0f7537e82h,0bd3af235h, PHASE2
- Ii XMM1,XMM2,XMM3,XMM0, ECX,EDX,EAX,EBX, S44,S43,S44, 09,02,09, 63, 02ad7d2bbh,0eb86d391h, PHASE2
- ; final addition
- PADDD XMM0,[_initial_state+ 0]
- ADD EAX,067452301h ;
- PADDD XMM1,[_initial_state+16]
- ADD EBX,0efcdab89h ;
- PADDD XMM2,[_initial_state+32]
- ADD ECX,098badcfeh ;
- PADDD XMM3,[_initial_state+48]
- ADD EDX,010325476h ;
- ; mark add 4
- ;MOV EBP,[ESP+36] ; the digest destination, same as input source...faster this way
- MOVDQA [EBP+ 0],XMM0 ; 16 bytes xmm
- MOVDQA [EBP+16],XMM1 ; 16 bytes xmm
- MOVDQA [EBP+32],XMM2 ; 16 bytes xmm
- MOVDQA [EBP+48],XMM3 ; 16 bytes xmm
- MOV [EBP+PHASE2+ 0],EAX ; 4bytes u32
- MOV [EBP+PHASE2+ 4],EBX ; 4bytes u32
- MOV [EBP+PHASE2+ 8],ECX ; 4bytes u32
- MOV [EBP+PHASE2+12],EDX ; 4bytes u32
- ; close up shop and return
- ;EXMMS
- POPAD ; pops EDI, ESI, EBP, a placeholder for ESP, EBX, EDX, ECX and EAX; reverses the operation of PUSHAD
- RET
- [bits 32]
- [section .data align=64]
- ;buffer to make sure that code and data never share the same cache line.
- empty:
- times 128 db 0
- align 16
- tempi:
- times 64 dd 0
- tempo:
- times 16 dd 0
- _all_one:
- times 4 DD 0ffffffffh
- _initial_state:
- times 4 DD 067452301h
- times 4 DD 0efcdab89h
- times 4 DD 098badcfeh
- times 4 DD 010325476h
- _sin_const:
- times 4 DD 0d76aa478h
- times 4 DD 0e8c7b756h
- times 4 DD 0242070dbh
- times 4 DD 0c1bdceeeh
- times 4 DD 0f57c0fafh
- times 4 DD 04787c62ah
- times 4 DD 0a8304613h
- times 4 DD 0fd469501h
- times 4 DD 0698098d8h
- times 4 DD 08b44f7afh
- times 4 DD 0ffff5bb1h
- times 4 DD 0895cd7beh
- times 4 DD 06b901122h
- times 4 DD 0fd987193h
- times 4 DD 0a679438eh
- times 4 DD 049b40821h
- times 4 DD 0f61e2562h
- times 4 DD 0c040b340h
- times 4 DD 0265e5a51h
- times 4 DD 0e9b6c7aah
- times 4 DD 0d62f105dh
- times 4 DD 002441453h
- times 4 DD 0d8a1e681h
- times 4 DD 0e7d3fbc8h
- times 4 DD 021e1cde6h
- times 4 DD 0c33707d6h
- times 4 DD 0f4d50d87h
- times 4 DD 0455a14edh
- times 4 DD 0a9e3e905h
- times 4 DD 0fcefa3f8h
- times 4 DD 0676f02d9h
- times 4 DD 08d2a4c8ah
- times 4 DD 0fffa3942h
- times 4 DD 08771f681h
- times 4 DD 06d9d6122h
- times 4 DD 0fde5380ch
- times 4 DD 0a4beea44h
- times 4 DD 04bdecfa9h
- times 4 DD 0f6bb4b60h
- times 4 DD 0bebfbc70h
- times 4 DD 0289b7ec6h
- times 4 DD 0eaa127fah
- times 4 DD 0d4ef3085h
- times 4 DD 004881d05h
- times 4 DD 0d9d4d039h
- times 4 DD 0e6db99e5h
- times 4 DD 01fa27cf8h
- times 4 DD 0c4ac5665h
- times 4 DD 0f4292244h
- times 4 DD 0432aff97h
- times 4 DD 0ab9423a7h
- times 4 DD 0fc93a039h
- times 4 DD 0655b59c3h
- times 4 DD 08f0ccc92h
- times 4 DD 0ffeff47dh
- times 4 DD 085845dd1h
- times 4 DD 06fa87e4fh
- times 4 DD 0fe2ce6e0h
- times 4 DD 0a3014314h
- times 4 DD 04e0811a1h
- times 4 DD 0f7537e82h
- times 4 DD 0bd3af235h
- times 4 DD 02ad7d2bbh
- times 4 DD 0eb86d391h
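
For reference, the scalar step that each macro's ALU half performs is the standard MD5 step from the #define comments at the top of the macros. A minimal C sketch reconstructed from those comments (the helper name ff_step is mine, not from the asm):

Code :

#include <stdint.h>

/* rotate left, as in the ROLuns32 comment */
#define ROTL32(x, s) (((x) << (s)) | ((x) >> (32 - (s))))
/* F in the d ^ (b & (c ^ d)) form used by the macros */
#define F(b, c, d)   ((d) ^ ((b) & ((c) ^ (d))))

/* one F-round step: a += F(b,c,d) + x + ac; a = ROL(a, s); a += b */
static void ff_step(uint32_t *a, uint32_t b, uint32_t c, uint32_t d,
                    uint32_t x, int s, uint32_t ac)
{
    *a += F(b, c, d) + x + ac;
    *a  = ROTL32(*a, s);
    *a += b;
}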
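
The PSLLD / PSRLD / POR triple in every macro is there because SSE2 has no packed rotate, so a left rotate by s is built as (x << s) | (x >> (32 - s)); the PSHUFD with 11100100b is just an identity shuffle used as a cheaper register copy on the P4. Roughly the same thing with intrinsics (a sketch, the macro name is mine):

Code :

#include <emmintrin.h>

/* rotate each 32-bit lane of x left by the constant s:
   PSLLD + PSRLD + POR, i.e. (x << s) | (x >> (32 - s)) */
#define ROTL32X4(x, s) \
    _mm_or_si128(_mm_slli_epi32((x), (s)), _mm_srli_epi32((x), 32 - (s)))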