261
Name:
FrozenVoid
!!mJCwdV5J0Xy2A21
2011-11-05 13:48
Conclusion: no magic occurs; the ternary just gets optimized very well by DMC, and the branchless bithacks run at equivalent speed but are -3% faster when unoptimized code is generated. Below is the optimized code for the ternary vs. the bithack without |0 (the ternary wins by 17 cycles):
C:\Program Files\dmc8.50\dmc\dm\bin\code>abs
Cycles spent:30110
Cycles spent:30127
; Disassembly of file: abs.obj
; Sat Nov 05 20:18:42 2011
; Mode: 32 bits
; Syntax: MASM/ML
; Instruction set: Pentium
; Assembler setup: Pentium instruction set, flat 32-bit memory model.
.586
.model flat
; Symbols exported by this object file.
public _main
; NOTE(review): _rdtsc is declared both public (here) and extern (below);
; the disassembler flags it as a communal symbol that MASM cannot express,
; so this listing probably needs manual editing before it will assemble.
public _rdtsc ; Note: Communal. Not supported by MASM
; External symbols referenced by the code below.
extern _rand: near
extern _printf: near
extern _rdtsc: near
extern __acrtused_con: byte
FLAT GROUP
_TEXT SEGMENT DWORD PUBLIC 'CODE' ; section number 1
;-----------------------------------------------------------------------
; _main
; Fills a 10000-element dword array on the stack with values derived
; from _rand, then times two loops over that array with _rdtsc and prints
; each elapsed cycle count via _printf using the format string at ?_010.
;   Loop 1 (?_004): branch-based negate-if-not-positive.
;   Loop 2 (?_008): branchless sign computation followed by imul.
; (Presumably the "ternary" and "bithack" variants mentioned in the
; accompanying post, respectively - confirm against the C source.)
; Neither loop stores its per-element result anywhere.
;
; Frame layout (offsets from ebp, total frame 9C58H = 40024 bytes):
;   [ebp-9C40H + i*4]  array element i, i = 0..9999
;   [ebp-9C44H]        start timestamp, high dword (edx from _rdtsc)
;   [ebp-9C48H]        start timestamp, low dword  (eax from _rdtsc)
;   [ebp-9C4CH]        index for both timed loops
;   [ebp-9C50H]        index for the fill loop
;   [ebp-9C54H]        copy of the current element in loop 2
;
; Returns eax = 0. ebx, esi, edi are saved and restored.
;-----------------------------------------------------------------------
_main PROC NEAR
push ebp ; 0000 _ 55
mov ebp, esp ; 0001 _ 8B. EC
; Allocate the frame in 4096-byte steps, reading [esp] after each step
; (9 * 4096 = 36864 bytes), then the remaining 3160 bytes in one go.
; Presumably a stack probe so each page is touched in order - confirm.
mov edx, 9 ; 0003 _ BA, 00000009
?_001: sub esp, 4096 ; 0008 _ 81. EC, 00001000
test dword ptr [esp], esp ; 000E _ 85. 24 24
dec edx ; 0011 _ 4A
jnz ?_001 ; 0012 _ 75, F4
sub esp, 3160 ; 0014 _ 81. EC, 00000C58
push ebx ; 001A _ 53
push esi ; 001B _ 56
push edi ; 001C _ 57
; --- Fill loop: array[i] = (_rand() % 10000) - 5000, for i = 0..9999 ---
mov dword ptr [ebp-9C50H], 0 ; 001D _ C7. 85, FFFF63B0, 00000000
?_002: cmp dword ptr [ebp-9C50H], 10000 ; 0027 _ 81. BD, FFFF63B0, 00002710
jge ?_003 ; 0031 _ 7D, 28
call _rand ; 0033 _ E8, 00000000(rel)
mov ecx, 10000 ; 0038 _ B9, 00002710
; cdq sign-extends eax into edx:eax before the signed divide; the
; remainder is left in edx.
cdq ; 003D _ 99
idiv ecx ; 003E _ F7. F9
add edx, -5000 ; 0040 _ 81. C2, FFFFEC78
mov eax, dword ptr [ebp-9C50H] ; 0046 _ 8B. 85, FFFF63B0
mov dword ptr [ebp+eax*4-9C40H], edx ; 004C _ 89. 94 85, FFFF63C0
inc dword ptr [ebp-9C50H] ; 0053 _ FF. 85, FFFF63B0
jmp ?_002 ; 0059 _ EB, CC
; --- Timed loop 1: record start timestamp (edx:eax) ---
?_003: call _rdtsc ; 005B _ E8, 00000000(rel)
mov dword ptr [ebp-9C48H], eax ; 0060 _ 89. 85, FFFF63B8
mov dword ptr [ebp-9C44H], edx ; 0066 _ 89. 95, FFFF63BC
mov dword ptr [ebp-9C4CH], 0 ; 006C _ C7. 85, FFFF63B4, 00000000
?_004: cmp dword ptr [ebp-9C4CH], 10000 ; 0076 _ 81. BD, FFFF63B4, 00002710
jge ?_007 ; 0080 _ 7D, 1D
mov edx, dword ptr [ebp-9C4CH] ; 0082 _ 8B. 95, FFFF63B4
mov ebx, dword ptr [ebp+edx*4-9C40H] ; 0088 _ 8B. 9C 95, FFFF63C0
; ebx = element; negate it when it is <= 0, otherwise skip the negate.
; The value left in ebx is not stored and is overwritten next iteration.
test ebx, ebx ; 008F _ 85. DB
jle ?_005 ; 0091 _ 7E, 02
jmp ?_006 ; 0093 _ EB, 02
?_005: neg ebx ; 0095 _ F7. DB
?_006: inc dword ptr [ebp-9C4CH] ; 0097 _ FF. 85, FFFF63B4
jmp ?_004 ; 009D _ EB, D7
; --- End of timed loop 1: elapsed = now - start as a 64-bit subtract ---
?_007: call _rdtsc ; 009F _ E8, 00000000(rel)
sub eax, dword ptr [ebp-9C48H] ; 00A4 _ 2B. 85, FFFF63B8
sbb edx, dword ptr [ebp-9C44H] ; 00AA _ 1B. 95, FFFF63BC
; Push the 64-bit elapsed count (high dword first) and the format string:
; 12 bytes of arguments for _printf.
push edx ; 00B0 _ 52
push eax ; 00B1 _ 50
push offset FLAT:?_010 ; 00B2 _ 68, 00000000(segrel)
call _printf ; 00B7 _ E8, 00000000(rel)
; --- Timed loop 2: record start timestamp (edx:eax) ---
call _rdtsc ; 00BC _ E8, 00000000(rel)
mov dword ptr [ebp-9C48H], eax ; 00C1 _ 89. 85, FFFF63B8
mov dword ptr [ebp-9C44H], edx ; 00C7 _ 89. 95, FFFF63BC
; Deferred cleanup of the 12 bytes pushed for the _printf call above.
; NOTE(review): this instruction sits after the start _rdtsc, so the
; second timed region contains one instruction the first does not.
add esp, 12 ; 00CD _ 83. C4, 0C
mov dword ptr [ebp-9C4CH], 0 ; 00D0 _ C7. 85, FFFF63B4, 00000000
?_008: cmp dword ptr [ebp-9C4CH], 10000 ; 00DA _ 81. BD, FFFF63B4, 00002710
jge ?_009 ; 00E4 _ 7D, 2F
mov esi, dword ptr [ebp-9C4CH] ; 00E6 _ 8B. B5, FFFF63B4
mov ecx, dword ptr [ebp+esi*4-9C40H] ; 00EC _ 8B. 8C B5, FFFF63C0
mov dword ptr [ebp-9C54H], ecx ; 00F3 _ 89. 8D, FFFF63AC
; Branchless sign factor:
;   add ecx, ecx  -> carry flag = sign bit of the element
;   sbb ecx, ecx  -> ecx = 0 (non-negative) or -1 (negative)
;   inc ecx       -> ecx = 1 (non-negative) or  0 (negative)
;   lea eax, ...  -> eax = ecx*2 - 1 = +1 or -1
add ecx, ecx ; 00F9 _ 01. C9
sbb ecx, ecx ; 00FB _ 19. C9
inc ecx ; 00FD _ 41
; Note: Displacement could be made smaller by sign extension
lea eax, [ecx*2-1H] ; 00FE _ 8D. 04 4D, FFFFFFFF
; edx:eax = sign factor * element. The product is not stored and is
; overwritten on the next iteration.
mov edi, dword ptr [ebp-9C54H] ; 0105 _ 8B. BD, FFFF63AC
imul edi ; 010B _ F7. EF
inc dword ptr [ebp-9C4CH] ; 010D _ FF. 85, FFFF63B4
jmp ?_008 ; 0113 _ EB, C5
; --- End of timed loop 2: elapsed = now - start, then print it ---
?_009: call _rdtsc ; 0115 _ E8, 00000000(rel)
sub eax, dword ptr [ebp-9C48H] ; 011A _ 2B. 85, FFFF63B8
sbb edx, dword ptr [ebp-9C44H] ; 0120 _ 1B. 95, FFFF63BC
push edx ; 0126 _ 52
push eax ; 0127 _ 50
push offset FLAT:?_010 ; 0128 _ 68, 00000000(segrel)
call _printf ; 012D _ E8, 00000000(rel)
; Return value 0; drop the 12 bytes of _printf arguments, restore the
; saved registers and tear down the frame.
xor eax, eax ; 0132 _ 31. C0
add esp, 12 ; 0134 _ 83. C4, 0C
pop edi ; 0137 _ 5F
pop esi ; 0138 _ 5E
pop ebx ; 0139 _ 5B
leave ; 013A _ C9
ret ; 013B _ C3
_main ENDP
_TEXT ENDS
_DATA SEGMENT DWORD PUBLIC 'DATA' ; section number 2
; ?_010: NUL-terminated format string passed to _printf by _main.
; The bytes decode to "Cycles spent:%llu" followed by a newline (0AH).
?_010 label byte
db 43H, 79H, 63H, 6CH, 65H, 73H, 20H, 73H ; 0000 _ Cycles s
db 70H, 65H, 6EH, 74H, 3AH, 25H, 6CH, 6CH ; 0008 _ pent:%ll
db 75H, 0AH, 00H ; 0010 _ u..
_DATA ENDS
; The CONST and _BSS segments are declared but contain nothing.
CONST SEGMENT DWORD PUBLIC 'CONST' ; section number 3
CONST ENDS
_BSS SEGMENT DWORD PUBLIC 'BSS' ; section number 4
_BSS ENDS
_text$_rdtsc SEGMENT DWORD PUBLIC 'CODE' ; section number 5
; Communal section not supported by MASM
;-----------------------------------------------------------------------
; _rdtsc
; Executes the RDTSC instruction and returns immediately, leaving the
; 64-bit time-stamp counter in edx:eax (high dword in edx, low in eax).
; Takes no arguments and uses no stack beyond the return address.
; _main calls it before and after each timed loop.
;-----------------------------------------------------------------------
_rdtsc PROC NEAR
; COMDEF _rdtsc
rdtsc ; 0000 _ 0F 31
ret ; 0002 _ C3
_rdtsc ENDP
_text$_rdtsc ENDS
END