Copyright (C) 2003, 2009 Lavalys Consulting Group, Inc. All rights reserved. everest_bench.dll build: 2.4.257.0 Jan 19 2009 01:57:35 CPUCount: 2, procMask: 0x00000003 Size of Memory: 3109976KB Priority:080 CPU#00 Vendor: GenuineIntel CoreType:0x20010676 CPU#00 Family: 6 Model: 17 Stepping: 6 Type: "Intel(R) Core(TM)2 Duo CPU P8400 @ 2.26GHz" CPU#00 Features: TSC, FPU, CMOV, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, CMPXCHG8B, HTT CPU#00 Frequency: 2260.90MHz OS:5.1.2600 Szervizcsomag 3 CPU#00 AffMask:0x00000001 APIC_ID:0x00000000 Phys_ID:000 Core_ID:00 SMT_ID:00 PhysMask:0x00000003 CPU#00 L1I cache: 32KB, 64 byte cache line, 8 way, SMask:00000001 CPU#00 L1D cache: 32KB, 64 byte cache line, 8 way, SMask:00000001 CPU#00 L2 cache: 3072KB, 64 byte cache line, 12 way, SMask:00000003 CPU#01 Vendor: GenuineIntel CoreType:0x20010676 CPU#01 Family: 6 Model: 17 Stepping: 6 Type: "Intel(R) Core(TM)2 Duo CPU P8400 @ 2.26GHz" CPU#01 Features: TSC, FPU, CMOV, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, CMPXCHG8B, HTT CPU#01 Frequency: 2260.87MHz OS:5.1.2600 Szervizcsomag 3 CPU#01 AffMask:0x00000002 APIC_ID:0x00000001 Phys_ID:000 Core_ID:00 SMT_ID:01 PhysMask:0x00000003 CPU#01 L1I cache: 32KB, 64 byte cache line, 8 way, SMask:00000002 CPU#01 L1D cache: 32KB, 64 byte cache line, 8 way, SMask:00000002 CPU#01 L2 cache: 3072KB, 64 byte cache line, 12 way, SMask:00000003 Parameters: "-ph -d4031 " Instruction Latency: Used CPUs: 1 ProcMask:0x00000001 0 X86 :NOP L: [no true dep.] T: 0.14ns= 0.32c 1 X86 :0x66 NOP L: [no true dep.] T: 0.14ns= 0.32c 2 X86 : 2x 0x66 NOP L: [no true dep.] T: 0.14ns= 0.32c 3 X86 : 3x 0x66 NOP L: [no true dep.] T: 0.14ns= 0.32c 4 X86 : 4x 0x66 NOP L: [no true dep.] T: 0.14ns= 0.32c 5 X86 : 5x 0x66 NOP L: [no true dep.] T: 0.16ns= 0.35c 6 X86 : 6x 0x66 NOP L: [no true dep.] T: 0.18ns= 0.41c 7 X86 : 7x 0x66 NOP L: [no true dep.] T: 0.21ns= 0.47c 8 X86 : 8x 0x66 NOP L: [no true dep.] T: 0.23ns= 0.53c 9 X86 : 9x 0x66 NOP L: [no true dep.] T: 0.26ns= 0.59c 10 X86 :10x 0x66 NOP L: [no true dep.] T: 0.29ns= 0.65c 11 X86 :11x 0x66 NOP L: [no true dep.] T: 0.31ns= 0.71c 12 X86 :12x 0x66 NOP L: [no true dep.] T: 0.34ns= 0.77c 13 X86 :13x 0x66 NOP L: [no true dep.] T: 0.37ns= 0.83c 14 X86 :14x 0x66 NOP L: [no true dep.] T: 0.39ns= 0.89c 15 SSE2 :PAUSE L: [no true dep.] T: 3.35ns= 7.58c 16 X86 :MOV r8, imm8 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 17 X86 :MOV r16, imm16 L: 0.74ns= 1.7c T: 0.74ns= 1.67c 18 X86 :MOV r32, imm32 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 20 X86 :MOV r8, r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 21 X86 :MOV r16, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 22 X86 :MOV r32, r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 24 X86 :MOV r8, [m8] L: 1.66ns= 3.8c T: 0.41ns= 0.92c 25 X86 :MOV r16, [m16] L: 1.66ns= 3.8c T: 0.41ns= 0.92c 26 X86 :MOV r32, [m32] L: 1.25ns= 2.8c T: 0.41ns= 0.92c 28 X86 :MOV [m8], r8 L: [memory dep.] T: 0.41ns= 0.92c 29 X86 :MOV [m16], r16 L: [memory dep.] T: 0.41ns= 0.92c 30 X86 :MOV [m32], r32 L: [memory dep.] T: 0.41ns= 0.92c 31 X86 :MOV [m32 + 8], r32 L: [memory dep.] T: 0.41ns= 0.92c 34 X86 :MOV r8,[m8]+MOV [m8],r8 L: 5.01ns= 11.3c T: 1.07ns= 2.42c 35 X86 :MOV r16,[m16]+MOV [m16],r16 L: 7.52ns= 17.0c T: 0.25ns= 0.57c 36 X86 :MOV r32,[m32]+MOV [m32],r32 L: 6.67ns= 15.1c T: 0.28ns= 0.63c 38 SSE2 :MOVNTI [m32], r32 L: [memory dep.] T: 0.92ns= 0.92c 40 CMOV :CMOVNZ r16, r16 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 41 CMOV :CMOVNZ r32, r32 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 43 X86 :MOVSX r16, r8 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 44 X86 :MOVSX r32, r8 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 46 X86 :MOVSX r32, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 49 X86 :MOVZX r16, r8 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 50 X86 :MOVZX r32, r8 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 52 X86 :MOVZX r32, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 54 X86 :XCHG r8, r8 L: 1.07ns= 2.4c T: 0.39ns= 0.88c 55 X86 :XCHG r16, r16 L: 1.07ns= 2.4c T: 0.39ns= 0.88c 56 X86 :XCHG r32, r32 L: 1.07ns= 2.4c T: 0.39ns= 0.88c 58 X86 :XCHG r1_8, r2_8 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 59 X86 :XCHG r1_16, r2_16 L: 0.81ns= 1.8c T: 0.41ns= 0.92c 60 X86 :XCHG r1_32, r2_32 L: 0.81ns= 1.8c T: 0.41ns= 0.92c 62 X86 :XCHG r8, [m8] L: 7.52ns= 17.0c T: 7.63ns= 17.25c 63 X86 :XCHG r16, [m16] L: 7.52ns= 17.0c T: 7.63ns= 17.25c 64 X86 :XCHG r32, [m32] L: 7.56ns= 17.1c T: 7.63ns= 17.25c 66 X86 :ADD r32, 0x04000 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 67 X86 :ADD r32, 0x08000 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 68 X86 :ADD r32, 0x10000 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 69 X86 :ADD r32, 0x20000 L: 0.41ns= 0.9c T: 0.15ns= 0.34c 70 X86 :ADD r8, r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 71 X86 :ADD r16, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 72 X86 :ADD r32, r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 74 X86 :ADD r8, [m8] L: 2.10ns= 4.8c T: 0.41ns= 0.92c 75 X86 :ADD r16, [m16] L: 2.10ns= 4.8c T: 0.41ns= 0.92c 76 X86 :ADD r32, [m32] L: 1.81ns= 4.1c T: 0.41ns= 0.92c 78 X86 :ADD [m8], r8 L: 2.51ns= 5.7c T: 0.70ns= 1.58c 79 X86 :ADD [m16], r16 L: 2.51ns= 5.7c T: 0.85ns= 1.92c 80 X86 :ADD [m32], r32 L: 2.51ns= 5.7c T: 0.85ns= 1.92c 81 X86 :ADD [m32 + 8], r32 L: 2.51ns= 5.7c T: 0.41ns= 0.92c 84 X86 :LOCK ADD [m8], r8 L: 8.44ns= 19.1c T: 8.77ns= 19.83c 85 X86 :LOCK ADD [m16], r16 L: 8.44ns= 19.1c T: 8.48ns= 19.17c 86 X86 :LOCK ADD [m32], r32 L: 8.37ns= 18.9c T: 8.48ns= 19.17c 87 X86 :LOCK ADD [m32 + 8], r32 L: 8.37ns= 18.9c T: 8.48ns= 19.17c 90 X86 :ADD r8, imm8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 91 X86 :ADD r16, imm8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 92 X86 :ADD r32, imm8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 94 X86 :ADD r16, imm16 L: 0.92ns= 2.1c T: 0.92ns= 2.08c 95 X86 :ADD r32, imm32 L: 0.44ns= 1.0c T: 0.16ns= 0.35c 97 X86 :ADD [m8], imm8 L: 2.51ns= 5.7c T: 0.70ns= 1.58c 98 X86 :ADD [m16], imm8 L: 2.51ns= 5.7c T: 0.70ns= 1.58c 99 X86 :ADD [m32], imm8 L: 2.51ns= 5.7c T: 0.70ns= 1.58c 101 X86 :ADD [m16], imm16 L: 2.51ns= 5.7c T: 1.11ns= 2.50c 102 X86 :ADD [m32], imm32 L: 2.51ns= 5.7c T: 0.70ns= 1.58c 104 X86 :ADD al, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 105 X86 :ADD ax, imm16 L: 0.74ns= 1.7c T: 0.74ns= 1.67c 106 X86 :ADD eax, imm32 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 108 X86 :SUB r8, r8 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 109 X86 :SUB r16, r16 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 110 X86 :SUB r32, r32 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 112 X86 :SUB r1_8, r2_8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 113 X86 :SUB r1_16, r2_16 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 114 X86 :SUB r1_32, r2_32 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 116 X86 :ADC r8, r8 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 117 X86 :ADC r16, r16 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 118 X86 :ADC r32, r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 120 X86 :SBB r8, r8 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 121 X86 :SBB r16, r16 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 122 X86 :SBB r32, r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 124 X86 :SBB r1_8, r2_8 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 125 X86 :SBB r1_16, r2_16 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 126 X86 :SBB r1_32, r2_32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 128 X86 :CMP r8, r8 L: [no true dep.] T: 0.21ns= 0.47c 129 X86 :CMP r16, r16 L: [no true dep.] T: 0.21ns= 0.47c 130 X86 :CMP r32, r32 L: [no true dep.] T: 0.21ns= 0.47c 132 X86 :CMP r1_8, r2_8 L: [no true dep.] T: 0.41ns= 0.92c 133 X86 :CMP r1_16, r2_16 L: [no true dep.] T: 0.41ns= 0.92c 134 X86 :CMP r1_32, r2_32 L: [no true dep.] T: 0.41ns= 0.92c 136 X86 :AND r8, r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 137 X86 :AND r16, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 138 X86 :AND r32, r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 140 X86 :AND r1_8, r2_8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 141 X86 :AND r1_16, r2_16 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 142 X86 :AND r1_32, r2_32 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 144 X86 :OR r8, r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 145 X86 :OR r16, r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 146 X86 :OR r32, r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 148 X86 :OR r1_8, r2_8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 149 X86 :OR r1_16, r2_16 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 150 X86 :OR r1_32, r2_32 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 152 X86 :XOR r8, r8 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 153 X86 :XOR r16, r16 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 154 X86 :XOR r32, r32 L: 0.14ns= 0.3c T: 0.14ns= 0.31c 156 X86 :XOR r1_8, r2_8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 157 X86 :XOR r1_16, r2_16 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 158 X86 :XOR r1_32, r2_32 L: 0.41ns= 0.9c T: 0.21ns= 0.48c 160 X86 :NEG r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 161 X86 :NEG r16 L: 0.41ns= 0.9c T: 0.23ns= 0.53c 162 X86 :NEG r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 164 X86 :NOT r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 165 X86 :NOT r16 L: 0.41ns= 0.9c T: 0.23ns= 0.53c 166 X86 :NOT r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 168 X86 :TEST r8, r8 L: [no true dep.] T: 0.21ns= 0.47c 169 X86 :TEST r16, r16 L: [no true dep.] T: 0.21ns= 0.47c 170 X86 :TEST r32, r32 L: [no true dep.] T: 0.21ns= 0.47c 172 X86 :TEST r1_8, r2_8 L: [no true dep.] T: 0.41ns= 0.92c 173 X86 :TEST r1_16, r2_16 L: [no true dep.] T: 0.41ns= 0.92c 174 X86 :TEST r1_32, r2_32 L: [no true dep.] T: 0.41ns= 0.92c 176 X86 :BT r16, r16 L: [no true dep.] T: 0.41ns= 0.92c 177 X86 :BT r32, r32 L: [no true dep.] T: 0.41ns= 0.92c 179 X86 :BT r16, imm8 L: [no true dep.] T: 0.41ns= 0.92c 180 X86 :BT r32, imm8 L: [no true dep.] T: 0.41ns= 0.92c 182 X86 :BTC r16, r16 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 183 X86 :BTC r32, r32 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 185 X86 :BTC r16, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 186 X86 :BTC r32, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 188 X86 :BTR r16, r16 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 189 X86 :BTR r32, r32 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 191 X86 :BTR r16, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 192 X86 :BTR r32, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 194 X86 :BTS r16, r16 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 195 X86 :BTS r32, r32 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 197 X86 :BTS r16, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 198 X86 :BTS r32, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 200 X86 :SETC r8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 201 X86 :INC r8 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 202 X86 :INC r16 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 203 X86 :INC r32 L: 0.41ns= 0.9c T: 0.14ns= 0.32c 205 X86 :LEA r16, [r16+r16] L: 1.36ns= 3.1c T: 1.36ns= 3.08c 206 X86 :LEA r32, [r32+r32] L: 0.41ns= 0.9c T: 0.41ns= 0.92c 208 X86 :LEA r16, [r+r+disp8] L: 1.36ns= 3.1c T: 1.36ns= 3.08c 209 X86 :LEA r32, [r+r+disp8] L: 0.41ns= 0.9c T: 0.41ns= 0.92c 211 X86 :LEA r16, [r+r*8] L: 1.36ns= 3.1c T: 1.36ns= 3.08c 212 X86 :LEA r32, [r+r*8] L: 0.41ns= 0.9c T: 0.41ns= 0.92c 214 X86 :LEA r16, [r+r*8+disp8] L: 1.36ns= 3.1c T: 1.36ns= 3.08c 215 X86 :LEA r32, [r+r*8+disp8] L: 0.41ns= 0.9c T: 0.41ns= 0.92c 217 X86 :SHL r8, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 218 X86 :SHL r16, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 219 X86 :SHL r32, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 221 X86 :SHL r8, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 222 X86 :SHL r16, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 223 X86 :SHL r32, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 225 X86 :SHL r8, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 226 X86 :SHL r16, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 227 X86 :SHL r32, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 229 X86 :SHR r8, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 230 X86 :SHR r16, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 231 X86 :SHR r32, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 233 X86 :SHR r8, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 234 X86 :SHR r16, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 235 X86 :SHR r32, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 237 X86 :SHR r8, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 238 X86 :SHR r16, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 239 X86 :SHR r32, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 241 X86 :SAR r8, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 242 X86 :SAR r16, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 243 X86 :SAR r32, 1 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 245 X86 :SAR r8, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 246 X86 :SAR r16, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 247 X86 :SAR r32, imm8 L: 0.41ns= 0.9c T: 0.21ns= 0.47c 249 X86 :SAR r8, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 250 X86 :SAR r16, cl L: 0.44ns= 1.0c T: 0.19ns= 0.42c 251 X86 :SAR r32, cl L: 0.41ns= 0.9c T: 0.20ns= 0.46c 253 X86 :SHLD r16, r16, imm8 L: 0.85ns= 1.9c T: 0.40ns= 0.89c 254 X86 :SHLD r32, r32, imm8 L: 0.85ns= 1.9c T: 0.39ns= 0.89c 256 X86 :SHLD r16, r16, cl L: 0.85ns= 1.9c T: 0.39ns= 0.89c 257 X86 :SHLD r32, r32, cl L: 0.85ns= 1.9c T: 0.39ns= 0.89c 259 X86 :SHRD r16, r16, imm8 L: 0.85ns= 1.9c T: 0.40ns= 0.89c 260 X86 :SHRD r32, r32, imm8 L: 0.85ns= 1.9c T: 0.40ns= 0.89c 262 X86 :SHRD r16, r16, cl L: 0.85ns= 1.9c T: 0.39ns= 0.89c 263 X86 :SHRD r32, r32, cl L: 0.85ns= 1.9c T: 0.39ns= 0.89c 265 X86 :ROL r8, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 266 X86 :ROL r16, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 267 X86 :ROL r32, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 269 X86 :ROL r8, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 270 X86 :ROL r16, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 271 X86 :ROL r32, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 273 X86 :ROL r8, cl L: 0.41ns= 0.9c T: 0.41ns= 0.92c 274 X86 :ROL r16, cl L: 0.41ns= 0.9c T: 0.41ns= 0.92c 275 X86 :ROL r32, cl L: 0.41ns= 0.9c T: 0.41ns= 0.92c 277 X86 :ROR r8, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 278 X86 :ROR r16, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 279 X86 :ROR r32, 1 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 281 X86 :ROR r8, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 282 X86 :ROR r16, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 283 X86 :ROR r32, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 285 X86 :ROR r8, cl L: 0.41ns= 0.9c T: 0.41ns= 0.92c 286 X86 :ROR r16, cl L: 0.44ns= 1.0c T: 0.41ns= 0.92c 287 X86 :ROR r32, cl L: 0.44ns= 1.0c T: 0.44ns= 1.00c 289 X86 :RCL r8, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 290 X86 :RCL r16, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 291 X86 :RCL r32, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 293 X86 :RCL r8, imm8 L: 5.01ns= 11.3c T: 4.17ns= 9.42c 294 X86 :RCL r16, imm8 L: 4.79ns= 10.8c T: 4.17ns= 9.42c 295 X86 :RCL r32, imm8 L: 4.79ns= 10.8c T: 4.17ns= 9.42c 297 X86 :RCL r8, cl L: 5.01ns= 11.3c T: 4.17ns= 9.42c 298 X86 :RCL r16, cl L: 4.79ns= 10.8c T: 4.17ns= 9.42c 299 X86 :RCL r32, cl L: 4.79ns= 10.8c T: 4.17ns= 9.42c 301 X86 :RCR r8, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 302 X86 :RCR r16, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 303 X86 :RCR r32, 1 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 305 X86 :RCR r8, imm8 L: 5.12ns= 11.6c T: 4.61ns= 10.42c 306 X86 :RCR r16, imm8 L: 4.79ns= 10.8c T: 4.17ns= 9.42c 307 X86 :RCR r32, imm8 L: 4.79ns= 10.8c T: 4.17ns= 9.42c 309 X86 :RCR r8, cl L: 5.12ns= 11.6c T: 4.61ns= 10.42c 310 X86 :RCR r16, cl L: 4.79ns= 10.8c T: 4.17ns= 9.42c 311 X86 :RCR r32, cl L: 4.79ns= 10.8c T: 4.17ns= 9.42c 313 X86 :BSF r16, r16 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 314 X86 :BSF r32, r32 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 316 X86 :BSR r16, r16 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 317 X86 :BSR r32, r32 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 319 X86 :BSWAP r32 L: 1.66ns= 3.8c T: 0.41ns= 0.92c 327 X86 :IMUL r16, r16 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 328 X86 :IMUL r32, r32 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 330 X86 :IMUL r16, r16, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 331 X86 :IMUL r32, r32, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 333 X86 :IMUL r16, r16, imm16 L: 1.25ns= 2.8c T: 0.92ns= 2.08c 334 X86 :IMUL r32, r32, imm32 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 336 X86 :IMUL r8 (ah) L: 1.66ns= 3.8c T: 1.25ns= 2.83c 337 X86 :IMUL r16 (dx) L: 2.03ns= 4.6c T: 1.99ns= 4.50c 338 X86 :IMUL r32 (edx) L: 2.03ns= 4.6c T: 1.99ns= 4.50c 340 X86 :MUL r8 (ah) L: 1.66ns= 3.8c T: 1.25ns= 2.83c 341 X86 :MUL r16 (dx) L: 2.03ns= 4.6c T: 1.99ns= 4.50c 342 X86 :MUL r32 (edx) L: 2.03ns= 4.6c T: 1.99ns= 4.50c 344 X86 :IMUL r8 (al) L: 1.25ns= 2.8c T: 1.25ns= 2.83c 345 X86 :IMUL r16 (ax) L: 1.99ns= 4.5c T: 1.99ns= 4.50c 346 X86 :IMUL r32 (eax) L: 1.99ns= 4.5c T: 1.99ns= 4.50c 348 X86 :MUL r8 (al) L: 1.25ns= 2.8c T: 1.25ns= 2.83c 349 X86 :MUL r16 (ax) L: 1.99ns= 4.5c T: 1.99ns= 4.50c 350 X86 :MUL r32 (eax) L: 1.99ns= 4.5c T: 1.99ns= 4.50c 352 X86 :IDIV r8 14/ 7b (full) L: 7.52ns= 17.0c T: 7.11ns= 16.08c 353 X86 :IDIV r8 12/ 7b ax upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 354 X86 :IDIV r8 7/ 7b ax upd L: 4.17ns= 9.4c T: 4.17ns= 9.42c 355 X86 :IDIV r8 4/ 7b ax upd L: [no true dep.] T: 2.10ns= 4.75c 356 X86 :IDIV r8 0/ 7b L: [no true dep.] T: 5.01ns= 11.33c 357 X86 :IDIV r8 11/ 4b ax upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 358 X86 :IDIV r8 8/ 4b ax upd L: [no true dep.] T: 2.10ns= 4.75c 359 X86 :IDIV r8 4/ 4b ax upd L: 4.17ns= 9.4c T: 4.17ns= 9.42c 360 X86 :IDIV r8 0/ 4b L: [no true dep.] T: 5.01ns= 11.33c 361 X86 :IDIV r8 2^12/2^6 ax upd L: [no true dep.] T: 2.10ns= 4.75c 362 X86 :IDIV r8 1/1 L: 5.42ns= 12.3c T: 5.01ns= 11.33c 363 X86 :IDIV r8 1/1 ax upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 364 X86 :IDIV r16 30/15b (full) L: 8.92ns= 20.2c T: 8.70ns= 19.67c 365 X86 :IDIV r16 24/15b ax upd L: 8.33ns= 18.8c T: 8.33ns= 18.83c 366 X86 :IDIV r16 15/15b ax upd L: 8.33ns= 18.8c T: 8.33ns= 18.83c 367 X86 :IDIV r16 8/15b ax/dx upd L: [no true dep.] T: 2.10ns= 4.75c 368 X86 :IDIV r16 0/15b L: [no true dep.] T: 6.34ns= 14.33c 369 X86 :IDIV r16 23/ 8b ax upd L: 8.77ns= 19.8c T: 8.77ns= 19.83c 370 X86 :IDIV r16 16/ 8b ax upd L: [no true dep.] T: 8.37ns= 18.92c 371 X86 :IDIV r16 8/ 8b ax upd L: 8.33ns= 18.8c T: 8.33ns= 18.83c 372 X86 :IDIV r16 0/ 8b L: [no true dep.] T: 6.34ns= 14.33c 373 X86 :IDIV r16 2^28/2^14 ax/dx L: [no true dep.] T: 4.28ns= 9.67c 374 X86 :IDIV r16 1/1 L: 6.41ns= 14.5c T: 6.34ns= 14.33c 375 X86 :IDIV r16 1/1 ax upd L: 6.27ns= 14.2c T: 6.27ns= 14.17c 376 X86 :IDIV r16 1/1 ax/dx upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 377 X86 :IDIV r32 62/31b (full) L: 10.17ns= 23.0c T: 9.84ns= 22.25c 378 X86 :IDIV r32 62/31b 0 rem. L: 10.17ns= 23.0c T: 9.84ns= 22.25c 379 X86 :IDIV r32 48/31b eax upd L: 8.22ns= 18.6c T: 8.18ns= 18.50c 380 X86 :IDIV r32 31/31b eax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 381 X86 :IDIV r32 16/31b eax/edx L: [no true dep.] T: 2.10ns= 4.75c 382 X86 :IDIV r32 0/31b L: [no true dep.] T: 5.68ns= 12.83c 383 X86 :IDIV r32 47/16b eax upd L: 9.88ns= 22.3c T: 9.88ns= 22.33c 384 X86 :IDIV r32 32/16b eax upd L: [no true dep.] T: 8.18ns= 18.50c 385 X86 :IDIV r32 16/16b eax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 386 X86 :IDIV r32 0/16b L: [no true dep.] T: 5.68ns= 12.83c 387 X86 :IDIV r32 2^60/2^30 eax/edx L: [no true dep.] T: 5.86ns= 13.25c 388 X86 :IDIV r32 1/1 L: 5.97ns= 13.5c T: 5.68ns= 12.83c 389 X86 :IDIV r32 1/1 eax upd L: 5.68ns= 12.8c T: 5.68ns= 12.83c 390 X86 :IDIV r32 1/1 eax/edx upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 405 X86 :DIV r8 16/ 8b (full) L: 7.11ns= 16.1c T: 5.86ns= 13.25c 406 X86 :DIV r8 12/ 8b ax upd L: 2.17ns= 4.9c T: 2.10ns= 4.75c 407 X86 :DIV r8 8/ 8b ax upd L: 4.20ns= 9.5c T: 4.17ns= 9.42c 408 X86 :DIV r8 4/ 8b ax upd L: [no true dep.] T: 2.10ns= 4.75c 409 X86 :DIV r8 0/ 8b L: [no true dep.] T: 3.76ns= 8.50c 410 X86 :DIV r8 12/ 4b ax upd L: 2.17ns= 4.9c T: 2.10ns= 4.75c 411 X86 :DIV r8 8/ 4b ax upd L: [no true dep.] T: 2.10ns= 4.75c 412 X86 :DIV r8 4/ 4b ax upd L: 4.20ns= 9.5c T: 4.17ns= 9.42c 413 X86 :DIV r8 0/ 4b L: [no true dep.] T: 3.76ns= 8.50c 414 X86 :DIV r8 2^14/2^7 ax upd L: [no true dep.] T: 2.10ns= 4.75c 415 X86 :DIV r8 1/1 L: 5.01ns= 11.3c T: 3.76ns= 8.50c 416 X86 :DIV r8 1/1 ax upd L: 2.17ns= 4.9c T: 2.10ns= 4.75c 417 X86 :DIV r16 32/16b (full) L: 8.33ns= 18.8c T: 8.18ns= 18.50c 418 X86 :DIV r16 30/15b 0 rem. L: 8.33ns= 18.8c T: 8.18ns= 18.50c 419 X86 :DIV r16 24/16b ax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 420 X86 :DIV r16 16/16b ax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 421 X86 :DIV r16 8/16b ax/dx upd L: [no true dep.] T: 2.10ns= 4.75c 422 X86 :DIV r16 0/16b L: [no true dep.] T: 5.68ns= 12.83c 423 X86 :DIV r16 24/ 8b ax upd L: 8.22ns= 18.6c T: 8.22ns= 18.58c 424 X86 :DIV r16 16/ 8b ax upd L: [no true dep.] T: 7.81ns= 17.67c 425 X86 :DIV r16 8/ 8b ax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 426 X86 :DIV r16 0/ 8b L: [no true dep.] T: 5.68ns= 12.83c 427 X86 :DIV r16 1/1 L: 5.82ns= 13.2c T: 5.68ns= 12.83c 428 X86 :DIV r16 1/1 ax upd L: 5.71ns= 12.9c T: 5.71ns= 12.92c 429 X86 :DIV r16 1/1 ax/dx upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 430 X86 :DIV r32 64/32b (full) L: 9.99ns= 22.6c T: 9.84ns= 22.25c 431 X86 :DIV r32 62/31b 0 rem. L: 9.99ns= 22.6c T: 9.84ns= 22.25c 432 X86 :DIV r32 48/32b eax upd L: 8.18ns= 18.5c T: 8.18ns= 18.50c 433 X86 :DIV r32 32/32b eax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 434 X86 :DIV r32 16/32b eax/edx L: [no true dep.] T: 2.10ns= 4.75c 435 X86 :DIV r32 0/32b L: [no true dep.] T: 5.68ns= 12.83c 436 X86 :DIV r32 48/16b eax upd L: 9.88ns= 22.3c T: 9.88ns= 22.33c 437 X86 :DIV r32 32/16b eax upd L: [no true dep.] T: 8.22ns= 18.58c 438 X86 :DIV r32 16/16b eax upd L: 7.78ns= 17.6c T: 7.78ns= 17.58c 439 X86 :DIV r32 0/16b L: [no true dep.] T: 5.68ns= 12.83c 440 X86 :DIV r32 2^62/2^31 eax/edx L: [no true dep.] T: 6.38ns= 14.42c 441 X86 :DIV r32 1/1 L: 5.82ns= 13.2c T: 5.68ns= 12.83c 442 X86 :DIV r32 1/1 eax upd L: 5.68ns= 12.8c T: 5.68ns= 12.83c 443 X86 :DIV r32 1/1 eax/edx upd L: 2.10ns= 4.8c T: 2.10ns= 4.75c 458 X86 :CBW L: 0.41ns= 0.9c T: 0.41ns= 0.92c 459 X86 :CWDE L: 0.41ns= 0.9c T: 0.41ns= 0.92c 461 X86 :CWD L: 0.41ns= 0.9c T: 0.41ns= 0.92c 462 X86 :CDQ L: 0.41ns= 0.9c T: 0.41ns= 0.92c 464 X86 :CLC L: 0.14ns= 0.3c T: 0.14ns= 0.31c 465 X86 :STC L: 0.14ns= 0.3c T: 0.14ns= 0.31c 466 X86 :CMC L: 0.41ns= 0.9c T: 0.41ns= 0.92c 467 X86 :CLD L: 1.25ns= 2.8c T: 1.25ns= 2.83c 468 X86 :STD L: 5.86ns= 13.3c T: 5.86ns= 13.25c 469 X86 :AAA L: 0.41ns= 0.9c T: 0.41ns= 0.92c 470 X86 :AAD L: 0.36ns= 0.8c T: 0.36ns= 0.80c 471 X86 :AAM L: 2.51ns= 5.7c T: 2.51ns= 5.67c 472 X86 :AAS L: 0.41ns= 0.9c T: 0.41ns= 0.92c 473 X86 :DAA L: 0.41ns= 0.9c T: 0.41ns= 0.92c 474 X86 :DAS L: 0.41ns= 0.9c T: 0.41ns= 0.92c 475 X86 :LAHF L: 0.41ns= 0.9c T: 0.41ns= 0.92c 476 X86 :SAHF L: 0.41ns= 0.9c T: 0.41ns= 0.92c 477 X86 :PUSHA L: [no true dep.] T: 3.76ns= 8.50c 478 X86 :POPA L: [no true dep.] T: 3.35ns= 7.58c 479 X86 :PUSHA + POPA L: 6.34ns= 14.3c T: 6.34ns= 14.33c 480 X86 :PUSHAD L: [no true dep.] T: 3.98ns= 9.00c 481 X86 :POPAD L: [no true dep.] T: 3.87ns= 8.75c 482 X86 :PUSHAD + POPAD L: 5.86ns= 13.3c T: 5.86ns= 13.25c 483 X86 :PUSH r16 L: [no true dep.] T: 0.41ns= 0.92c 484 X86 :POP r16 L: [no true dep.] T: 0.41ns= 0.92c 485 X86 :PUSH r16 + POP r16 L: 2.10ns= 4.8c T: 1.25ns= 2.83c 486 X86 :PUSH r32 L: [no true dep.] T: 0.44ns= 1.00c 487 X86 :POP r32 L: [no true dep.] T: 0.41ns= 0.92c 488 X86 :PUSH r32 + POP r32 L: 2.10ns= 4.8c T: 1.25ns= 2.83c 489 X86 :PUSH imm8 L: [no true dep.] T: 0.44ns= 1.00c 490 X86 :PUSH imm8 + POP r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 491 X86 :PUSH imm32 L: [no true dep.] T: 0.41ns= 0.92c 492 X86 :PUSH imm32 + POP r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 493 X86 :PUSH [m16] L: [no true dep.] T: 0.55ns= 1.25c 494 X86 :POP [m16] L: [no true dep.] T: 0.55ns= 1.25c 495 X86 :PUSH [m16] + POP [m16] L: 4.24ns= 9.6c T: 1.22ns= 2.75c 496 X86 :PUSH [m32] L: [no true dep.] T: 0.63ns= 1.42c 497 X86 :POP [m32] L: [no true dep.] T: 0.44ns= 1.00c 498 X86 :PUSH [m32] + POP [m32] L: 4.24ns= 9.6c T: 1.36ns= 3.08c 499 X86 :PUSHF L: [no true dep.] T: 2.91ns= 6.58c 501 X86 :PUSHF + POPF L: 9.62ns= 21.8c T: 9.62ns= 21.75c 502 X86 :PUSHFD L: [no true dep.] T: 2.91ns= 6.58c 503 X86 :POPFD L: [no true dep.] T: 7.52ns= 17.00c 504 X86 :PUSHFD + POPFD L: 10.03ns= 22.7c T: 10.03ns= 22.67c 505 X86 :CMPSB L: 1.66ns= 3.8c T: 1.66ns= 3.75c 506 X86 :CMPSW L: 1.66ns= 3.8c T: 1.66ns= 3.75c 507 X86 :CMPSD L: 1.66ns= 3.8c T: 1.66ns= 3.75c 509 X86 :REPE CMPSB BW in L1D: 0.53 B/c 1196MiB/s 510 X86 :REPE CMPSW BW in L1D: 1.06 B/c 2392MiB/s 511 X86 :REPE CMPSD BW in L1D: 2.11 B/c 4778MiB/s 513 X86 :LODSB L: 0.85ns= 1.9c T: 0.85ns= 1.92c 514 X86 :LODSW L: 0.85ns= 1.9c T: 0.85ns= 1.92c 515 X86 :LODSD L: 0.85ns= 1.9c T: 0.92ns= 2.08c 517 X86 :REP LODSB BW in L1D: 0.35 B/c 798MiB/s 518 X86 :REP LODSW BW in L1D: 0.71 B/c 1595MiB/s 519 X86 :REP LODSD BW in L1D: 1.41 B/c 3188MiB/s 521 X86 :STOSB L: 0.44ns= 1.0c T: 0.44ns= 1.00c 522 X86 :STOSW L: 0.44ns= 1.0c T: 0.48ns= 1.08c 523 X86 :STOSD L: 0.44ns= 1.0c T: 0.52ns= 1.17c 525 X86 :REP STOSB BW in L1D: 7.96 B/c 18004MiB/s 526 X86 :REP STOSW BW in L1D: 8.15 B/c 18434MiB/s 527 X86 :REP STOSD BW in L1D: 8.18 B/c 18498MiB/s 529 X86 :MOVSB L: 1.66ns= 3.8c T: 1.66ns= 3.75c 530 X86 :MOVSW L: 1.66ns= 3.8c T: 1.66ns= 3.75c 531 X86 :MOVSD L: 1.77ns= 4.0c T: 1.70ns= 3.83c 533 X86 :REP MOVSB BW in L1D:13.58 B/c 30702MiB/s 534 X86 :REP MOVSW BW in L1D:14.03 B/c 31728MiB/s 535 X86 :REP MOVSD BW in L1D:14.08 B/c 31824MiB/s 537 X86 :SCASB L: 0.85ns= 1.9c T: 0.85ns= 1.92c 538 X86 :SCASW L: 0.85ns= 1.9c T: 0.85ns= 1.92c 539 X86 :SCASD L: 0.85ns= 1.9c T: 0.92ns= 2.08c 541 X86 :REPNE SCASB BW in L1D: 0.26 B/c 598MiB/s 542 X86 :REPNE SCASW BW in L1D: 0.53 B/c 1196MiB/s 543 X86 :REPNE SCASD BW in L1D: 1.06 B/c 2392MiB/s 545 X86 :XADD r8, r8 L: 1.40ns= 3.2c T: 0.48ns= 1.08c 546 X86 :XADD r16, r16 L: 1.40ns= 3.2c T: 0.48ns= 1.08c 547 X86 :XADD r32, r32 L: 1.40ns= 3.2c T: 0.48ns= 1.08c 549 X86 :CMPXCHG r8, r8 L: 4.17ns= 9.4c T: 2.99ns= 6.75c 550 X86 :CMPXCHG r16, r16 L: 4.17ns= 9.4c T: 2.99ns= 6.75c 551 X86 :CMPXCHG r32, r32 L: 4.17ns= 9.4c T: 2.99ns= 6.75c 553 CMPX8 :CMPXCHG8B L: 3.35ns= 7.6c T: 3.35ns= 7.58c 555 X86 :RDTSC L: [no true dep.] T: 13.38ns= 30.25c 556 X86 :CPUID (EAX = 0) L: 82.31ns=186.1c T: 82.31ns=186.08c 557 X86 :CPUID (EAX = 1) L: 112.90ns=255.3c T: 112.90ns=255.25c 569 X87 :FNOP L: [no true dep.] T: 0.41ns= 0.92c 570 X87 :FXCH st(i) L: 0.41ns= 0.9c T: 0.41ns= 0.92c 571 X87 :FCHS L: 0.41ns= 0.9c T: 0.41ns= 0.92c 572 X87 :FABS L: 0.41ns= 0.9c T: 0.41ns= 0.92c 573 X87 :FTST L: [no true dep.] T: 0.41ns= 0.92c 574 X87 :FXAM L: [no true dep.] T: 0.41ns= 0.92c 575 CMOV :FCMOVE st, st(i) L: 0.85ns= 1.9c T: 0.85ns= 1.92c 576 X87 :FADD st(i), st (st = 0.0) L: 1.33ns= 3.0c T: 0.41ns= 0.92c 577 X87 :FADD st(i), st L: 1.25ns= 2.8c T: 0.41ns= 0.92c 578 X87 :FADD st, st(i), FXCH st(i) L: 1.25ns= 2.8c T: 0.41ns= 0.92c 579 X87 :FMUL st(i), st (st = 0.0) L: 2.10ns= 4.8c T: 0.74ns= 1.67c 580 X87 :FMUL st(i), st L: 2.10ns= 4.8c T: 0.74ns= 1.67c 581 X87 :FMUL st, st(i), FXCH st(i) L: 2.10ns= 4.8c T: 0.85ns= 1.92c 582 X87 :FMUL + FADD st, st(i) L: 3.35ns= 7.6c T: [not enough reg] 583 X87 :FMUL st(2i) FADD st(2i+1) L: 2.10ns= 4.8c T: [not enough reg] 584 X87 :FDIV32 st(i), st L: 5.42ns= 12.3c T: 5.01ns= 11.33c 585 X87 :FDIV64 st(i), st L: 8.77ns= 19.8c T: 8.37ns= 18.92c 586 X87 :FDIV80 st(i), st L: 9.62ns= 21.8c T: 9.18ns= 20.75c 587 X87 :FDIV80 (0.0l/x) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 588 X87 :FDIV80 (x/1.0l) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 589 X87 :FDIV80 (x/2.0l) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 590 X87 :FDIV80 (x/0.5l) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 591 X87 :FSQRT32 st L: 5.42ns= 12.3c T: 5.01ns= 11.33c 592 X87 :FSQRT64 st L: 8.37ns= 18.9c T: 7.92ns= 17.92c 593 X87 :FSQRT80 st L: 9.62ns= 21.8c T: 9.18ns= 20.75c 594 X87 :FSQRT80 (0.0l) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 595 X87 :FSQRT80 (1.0l) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 596 X87 :FDECSTP L: [no true dep.] T: 0.41ns= 0.92c 597 X87 :FINCSTP L: [no true dep.] T: 0.41ns= 0.92c 598 X87 :FCOM st(i) L: [no true dep.] T: 0.41ns= 0.92c 599 CMOV :FCOMI st, st(i) L: [no true dep.] T: 0.41ns= 0.92c 600 X87 :FSIN80 (0.0) L: 13.38ns= 30.3c T: 13.79ns= 31.17c 601 X87 :FSIN80 (0.0) + FADD L: 15.04ns= 34.0c T: 14.19ns= 32.08c 602 X87 :FSIN80 (1.0) + FADD L: 41.76ns= 94.4c T: 43.01ns= 97.25c 603 X87 :FSIN80 (4Pi) + FADD L: 40.10ns= 90.7c T: 31.33ns= 70.83c 604 X87 :FSIN80 (2Pi) + FADD L: 40.10ns= 90.7c T: 31.33ns= 70.83c 605 X87 :FSIN80 (Pi) + FADD L: 40.10ns= 90.7c T: 31.33ns= 70.83c 606 X87 :FSIN80 (Pi/2) + FADD L: 43.05ns= 97.3c T: 35.09ns= 79.33c 607 X87 :FSIN80 (Pi/4) + FADD L: 39.70ns= 89.8c T: 40.95ns= 92.58c 608 X87 :FSIN80 (Pi/8) + FADD L: 35.94ns= 81.3c T: 37.19ns= 84.08c 609 X87 :FSIN80 (Pi/16) + FADD L: 38.00ns= 85.9c T: 29.67ns= 67.08c 610 X87 :FSIN80 (Pi/32) + FADD L: 38.00ns= 85.9c T: 29.67ns= 67.08c 611 X87 :FCOS80 (0.73908513...) L: 40.10ns= 90.7c T: 40.51ns= 91.58c 612 X87 :FCOS80 (0.73908513...)+FADD L: 40.95ns= 92.6c T: 40.95ns= 92.58c 613 X87 :FCOS80 (0.0) + FADD L: 14.63ns= 33.1c T: 14.63ns= 33.08c 614 X87 :FCOS80 (1.0) + FADD L: 38.85ns= 87.8c T: 39.25ns= 88.75c 615 X87 :FCOS80 (4Pi) + FADD L: 39.25ns= 88.8c T: 33.43ns= 75.58c 616 X87 :FCOS80 (2Pi) + FADD L: 39.25ns= 88.8c T: 33.43ns= 75.58c 617 X87 :FCOS80 (Pi) + FADD L: 39.25ns= 88.8c T: 33.43ns= 75.58c 618 X87 :FCOS80 (Pi/2) + FADD L: 36.34ns= 82.2c T: 28.82ns= 65.17c 619 X87 :FCOS80 (Pi/4) + FADD L: 36.75ns= 83.1c T: 37.19ns= 84.08c 620 X87 :FCOS80 (Pi/8) + FADD L: 40.95ns= 92.6c T: 40.95ns= 92.58c 621 X87 :FCOS80 (Pi/16) + FADD L: 37.15ns= 84.0c T: 31.77ns= 71.83c 622 X87 :FCOS80 (Pi/32) + FADD L: 37.15ns= 84.0c T: 31.74ns= 71.75c 623 MMX :EMMS L: 2.51ns= 5.7c T: 2.51ns= 5.67c 624 MMX :MOVD r32, mm L: [diff. reg. set] T: 0.21ns= 0.47c 625 MMX :MOVD mm, r32 L: [diff. reg. set] T: 0.21ns= 0.47c 626 MMX :MOVD r32, mm+MOVD mm, r32 L: 1.66ns= 3.8c T: 0.17ns= 0.39c 630 MMX :MOVD mm, [m32] L: [memory dep.] T: 0.41ns= 0.92c 631 MMX :MOVD [m32], mm L: [memory dep.] T: 0.41ns= 0.92c 632 MMX :MOVD mm,[m32]+MOVD [m32],mm L: 2.10ns= 4.8c T: 0.38ns= 0.85c 633 MMX :MOVQ mm, mm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 634 MMX :MOVQ mm, [m64] L: [memory dep.] T: 0.41ns= 0.92c 635 MMX :MOVQ [m64], mm L: [memory dep.] T: 0.41ns= 0.92c 636 MMX :MOVQ mm,[m64]+MOVQ [m64],mm L: 2.10ns= 4.8c T: 0.41ns= 0.92c 637 SSE :MOVNTQ [m64], mm L: [memory dep.] T: 0.92ns= 0.92c 638 SSE :PMOVMSKB r32, mm L: [diff. reg. set] T: 0.41ns= 0.92c 640 SSE :MASKMOVQ mm, mm L: [memory dep.] T: 10.42ns= 10.42c 641 MMX :PADDB mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 642 MMX :PADDW mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 643 MMX :PADDD mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 644 SSE2 :PADDQ mm, mm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 645 MMX :PADDSB mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 646 MMX :PADDSW mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 647 MMX :PADDUSB mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 648 MMX :PADDUSW mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 649 MMX :PSUBB mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 650 MMX :PSUBB mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 651 MMX :PSUBW mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 652 MMX :PSUBW mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 653 MMX :PSUBD mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 654 MMX :PSUBD mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 655 SSE2 :PSUBQ mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 656 SSE2 :PSUBQ mm_1, mm_2 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 657 MMX :PSUBSB mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 658 MMX :PSUBSB mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 659 MMX :PSUBSW mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 660 MMX :PSUBSW mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.46c 661 MMX :PSUBUSB mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 662 MMX :PSUBUSB mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 663 MMX :PSUBUSW mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 664 MMX :PSUBUSW mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 665 MMX :PCMPEQB mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 666 MMX :PCMPEQB mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 667 MMX :PCMPEQW mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 668 MMX :PCMPEQW mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 669 MMX :PCMPEQD mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 670 MMX :PCMPEQD mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 671 MMX :PCMPGTB mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 672 MMX :PCMPGTB mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 673 MMX :PCMPGTW mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 674 MMX :PCMPGTW mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 675 MMX :PCMPGTD mm, mm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 676 MMX :PCMPGTD mm_1, mm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 677 MMX :PAND mm, mm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 678 MMX :PAND mm_1, mm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 679 MMX :PANDN mm, mm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 680 MMX :PANDN mm_1, mm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 681 MMX :POR mm, mm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 682 MMX :POR mm_1, mm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 683 MMX :PXOR mm, mm L: 0.14ns= 0.3c T: 0.14ns= 0.31c 684 MMX :PXOR mm_1, mm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 685 MMX :PMULHW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 686 SSE :PMULHUW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 688 SSSE3 :PMULHRSW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 689 MMX :PMULLW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 690 SSE2 :PMULUDQ mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 691 SSSE3 :PMADDUBSW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 692 MMX :PMADDWD mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 693 MMX :PSLLW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 694 MMX :PSLLW mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 695 MMX :PSLLD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 696 MMX :PSLLD mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 697 MMX :PSLLQ mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 698 MMX :PSLLQ mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 699 MMX :PSRAW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 700 MMX :PSRAW mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 701 MMX :PSRAD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 702 MMX :PSRAD mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 703 MMX :PSRLW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 704 MMX :PSRLW mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 705 MMX :PSRLD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 706 MMX :PSRLD mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 707 MMX :PSRLQ mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 708 MMX :PSRLQ mm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 709 MMX :PUNPCKHBW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 710 MMX :PUNPCKHWD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 711 MMX :PUNPCKHDQ mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 712 MMX :PUNPCKLBW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 713 MMX :PUNPCKLWD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 714 MMX :PUNPCKLDQ mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 715 MMX :PACKSSWB mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 716 MMX :PACKUSWB mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 717 MMX :PACKSSDW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 751 SSE :PAVGB mm, mm L: 0.44ns= 1.0c T: 0.21ns= 0.48c 752 SSE :PAVGW mm, mm L: 0.44ns= 1.0c T: 0.21ns= 0.48c 753 SSE :PEXTRW r32, mm, im8 L: [diff. reg. set] T: 0.44ns= 1.00c 754 SSE :PINSRW mm, r32, im8 L: [diff. reg. set] T: 0.44ns= 1.00c 755 SSE :PEXTRW + PINSRW r32 L: 0.88ns= 2.0c T: 0.88ns= 2.00c 759 SSE :PMAXSW mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 760 SSE :PMAXUB mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 761 SSE :PMINSW mm, mm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 762 SSE :PMINUB mm, mm L: 0.41ns= 0.9c T: 0.18ns= 0.41c 763 SSE :PSADBW mm, mm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 764 SSE :PSHUFW mm, mm, im8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 765 SSE :PREFETCHNTA [mem] L: [memory dep.] T: 0.41ns= 0.92c 766 SSE :PREFETCHT0 [mem] L: [memory dep.] T: 0.41ns= 0.92c 767 SSE :PREFETCHT1 [mem] L: [memory dep.] T: 0.44ns= 1.00c 768 SSE :PREFETCHT2 [mem] L: [memory dep.] T: 0.44ns= 1.00c 769 SSE :SFENCE L: 3.76ns= 8.5c T: 3.76ns= 8.50c 770 SSE2 :LFENCE L: 3.35ns= 7.6c T: 3.35ns= 7.58c 771 SSE2 :MFENCE L: 2.51ns= 5.7c T: 2.51ns= 5.67c 772 SSSE3 :PABSB mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 773 SSSE3 :PABSW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 774 SSSE3 :PABSD mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 775 SSSE3 :PALIGNR mm, mm, imm8 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 776 SSSE3 :PHADDW mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 777 SSSE3 :PHADDD mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 778 SSSE3 :PHADDSW mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 779 SSSE3 :PHSUBW mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 780 SSSE3 :PHSUBD mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 781 SSSE3 :PHSUBSW mm, mm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 782 SSSE3 :PSHUFB mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 783 SSSE3 :PSIGNB mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 784 SSSE3 :PSIGNW mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 785 SSSE3 :PSIGND mm, mm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 786 SSE :MOVHLPS xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 787 SSE :MOVHLPS xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 788 SSE :MOVSS xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 789 SSE :MOVSS xmm, [m32] L: [memory dep.] T: 0.41ns= 0.92c 790 SSE :MOVSS [m32], xmm L: [memory dep.] T: 0.41ns= 0.92c 791 SSE :MOVSS LS pair L: 2.10ns= 4.8c T: 0.37ns= 0.85c 792 SSE :MOVLPS xmm, [m32] L: [memory dep.] T: 0.41ns= 0.92c 793 SSE :MOVLPS [m32], xmm L: [memory dep.] T: 0.41ns= 0.92c 794 SSE :MOVLPS LS pair L: 2.51ns= 5.7c T: 0.41ns= 0.92c 795 SSE :MOVHPS xmm, [m32] L: [memory dep.] T: 0.41ns= 0.92c 796 SSE :MOVHPS [m32], xmm L: [memory dep.] T: 0.41ns= 0.92c 797 SSE :MOVHPS LS pair L: 3.35ns= 7.6c T: 0.85ns= 1.92c 798 SSE :MOVAPS xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 799 SSE :MOVAPS xmm, [m128] L: [memory dep.] T: 0.41ns= 0.92c 800 SSE :MOVAPS [m128], xmm L: [memory dep.] T: 0.41ns= 0.92c 801 SSE :MOVAPS LS pair L: 2.10ns= 4.8c T: 0.41ns= 0.92c 802 SSE :MOVUPS xmm, xmm L: 0.41ns= 0.9c T: 0.16ns= 0.37c 803 SSE :MOVUPS xmm, [m128] L: [memory dep.] T: 0.85ns= 1.92c 804 SSE :MOVUPS [m128], xmm L: [memory dep.] T: 1.66ns= 3.75c 805 SSE :MOVUPS aligned LS pair L: 3.35ns= 7.6c T: 2.10ns= 4.75c 806 SSE :MOVUPS xmm, [m128 + 4] L: [memory dep.] T: 0.59ns= 1.33c 807 SSE :MOVUPS [m128 + 4], xmm L: [memory dep.] T: 3.02ns= 6.83c 808 SSE :MOVUPS unaligned LS pair L: 6.67ns= 15.1c T: 5.71ns= 12.92c 810 SSE :MOVNTPS [m128], xmm L: [memory dep.] T: 0.92ns= 0.92c 811 SSE :MOVMSKPS r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 812 SSE :UNPCKLPS xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 813 SSE :UNPCKHPS xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 814 SSE :SHUFPS xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 815 SSE :COMISS xmm, xmm L: [no true dep.] T: 0.41ns= 0.92c 816 SSE :UCOMISS xmm, xmm L: [no true dep.] T: 0.41ns= 0.92c 817 SSE :CMPSS xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 818 SSE :CMPPS xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 819 SSE :SUBSS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 820 SSE :SUBPS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 821 SSE :ADDSS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 822 SSE :ADDPS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 823 SSE :MULSS xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 824 SSE :MULPS xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 825 SSE :MULSS+ADDSS xmm, xmm L: 2.91ns= 6.6c T: [not enough reg] 826 SSE :MULPS+ADDPS xmm, xmm L: 2.91ns= 6.6c T: [not enough reg] 827 SSE :MULSS xm1,xm1 ADDSS xm2,xm2 L: 1.66ns= 3.8c T: [not enough reg] 828 SSE :MULPS xm1,xm1 ADDPS xm2,xm2 L: 1.66ns= 3.8c T: [not enough reg] 829 SSE :MAXSS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 830 SSE :MAXPS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 831 SSE :MINSS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 832 SSE :MINPS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 833 SSE :ANDNPS xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 834 SSE :ANDNPS xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 835 SSE :ANDPS xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 836 SSE :ANDPS xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 837 SSE :ORPS xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 838 SSE :ORPS xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 839 SSE :XORPS xmm, xmm L: 0.14ns= 0.3c T: 0.14ns= 0.31c 840 SSE :XORPS xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 841 SSE :DIVSS xmm, xmm L: 5.42ns= 12.3c T: 5.01ns= 11.33c 842 SSE :DIVSS (0.0f/x) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 843 SSE :DIVSS (x/1.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 844 SSE :DIVSS (x/2.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 845 SSE :DIVSS (x/0.5f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 846 SSE :DIVPS xmm, xmm L: 5.42ns= 12.3c T: 4.98ns= 11.25c 847 SSE :DIVPS (0.0f/x) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 848 SSE :DIVPS (x/1.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 849 SSE :DIVPS (x/2.0f) L: 2.43ns= 5.5c T: 2.10ns= 4.75c 850 SSE :DIVPS (x/0.5f) L: 2.43ns= 5.5c T: 2.10ns= 4.75c 851 SSE :SQRTSS xmm, xmm L: 5.42ns= 12.3c T: 5.01ns= 11.33c 852 SSE :SQRTSS (0.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 853 SSE :SQRTSS (1.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 854 SSE :SQRTPS xmm, xmm L: 5.42ns= 12.3c T: 5.01ns= 11.33c 855 SSE :SQRTPS (0.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 856 SSE :SQRTPS (1.0f) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 857 SSE :RCPSS xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 858 SSE :RCPPS xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 859 SSE :RSQRTSS xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 860 SSE :RSQRTPS xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 861 SSE :CVTPI2PS xmm, mm L: [diff. reg. set] T: 0.41ns= 0.92c 862 SSE :CVTPS2PI mm, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 863 SSE :CVTPS2PI + CVTPI2PS L: 2.51ns= 5.7c T: 0.85ns= 1.92c 864 SSE :CVTTPS2PI mm, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 865 SSE :CVTTPS2PI + CVTPI2PS L: 2.51ns= 5.7c T: 0.85ns= 1.92c 866 SSE :CVTSI2SS xmm, r32 L: [diff. reg. set] T: 0.41ns= 0.92c 867 SSE :CVTSS2SI r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 868 SSE :CVTSS2SI + CVTSI2SS r32 L: 2.51ns= 5.7c T: 0.85ns= 1.92c 869 SSE :CVTTSS2SI r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 870 SSE :CVTTSS2SI + CVTSI2SS r32 L: 2.51ns= 5.7c T: 0.85ns= 1.92c 876 SSE :STMXCSR [mem] L: [memory dep.] T: 8.26ns= 18.67c 877 SSE :LDMXCSR [mem] L: [memory dep.] T: 15.89ns= 35.92c 878 SSE :STMXCSR + LDMXCSR L: 22.15ns= 50.1c T: 22.15ns= 50.08c 879 SSE2 :MOVSD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 880 SSE2 :MOVSD xmm, [m64] L: [memory dep.] T: 0.41ns= 0.92c 881 SSE2 :MOVSD [m64], xmm L: [memory dep.] T: 0.41ns= 0.92c 882 SSE2 :MOVSD LS pair L: 2.10ns= 4.8c T: 0.41ns= 0.92c 883 SSE2 :MOVLPD xmm, [m64] L: [memory dep.] T: 0.41ns= 0.92c 884 SSE2 :MOVLPD [m64], xmm L: [memory dep.] T: 0.41ns= 0.92c 885 SSE2 :MOVLPD LS pair L: 2.51ns= 5.7c T: 0.41ns= 0.92c 886 SSE2 :MOVHPD xmm, [m64] L: [memory dep.] T: 0.41ns= 0.92c 887 SSE2 :MOVHPD [m64], xmm L: [memory dep.] T: 0.41ns= 0.92c 888 SSE2 :MOVHPD LS pair L: 3.35ns= 7.6c T: 0.85ns= 1.92c 889 SSE2 :MOVAPD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 890 SSE2 :MOVAPD xmm, [m128] L: [memory dep.] T: 0.41ns= 0.92c 891 SSE2 :MOVAPD [m128], xmm L: [memory dep.] T: 0.41ns= 0.92c 892 SSE2 :MOVAPD LS pair L: 2.10ns= 4.8c T: 0.41ns= 0.92c 893 SSE2 :MOVUPD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 894 SSE2 :MOVUPD xmm, [m128] L: [memory dep.] T: 0.85ns= 1.92c 895 SSE2 :MOVUPD [m128], xmm L: [memory dep.] T: 1.66ns= 3.75c 896 SSE2 :MOVUPD aligned LS pair L: 3.35ns= 7.6c T: 2.10ns= 4.75c 897 SSE2 :MOVUPD xmm, [m128 + 4] L: [memory dep.] T: 0.59ns= 1.33c 898 SSE2 :MOVUPD [m128 + 4], xmm L: [memory dep.] T: 3.02ns= 6.83c 899 SSE2 :MOVUPD unaligned LS pair L: 6.67ns= 15.1c T: 5.71ns= 12.92c 901 SSE2 :MOVNTPD [m128], xmm L: [memory dep.] T: 0.92ns= 0.92c 902 SSE2 :MOVMSKPD r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 903 SSE2 :UNPCKLPD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 904 SSE2 :UNPCKHPD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 905 SSE2 :SHUFPD xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 906 SSE2 :COMISD xmm, xmm L: [no true dep.] T: 0.41ns= 0.92c 907 SSE2 :UCOMISD xmm, xmm L: [no true dep.] T: 0.41ns= 0.92c 908 SSE2 :CMPSD xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 909 SSE2 :CMPPD xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 910 SSE2 :SUBSD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 911 SSE2 :SUBPD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 912 SSE2 :ADDSD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 913 SSE2 :ADDPD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 914 SSE2 :MULSD xmm, xmm L: 2.10ns= 4.8c T: 0.41ns= 0.92c 915 SSE2 :MULPD xmm, xmm L: 2.10ns= 4.8c T: 0.41ns= 0.92c 916 SSE2 :MULSD+ADDSD xmm, xmm L: 3.35ns= 7.6c T: [not enough reg] 917 SSE2 :MULPD+ADDPD xmm, xmm L: 3.35ns= 7.6c T: [not enough reg] 918 SSE2 :MULSD xm1,xm1 ADDSD xm2,xm2 L: 2.10ns= 4.8c T: [not enough reg] 919 SSE2 :MULPD xm1,xm1 ADDPD xm2,xm2 L: 2.10ns= 4.8c T: [not enough reg] 920 SSE2 :MAXSD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 921 SSE2 :MAXPD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 922 SSE2 :MINSD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 923 SSE2 :MINPD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 924 SSE2 :ANDNPD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 925 SSE2 :ANDNPD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 926 SSE2 :ANDPD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 927 SSE2 :ANDPD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 928 SSE2 :ORPD xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 929 SSE2 :ORPD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 930 SSE2 :XORPD xmm, xmm L: 0.14ns= 0.3c T: 0.14ns= 0.31c 931 SSE2 :XORPD xmm_1, xmm_2 L: 0.44ns= 1.0c T: 0.14ns= 0.32c 932 SSE2 :DIVSD xmm, xmm L: 8.77ns= 19.8c T: 8.37ns= 18.92c 933 SSE2 :DIVSD (0.0/x) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 934 SSE2 :DIVSD (x/1.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 935 SSE2 :DIVSD (x/2.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 936 SSE2 :DIVSD (x/0.5) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 937 SSE2 :DIVPD xmm, xmm L: 8.77ns= 19.8c T: 8.37ns= 18.92c 938 SSE2 :DIVPD (0.0/x) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 939 SSE2 :DIVPD (x/1.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 940 SSE2 :DIVPD (x/2.0) L: 2.43ns= 5.5c T: 2.10ns= 4.75c 941 SSE2 :DIVPD (x/0.5) L: 2.43ns= 5.5c T: 2.10ns= 4.75c 942 SSE2 :SQRTSD xmm, xmm L: 8.37ns= 18.9c T: 7.92ns= 17.92c 943 SSE2 :SQRTSD (0.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 944 SSE2 :SQRTSD (1.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 945 SSE2 :SQRTPD xmm, xmm L: 8.37ns= 18.9c T: 7.92ns= 17.92c 946 SSE2 :SQRTPD (0.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 947 SSE2 :SQRTPD (1.0) L: 2.51ns= 5.7c T: 2.10ns= 4.75c 948 SSE2 :CVTPI2PD xmm, mm L: [diff. reg. set] T: 0.41ns= 0.92c 949 SSE2 :CVTPD2PI mm, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 950 SSE2 :CVTPD2PI + CVTPI2PD L: 3.35ns= 7.6c T: 0.85ns= 1.92c 951 SSE2 :CVTTPD2PI mm, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 952 SSE2 :CVTTPD2PI + CVTPI2PD L: 3.35ns= 7.6c T: 0.85ns= 1.92c 953 SSE2 :CVTSI2SD xmm, r32 L: [diff. reg. set] T: 0.41ns= 0.92c 954 SSE2 :CVTSD2SI r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 955 SSE2 :CVTSD2SI + CVTSI2SD r32 L: 2.91ns= 6.6c T: 0.85ns= 1.92c 956 SSE2 :CVTTSD2SI r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 957 SSE2 :CVTTSD2SI + CVTSI2SD r32 L: 2.91ns= 6.6c T: 0.85ns= 1.92c 963 SSE2 :CVTDQ2PD xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 964 SSE2 :CVTPD2DQ xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 965 SSE2 :CVTPD2DQ + CVTDQ2PD L: 3.35ns= 7.6c T: 0.85ns= 1.92c 966 SSE2 :CVTTPD2DQ xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 967 SSE2 :CVTTPD2DQ + CVTDQ2PD L: 3.35ns= 7.6c T: 0.85ns= 1.92c 968 SSE2 :CVTDQ2PS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 969 SSE2 :CVTPS2DQ xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 970 SSE2 :CVTPS2DQ + CVTDQ2PS L: 2.51ns= 5.7c T: 0.85ns= 1.92c 971 SSE2 :CVTTPS2DQ xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 972 SSE2 :CVTTPS2DQ + CVTDQ2PS L: 2.51ns= 5.7c T: 0.85ns= 1.92c 973 SSE2 :CVTPS2PD xmm, xmm L: 0.85ns= 1.9c T: 0.85ns= 1.92c 974 SSE2 :CVTPD2PS xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 975 SSE2 :CVTPD2PS + CVTPS2PD L: 2.51ns= 5.7c T: 1.25ns= 2.83c 976 SSE2 :CVTSS2SD xmm, xmm L: 0.85ns= 1.9c T: 0.85ns= 1.92c 977 SSE2 :CVTSD2SS xmm, xmm L: 1.66ns= 3.8c T: 0.41ns= 0.92c 978 SSE2 :CVTSD2SS + CVTSS2SD L: 2.51ns= 5.7c T: 1.25ns= 2.83c 979 SSE2 :MOVD r32, xmm L: [diff. reg. set] T: 0.21ns= 0.47c 980 SSE2 :MOVD xmm, r32 L: [diff. reg. set] T: 0.21ns= 0.47c 981 SSE2 :MOVD r32, xmm+MOVD xmm, r32 L: 1.66ns= 3.8c T: 0.17ns= 0.39c 985 SSE2 :MOVD xmm, [m32] L: [memory dep.] T: 0.41ns= 0.92c 986 SSE2 :MOVD [m32], xmm L: [memory dep.] T: 0.41ns= 0.92c 987 SSE2 :MOVD LS pair L: 2.10ns= 4.8c T: 0.38ns= 0.85c 988 SSE2 :MOVQ xmm, [m64] L: [memory dep.] T: 0.41ns= 0.92c 989 SSE2 :MOVQ [m64], xmm L: [memory dep.] T: 0.41ns= 0.92c 990 SSE2 :MOVQ LS pair L: 2.10ns= 4.8c T: 0.41ns= 0.92c 991 SSE2 :MOVDQ2Q mm, xmm L: [diff. reg. set] T: 0.21ns= 0.47c 992 SSE2 :MOVQ2DQ xmm, mm L: [diff. reg. set] T: 0.21ns= 0.47c 993 SSE2 :MOVDQ2Q + MOVQ2DQ xmm, mm L: 0.85ns= 1.9c T: 0.26ns= 0.59c 994 SSE2 :MOVDQA xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 995 SSE2 :MOVDQA xmm, [m128] L: [memory dep.] T: 0.41ns= 0.92c 996 SSE2 :MOVDQA [m128], xmm L: [memory dep.] T: 0.41ns= 0.92c 997 SSE2 :MOVDQA LS pair L: 2.10ns= 4.8c T: 0.41ns= 0.92c 998 SSE2 :MOVDQU xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 999 SSE2 :MOVDQU xmm, [m128] L: [memory dep.] T: 0.85ns= 1.92c 1000 SSE2 :MOVDQU [m128], xmm L: [memory dep.] T: 1.66ns= 3.75c 1001 SSE2 :MOVDQU aligned LS pair L: 3.35ns= 7.6c T: 2.10ns= 4.75c 1002 SSE2 :MOVDQU xmm, [m128 + 4] L: [memory dep.] T: 0.59ns= 1.33c 1003 SSE2 :MOVDQU [m128 + 4], xmm L: [memory dep.] T: 3.02ns= 6.83c 1004 SSE2 :MOVDQU unaligned LS pair L: 6.67ns= 15.1c T: 5.71ns= 12.92c 1005 SSE4.1:MOVNTDQA xmm, [m128] L: [memory dep.] T: 1.00ns= 1.00c 1006 SSE2 :MOVNTDQ [m128], xmm L: [memory dep.] T: 0.92ns= 0.92c 1007 SSE4.1:MOVNTDQA + MOVNTDQ L: 0.41ns= 0.9c T: 0.92ns= 0.92c 1008 SSE2 :PMOVMSKB r32, xmm L: [diff. reg. set] T: 0.41ns= 0.92c 1010 SSE2 :MASKMOVDQU xmm, xmm L: [memory dep.] T: 5.67ns= 5.67c 1011 SSE2 :PADDB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1012 SSE2 :PADDW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1013 SSE2 :PADDD xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1014 SSE2 :PADDQ xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1015 SSE2 :PADDSB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1016 SSE2 :PADDSW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1017 SSE2 :PADDUSB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1018 SSE2 :PADDUSW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1019 SSE2 :PSUBB xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1020 SSE2 :PSUBB xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1021 SSE2 :PSUBW xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1022 SSE2 :PSUBW xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1023 SSE2 :PSUBD xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1024 SSE2 :PSUBD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1025 SSE2 :PSUBQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1026 SSE2 :PSUBQ xmm_1, xmm_2 L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1027 SSE2 :PSUBSB xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1028 SSE2 :PSUBSB xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1029 SSE2 :PSUBSW xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1030 SSE2 :PSUBSW xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1031 SSE2 :PSUBUSB xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1032 SSE2 :PSUBUSB xmm_1, xmm_2 L: 0.44ns= 1.0c T: 0.20ns= 0.44c 1033 SSE2 :PSUBUSW xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1034 SSE2 :PSUBUSW xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1035 SSE2 :PCMPEQB xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1036 SSE2 :PCMPEQB xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1037 SSE2 :PCMPEQW xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1038 SSE2 :PCMPEQW xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1039 SSE2 :PCMPEQD xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1040 SSE2 :PCMPEQD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1041 SSE4.1:PCMPEQQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1042 SSE4.1:PCMPEQQ xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1043 SSE2 :PCMPGTB xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1044 SSE2 :PCMPGTB xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1045 SSE2 :PCMPGTW xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1046 SSE2 :PCMPGTW xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1047 SSE2 :PCMPGTD xmm, xmm L: 0.21ns= 0.5c T: 0.21ns= 0.47c 1048 SSE2 :PCMPGTD xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.20ns= 0.44c 1051 SSE2 :PAND xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 1052 SSE2 :PAND xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 1053 SSE2 :PANDN xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 1054 SSE2 :PANDN xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 1055 SSE2 :POR xmm, xmm L: 0.41ns= 0.9c T: 0.14ns= 0.33c 1056 SSE2 :POR xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 1057 SSE2 :PXOR xmm, xmm L: 0.14ns= 0.3c T: 0.14ns= 0.31c 1058 SSE2 :PXOR xmm_1, xmm_2 L: 0.41ns= 0.9c T: 0.14ns= 0.31c 1059 SSE2 :PMULHW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1060 SSE2 :PMULHUW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1061 SSSE3 :PMULHRSW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1062 SSE2 :PMULLW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1063 SSE4.1:PMULLD xmm, xmm L: 2.10ns= 4.8c T: 0.85ns= 1.92c 1064 SSE4.1:PMULDQ xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1065 SSE2 :PMULUDQ xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1066 SSSE3 :PMADDUBSW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1067 SSE2 :PMADDWD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1068 SSE2 :PSLLW xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1069 SSE2 :PSLLW xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1070 SSE2 :PSLLD xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1071 SSE2 :PSLLD xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1072 SSE2 :PSLLQ xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1073 SSE2 :PSLLQ xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1074 SSE2 :PSLLDQ xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1075 SSE2 :PSRAW xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1076 SSE2 :PSRAW xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1077 SSE2 :PSRAD xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1078 SSE2 :PSRAD xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1079 SSE2 :PSRLW xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1080 SSE2 :PSRLW xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1081 SSE2 :PSRLD xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1082 SSE2 :PSRLD xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1083 SSE2 :PSRLQ xmm, xmm L: 0.85ns= 1.9c T: 0.41ns= 0.92c 1084 SSE2 :PSRLQ xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1085 SSE2 :PSRLDQ xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1086 SSE2 :PUNPCKHBW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1087 SSE2 :PUNPCKHWD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1088 SSE2 :PUNPCKHDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1089 SSE2 :PUNPCKHQDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1090 SSE2 :PUNPCKLBW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1091 SSE2 :PUNPCKLWD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1092 SSE2 :PUNPCKLDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1093 SSE2 :PUNPCKLQDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1094 SSE2 :PACKSSWB xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1095 SSE2 :PACKUSWB xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1096 SSE2 :PACKSSDW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1097 SSE4.1:PACKUSDW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1098 SSE2 :PAVGB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1099 SSE2 :PAVGW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1100 SSE4.1:PEXTRB r32, xmm, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1101 SSE4.1:PINSRB xmm, r32, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1102 SSE4.1:PEXTRB + PINSRB r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1106 SSE2 :PEXTRW r32, xmm, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1107 SSE2 :PINSRW xmm, r32, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1108 SSE2 :PEXTRW + PINSRW r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1112 SSE4.1:PEXTRD r32, xmm, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1113 SSE4.1:PINSRD xmm, r32, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1114 SSE4.1:PEXTRD + PINSRD r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1118 SSE4.1:EXTRACTPS r32, xmm, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1119 SSE4.1:INSERTPS xmm, r32, im8 L: [diff. reg. set] T: 0.41ns= 0.92c 1120 SSE4.1:EXTRACTPS + INSERTPS r32 L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1128 SSE2 :PMAXUB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1129 SSE4.1:PMAXSB xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1130 SSE4.1:PMAXUW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1131 SSE2 :PMAXSW xmm, xmm L: 0.41ns= 0.9c T: 0.18ns= 0.41c 1132 SSE4.1:PMAXUD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1133 SSE4.1:PMAXSD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1134 SSE2 :PMINUB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1135 SSE4.1:PMINSB xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1136 SSE4.1:PMINUW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1137 SSE2 :PMINSW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1138 SSE4.1:PMINUD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1139 SSE4.1:PMINSD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1140 SSE2 :PSADBW xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1141 SSSE3 :PSHUFB xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1142 SSE2 :PSHUFLW xmm, xmm, im8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1143 SSE2 :PSHUFHW xmm, xmm, im8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1144 SSE2 :PSHUFD xmm, xmm, im8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1145 SSE3 :ADDSUBPS xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1146 SSE3 :ADDSUBPD xmm, xmm L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1147 SSE3 :HADDPS xmm, xmm L: 2.91ns= 6.6c T: 0.85ns= 1.92c 1148 SSE3 :HADDPD xmm, xmm L: 2.51ns= 5.7c T: 0.41ns= 0.92c 1149 SSE3 :HSUBPS xmm, xmm L: 2.91ns= 6.6c T: 0.85ns= 1.92c 1150 SSE3 :HSUBPD xmm, xmm L: 2.51ns= 5.7c T: 0.41ns= 0.92c 1151 SSE3 :MOVSLDUP xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1152 SSE3 :MOVSHDUP xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1153 SSE3 :MOVDDUP xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1154 SSE3 :LDDQU xmm, [m128 + 4] L: [memory dep.] T: 0.59ns= 1.33c 1155 SSSE3 :PABSB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1156 SSSE3 :PABSW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1157 SSSE3 :PABSD xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1158 SSSE3 :PALIGNR xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1159 SSSE3 :PHADDD xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1160 SSSE3 :PHADDW xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1161 SSSE3 :PHADDSW xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1162 SSSE3 :PHSUBD xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1163 SSSE3 :PHSUBW xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1164 SSSE3 :PHSUBSW xmm, xmm L: 1.25ns= 2.8c T: 0.85ns= 1.92c 1165 SSSE3 :PSIGNB xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1166 SSSE3 :PSIGNW xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1167 SSSE3 :PSIGND xmm, xmm L: 0.41ns= 0.9c T: 0.20ns= 0.46c 1168 SSE4.1:BLENDPS xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1169 SSE4.1:BLENDVPS xmm, xmm L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1170 SSE4.1:BLENDPD xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1171 SSE4.1:BLENDVPD xmm, xmm L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1172 SSE4.1:PBLENDW xmm, xmm, imm8 L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1173 SSE4.1:PBLENDVB xmm, xmm L: 0.85ns= 1.9c T: 0.85ns= 1.92c 1174 SSE4.1:DPPS xmm, xmm, imm8 L: 4.61ns= 10.4c T: 0.85ns= 1.92c 1175 SSE4.1:DPPD xmm, xmm, imm8 L: 3.76ns= 8.5c T: 1.03ns= 2.33c 1176 SSE4.1:MPSADBW xmm, xmm, imm8 L: 2.10ns= 4.8c T: 0.85ns= 1.92c 1177 SSE4.1:PHMINPOSUW xmm, xmm L: 1.66ns= 3.8c T: 1.66ns= 3.75c 1178 SSE4.1:PMOVSXBW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1179 SSE4.1:PMOVSXBD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1180 SSE4.1:PMOVSXBQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1181 SSE4.1:PMOVSXWD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1182 SSE4.1:PMOVSXWQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1183 SSE4.1:PMOVSXDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1184 SSE4.1:PMOVZXBW xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1185 SSE4.1:PMOVZXBD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1186 SSE4.1:PMOVZXBQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1187 SSE4.1:PMOVZXWD xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1188 SSE4.1:PMOVZXWQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1189 SSE4.1:PMOVZXDQ xmm, xmm L: 0.41ns= 0.9c T: 0.41ns= 0.92c 1190 SSE4.1:PTEST xmm, xmm L: [no true dep.] T: 0.41ns= 0.92c 1191 SSE4.1:ROUNDSS xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1192 SSE4.1:ROUNDPS xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1193 SSE4.1:ROUNDSD xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c 1194 SSE4.1:ROUNDPD xmm, xmm, imm8 L: 1.25ns= 2.8c T: 0.41ns= 0.92c BenchInstLat exit code: 0x0708 Running time: 55 seconds.