Hello,
I've annotated my code as follows:
// Read hook that forwards `addr` to MemoryRead with kSizeLog8, wrapped in
// IACA_START / IACA_END so the analyzer can pick out the region.
void __tsan_read8(void *addr) {
  IACA_START;
  // Empty asm with a "memory" clobber: asks the compiler not to move memory
  // accesses across this point. NOTE(review): per the listing that follows,
  // this did not keep the bulk of the generated code between the two marks.
  __asm__ __volatile__("" : : : "memory");
  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
  // Matching barrier in front of the closing mark.
  __asm__ __volatile__("" : : : "memory");
  IACA_END;
}
However, the compiler generates the following code:
00000000001b7040 <__tsan_read8>: 1b7040: 53 push %rbx 1b7041: 0f 0b ud2 1b7043: bb 6f 00 00 00 mov $0x6f,%ebx 1b7048: 64 fs 1b7049: 67 90 addr32 nop 1b704b: 48 b8 f8 ff ff ff ff movabs $0xffff83fffffffff8,%rax 1b7052: 83 ff ff 1b7055: 48 ba 00 00 00 00 00 movabs $0x20000000000,%rdx 1b705c: 02 00 00 1b705f: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9 1b7064: 48 21 f8 and %rdi,%rax 1b7067: 48 31 d0 xor %rdx,%rax 1b706a: 48 83 3c 85 00 00 00 cmpq $0xffffffffffffffff,0x0(,%rax,4) 1b7071: 00 ff 1b7073: 4c 8d 04 85 00 00 00 lea 0x0(,%rax,4),%r8 1b707a: 00 1b707b: 74 0e je 1b708b <__tsan_read8+0x4b> 1b707d: 64 48 8b 14 25 c0 73 mov %fs:0xfffffffffff973c0,%rdx 1b7084: f9 ff 1b7086: 48 85 d2 test %rdx,%rdx 1b7089: 79 15 jns 1b70a0 <__tsan_read8+0x60> 1b708b: bb de 00 00 00 mov $0xde,%ebx 1b7090: 64 fs 1b7091: 67 90 addr32 nop 1b7093: 0f 0b ud2 1b7095: 5b pop %rbx 1b7096: c3 retq 1b7097: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 1b709e: 00 00 1b70a0: 83 e7 07 and $0x7,%edi 1b70a3: 48 be ff ff ff ff ff movabs $0xffffe3ffffffffff,%rsi 1b70aa: e3 ff ff 1b70ad: 48 b9 00 00 00 00 00 movabs $0x800000000000,%rcx 1b70b4: 80 00 00 1b70b7: 48 21 d6 and %rdx,%rsi 1b70ba: 48 83 cf 18 or $0x18,%rdi 1b70be: 48 09 ce or %rcx,%rsi 1b70c1: 48 c1 e7 2a shl $0x2a,%rdi 1b70c5: 48 09 fe or %rdi,%rsi 1b70c8: 0f 28 14 85 00 00 00 movaps 0x0(,%rax,4),%xmm2 1b70cf: 00 1b70d0: 48 89 74 24 f8 mov %rsi,-0x8(%rsp) 1b70d5: 64 48 8b 3c 25 00 00 mov %fs:0x0,%rdi 1b70dc: 00 00 1b70de: f3 0f 7e 6c 24 f8 movq -0x8(%rsp),%xmm5 1b70e4: 0f 28 e2 movaps %xmm2,%xmm4 1b70e7: f3 0f 7e c5 movq %xmm5,%xmm0 1b70eb: 0f 28 0c 85 10 00 00 movaps 0x10(,%rax,4),%xmm1 1b70f2: 00 1b70f3: 0f 28 d8 movaps %xmm0,%xmm3 1b70f6: 0f c6 e1 dd shufps $0xdd,%xmm1,%xmm4 1b70fa: 0f c6 d8 55 shufps $0x55,%xmm0,%xmm3 1b70fe: f3 0f 10 05 6a 13 05 movss 0x5136a(%rip),%xmm0 # 208470 <_ZN6__tsan8MutexSet8kMaxSizeE+0xfd8> 1b7105: 00 1b7106: 0f c6 d1 88 shufps $0x88,%xmm1,%xmm2 1b710a: 0f c6 c0 00 shufps $0x0,%xmm0,%xmm0 1b710e: 66 0f eb c4 por 
%xmm4,%xmm0 1b7112: 66 0f 76 c3 pcmpeqd %xmm3,%xmm0 1b7116: f3 0f 7e 9f c8 73 f9 movq -0x68c38(%rdi),%xmm3 1b711d: ff 1b711e: 0f c6 db 00 shufps $0x0,%xmm3,%xmm3 1b7122: 66 0f 66 d3 pcmpgtd %xmm3,%xmm2 1b7126: 66 0f db c2 pand %xmm2,%xmm0 1b712a: 66 0f d7 c8 pmovmskb %xmm0,%ecx 1b712e: 85 c9 test %ecx,%ecx 1b7130: 0f 85 55 ff ff ff jne 1b708b <__tsan_read8+0x4b> 1b7136: 48 83 c2 01 add $0x1,%rdx 1b713a: 41 bb 01 00 00 00 mov $0x1,%r11d 1b7140: 49 ba ff ff ff ff ff movabs $0x3ffffffffff,%r10 1b7147: 03 00 00 1b714a: 48 89 d1 mov %rdx,%rcx 1b714d: 49 21 d2 and %rdx,%r10 1b7150: 64 48 89 14 25 c0 73 mov %rdx,%fs:0xfffffffffff973c0 1b7157: f9 ff 1b7159: 48 c1 e9 2a shr $0x2a,%rcx 1b715d: 83 e1 07 and $0x7,%ecx 1b7160: 83 c1 0e add $0xe,%ecx 1b7163: 49 d3 e3 shl %cl,%r11 1b7166: 49 83 eb 01 sub $0x1,%r11 1b716a: 4d 21 da and %r11,%r10 1b716d: 41 f7 c2 ff 1f 00 00 test $0x1fff,%r10d 1b7174: 0f 84 1c 04 00 00 je 1b7596 <__tsan_read8+0x556> 1b717a: 48 01 d2 add %rdx,%rdx 1b717d: 48 83 c6 01 add $0x1,%rsi 1b7181: 48 c1 ea 33 shr $0x33,%rdx 1b7185: 48 69 d2 00 00 13 01 imul $0x1130000,%rdx,%rdx 1b718c: 4a 8d 0c d2 lea (%rdx,%r10,8),%rcx 1b7190: 48 ba 00 00 00 00 00 movabs $0x600000000000,%rdx 1b7197: 60 00 00
...
You can see that the marks are inserted only around the very first fast path in the function, and the majority of the code is moved outside of the annotated region. As a result, IACA does not produce a useful result.
Any suggestions on how to avoid this effect?