According to the GCC docs (x86-transactional-memory-intrinsics.html), when a transaction fails or aborts, `_xbegin()` should return an abort status. However, I find that it sometimes returns 0, and this happens very frequently. In what kinds of situations will `_xbegin()` return 0?
After checking the manual, I found that many situations can cause this result, for example CPUID, SYSCALL, CLFLUSH, etc. However, I don't think my code triggers any of them.
Here is my code. It simulates a small bank: a randomly chosen account transfers $1 to another randomly chosen account.
#include "immintrin.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <thread>
#include <unistd.h>
#include <vector>

// Number of worker threads running transfers.
constexpr int n_threads = 1;
// Number of transfers each worker attempts.
constexpr int OPSIZE = 1000000000;

// One bank account, aligned to 64 bytes so every account starts on its own
// 64-byte boundary.
struct alignas(64) Account {
    long balance;
    long number;
};
using account_t = Account;

// Non-owning view of the account array shared by all workers.
struct Bank {
    account_t* accounts;
    long size;
};
using bank_t = Bank;

// Set by main() once every worker has finished; tells the heartbeat to stop.
bool done = false;

// Per-thread statistics, indexed by worker id. Each slot has exactly one
// writer (its worker) and one reader (the heartbeat thread).
long *tx, *_abort, *capacity, *debug, *failed, *conflict, *zero;

// Increments a single-writer counter. A relaxed load followed by a relaxed
// store is enough because only one thread ever writes a given slot; the atomic
// accesses exist so the concurrent read in the heartbeat is not a data race.
static inline void bump(long* counter) {
    __atomic_store_n(counter, __atomic_load_n(counter, __ATOMIC_RELAXED) + 1,
                     __ATOMIC_RELAXED);
}

// Reads a counter that another thread may be updating concurrently.
static inline long peek(const long* counter) {
    return __atomic_load_n(counter, __ATOMIC_RELAXED);
}

/**
 * Worker: performs OPSIZE transfers between two distinct random accounts,
 * each inside an RTM transaction, and records the outcome in the per-thread
 * counters.
 *
 * @param bank  shared bank; must contain at least two accounts.
 * @param id    index of this worker's slot in the statistics arrays.
 * @return always nullptr.
 */
void* f1(bank_t* bank, int id) {
    for (int i = 0; i < OPSIZE; i++) {
        const int src = static_cast<int>(rand() % bank->size);
        int dst = static_cast<int>(rand() % bank->size);
        while (src == dst) {
            dst = static_cast<int>(rand() % bank->size);
        }

        while (true) {
            const unsigned stat = _xbegin();
            if (stat == _XBEGIN_STARTED) {
                bank->accounts[src].balance++;
                bank->accounts[dst].balance--;
                _xend();
                // Compiler barrier: keep the statistics update from being
                // moved into the transactional region by the compiler.
                asm volatile("" ::: "memory");
                bump(&tx[id]);
                break;
            }

            // The transaction aborted; `stat` holds the abort status bits.
            bump(&_abort[id]);
            if (stat == 0) {
                bump(&zero[id]);
            }
            if (stat & _XABORT_CONFLICT) {
                bump(&conflict[id]);
            }
            if (stat & _XABORT_CAPACITY) {
                bump(&capacity[id]);
            }
            if (stat & _XABORT_DEBUG) {
                bump(&debug[id]);
            }
            // These two are checked before the no-retry exit below so they
            // stay reachable when the RETRY bit is clear.
            if (stat & _XABORT_NESTED) {
                printf("[ PANIC ] _XABORT_NESTED\n");
                exit(-1);
            }
            if (stat & _XABORT_EXPLICIT) {
                printf("[ panic ] _XABORT_EXPLICIT\n");
                exit(-1);
            }
            if ((stat & _XABORT_RETRY) == 0) {
                // The status says a retry is not expected to succeed. There is
                // no non-transactional fallback path here, so this transfer is
                // abandoned and only counted.
                bump(&failed[id]);
                break;
            }
        }
    }
    return nullptr;
}

/**
 * Heartbeat: once per second prints how much each statistic, summed over all
 * workers, has grown since the previous sample. Runs until `done` is set.
 *
 * @param bank  unused; kept so the thread entry signature stays the same.
 * @return always nullptr.
 */
void* f2(bank_t* bank) {
    (void)bank;
    printf("_heartbeat function\n");

    long txs = 0, aborts = 0, capacities = 0, debugs = 0, faileds = 0,
         conflicts = 0, zeros = 0;

    while (!__atomic_load_n(&done, __ATOMIC_ACQUIRE)) {
        // Remember the previous totals so only the delta is printed.
        const long last_txs = txs;
        const long last_aborts = aborts;
        const long last_capacities = capacities;
        const long last_debugs = debugs;
        const long last_conflicts = conflicts;
        const long last_faileds = faileds;
        const long last_zeros = zeros;

        txs = aborts = capacities = debugs = faileds = conflicts = zeros = 0;
        for (int i = 0; i < n_threads; i++) {
            txs += peek(&tx[i]);
            aborts += peek(&_abort[i]);
            faileds += peek(&failed[i]);
            capacities += peek(&capacity[i]);
            debugs += peek(&debug[i]);
            conflicts += peek(&conflict[i]);
            zeros += peek(&zero[i]);
        }

        printf("txs\t%ld\taborts\t\t%ld\tfaileds\t%ld\tcapacities\t%ld\tdebugs\t%ld\tconflit\t%ld\tzero\t%ld\n",
               txs - last_txs, aborts - last_aborts, faileds - last_faileds,
               capacities - last_capacities, debugs - last_debugs,
               conflicts - last_conflicts, zeros - last_zeros);
        sleep(1);
    }
    return nullptr;
}

/**
 * Sets up the bank and the statistics arrays, starts the heartbeat and the
 * workers, waits for the workers, then stops and joins the heartbeat.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const int accounts = 10240;

    // The vector owns the account storage; the bank only points into it.
    std::vector<account_t> storage(accounts);
    for (int i = 0; i < accounts; i++) {
        storage[i].number = i;
        storage[i].balance = 0;
    }
    bank_t bank;
    bank.accounts = storage.data();
    bank.size = accounts;

    // Value-initialized (zeroed) BEFORE any thread starts, so the heartbeat
    // never samples indeterminate memory. These arrays are used until the
    // process exits and are intentionally never freed.
    tx = new long[n_threads]();
    _abort = new long[n_threads]();
    capacity = new long[n_threads]();
    debug = new long[n_threads]();
    failed = new long[n_threads]();
    conflict = new long[n_threads]();
    zero = new long[n_threads]();

    std::thread heartbeat(f2, &bank);

    std::vector<std::thread> workers;
    workers.reserve(n_threads);
    for (int i = 0; i < n_threads; i++) {
        workers.emplace_back(f1, &bank, i);
    }
    for (std::thread& worker : workers) {
        worker.join();
    }

    // All workers are finished: let the heartbeat leave its loop (it notices
    // after its current one-second sleep) and wait for it.
    __atomic_store_n(&done, true, __ATOMIC_RELEASE);
    heartbeat.join();
    return 0;
}
Supplements:
- All accounts are 64-byte aligned. I printed the addresses of bank->accounts[0] and bank->accounts[1]: 0xf41080 and 0xf410c0.
- Using -O0 and
asm volatile("":::"memory");
so there should be no instruction-reordering problems. The abort rate increases over time. Here is the result:
txs 84 aborts 0 faileds 0 capacities 0 debugs 0 conflit 0 zero 0
txs 17070804 aborts 71 faileds 68 capacities 9 debugs 0 conflit 3 zero 59
txs 58838 aborts 9516662 faileds 9516661 capacities 0 debugs 0 conflit 1 zero 9516661
txs 0 aborts 9550428 faileds 9550428 capacities 0 debugs 0 conflit 0 zero 9550428
txs 0 aborts 9549254 faileds 9549254 capacities 0 debugs 0 conflit 0 zero 9549254
Even though n_threads is 1, the result is the same.
If I add a coarse-grained fallback lock as follows, the results seem to be correct.
int fallback_lock;bool rtm_begin(int id){ while(true) { unsigned stat; stat = _xbegin (); if(stat == _XBEGIN_STARTED) { return true; } else { _abort[id]++; if (stat == 0){ zero[id]++; } //call some fallback function if (stat& _XABORT_CONFLICT){ conflict[id]++; } //will not succeed on a retry if ((stat & _XABORT_RETRY) == 0) { failed[id]++; //grab a fallback lock while (!__sync_bool_compare_and_swap(&fallback_lock,0,1)) { } return false; } } }}....in_rtm = rtm_begin(id);y = fallback_lock;accounts[src].balance--;accounts[dst].balance++;if (in_rtm){ _xend();}else{ while(!__sync_bool_compare_and_swap(&fallback_lock, 1, 0)){ }}