This discussion has been locked.

You can no longer post new replies to this discussion. If you have a question you can start a new discussion

casal instruction is slower than ldxr/stlxr in glibc atomic_compare_exchange

zca over 5 years ago

I find that the CAS instruction is much slower than ldxr/stlxr. I have tested it with __atomic_compare_exchange:

time_cnt_old = get_cnt();

for(k = 0; k < LOOP_CNT; k++)

{

countold=count;

count++;

__atomic_compare_exchange(&a,&countold, &count,0,__ATOMIC_SEQ_CST,__ATOMIC_SEQ_CST);

}

time_cnt_new = get_cnt() - time_cnt_old

you can use

gcc -lpthread -march=armv8.1-a -o test main.c to get the "casal" atomic instruction

gcc -lpthread -march=armv8-a -o test main.c to get the "ldxr/stlxr" atomic instructions

The casal time is much slower than the "ldxr/stlxr" time:

(1) run glibc atomic armv8.1-a

0: glibc atomic cmpcxg:585019493

1: glibc atomic cmpcxg:408777308

2: glibc atomic cmpcxg:769870843

3: glibc atomic cmpcxg:149371093

4: glibc atomic cmpcxg:151365619

5: glibc atomic cmpcxg:150890346

6: glibc atomic cmpcxg:151328121

7: glibc atomic cmpcxg:156505415

8: glibc atomic cmpcxg:412924425

9: glibc atomic cmpcxg:278711677

10: glibc atomic cmpcxg:151651510

11: glibc atomic cmpcxg:279346515

12: glibc atomic cmpcxg:151173807

13: glibc atomic cmpcxg:278545998

14: glibc atomic cmpcxg:278200664

15: glibc atomic cmpcxg:277724961

16: glibc atomic cmpcxg:370065101

17: glibc atomic cmpcxg:278351668

18: glibc atomic cmpcxg:151488937

19: glibc atomic cmpcxg:151469273

(2) run custom ldxr atomic armv8-a

0: custom ldxr atomic cmpcxg:94791218

1: custom ldxr atomic cmpcxg:94722346

2: custom ldxr atomic cmpcxg:94858015

3: custom ldxr atomic cmpcxg:94658057

4: custom ldxr atomic cmpcxg:94695239

5: custom ldxr atomic cmpcxg:94687119

6: custom ldxr atomic cmpcxg:94657355

7: custom ldxr atomic cmpcxg:94666011

8: custom ldxr atomic cmpcxg:94631812

9: custom ldxr atomic cmpcxg:94835661

10: custom ldxr atomic cmpcxg:94686230

11: custom ldxr atomic cmpcxg:94797306

12: custom ldxr atomic cmpcxg:94691870

13: custom ldxr atomic cmpcxg:94685030

14: custom ldxr atomic cmpcxg:94680305

15: custom ldxr atomic cmpcxg:94759021

16: custom ldxr atomic cmpcxg:94700858

17: custom ldxr atomic cmpcxg:94715765

18: custom ldxr atomic cmpcxg:94687178

19: custom ldxr atomic cmpcxg:94662201

The casal time is also not stable. Could you help explain this? casal should be faster than ldxr/stlxr for atomic compare-and-exchange.

0 Andy Neil over 5 years ago
Cancel
Vote up 0 Vote down

Cancel

0 zca over 5 years ago

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

/* Number of compare-and-swap iterations timed by glibc_atomic_performance(). */
#define LOOP_CNT (100000000)

/* Shared word that the benchmark's compare-and-swap operates on. */
int a=0;
/* Not referenced by any code visible in this listing. */
int b=0;

static unsigned long get_cnt(void)
{
	unsigned long timer_val;
	asm (   " mrs %0, cntvct_el0"
		: "=r" (timer_val)
		:
		: "memory"
	);
	return timer_val;
}


/*
 * Time LOOP_CNT sequentially-consistent strong compare-and-swap operations
 * on the global `a`.
 *
 * On every iteration the expected value is the previous desired value and
 * the desired value is one greater, so as long as `a` starts at 0 and no
 * other code writes to it, every exchange succeeds and `a` ends up equal to
 * LOOP_CNT.  If `a` does not match (for example on a second call), every
 * exchange takes the failure path instead.
 *
 * Returns the difference between the get_cnt() readings taken immediately
 * before and after the loop.
 */
unsigned long glibc_atomic_performance(void)
{
  int expected = 0;
  int desired = 0;
  unsigned long start;
  int k;

  start = get_cnt();
  for (k = 0; k < LOOP_CNT; k++)
  {
      expected = desired;
      desired++;
      /* 4th argument 0 selects the strong (non-spuriously-failing) variant. */
      __atomic_compare_exchange(&a, &expected, &desired, 0,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  }

  return get_cnt() - start;
}

/*
 * Run the compare-and-swap benchmark once and print the elapsed counter
 * ticks reported by glibc_atomic_performance().
 *
 * Command-line arguments are accepted but not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
  unsigned long elapsed;

  (void)argc;
  (void)argv;

  elapsed = glibc_atomic_performance();
  /* %lu matches the unsigned long argument; %ld would be a type mismatch. */
  printf("glibc atomic cmpcxg:%lu\n", elapsed);

  return 0;
}

Thanks for your reply. I have posted the code above; the results are as follows:

$gcc -lpthread -march=armv8.1-a -o test cas_v3.c && taskset -c 2 ./test

glibc atomic cmpcxg:243257012

$gcc -lpthread -march=armv8-a -o test cas_v3.c && taskset -c 2 ./test

glibc atomic cmpcxg:81125905