elegante inversion.
/*------------------------------------------------------------------------------
 8-BIT BYTE REVERSAL, LSB <-> MSB
------------------------------------------------------------------------------*/

/*
 * Reverse the bit order of a byte: bit 0 trades places with bit 7,
 * bit 1 with bit 6, and so on.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits mirrored
 *
 * The parameter is declared with an explicit type in a prototype-style
 * definition. The earlier old-style form `invertir_byte (mr)` left the
 * parameter untyped (implicit int, rejected by C99 and later), and the
 * file-scope `unsigned char mr;` did not type it: that was a separate,
 * unused global hidden by the parameter, so it has been dropped.
 */
unsigned char invertir_byte(unsigned char mr)
{
    mr = ((mr & 0x0F) << 4) | ((mr & 0xF0) >> 4);   /* swap the two nibbles          */
    mr = ((mr & 0x33) << 2) | ((mr & 0xCC) >> 2);   /* swap bit pairs in each nibble */
    mr = ((mr & 0x55) << 1) | ((mr & 0xAA) >> 1);   /* swap neighbouring bits        */
    return mr;
}
#include <reg52.h>

/* Source byte (bdata); S0..S7 name its individual bits 0..7. */
unsigned char bdata src;
sbit S0 = src^0;
sbit S1 = src^1;
sbit S2 = src^2;
sbit S3 = src^3;
sbit S4 = src^4;
sbit S5 = src^5;
sbit S6 = src^6;
sbit S7 = src^7;

/* Destination byte (bdata); D0..D7 name its individual bits 0..7. */
unsigned char bdata dst;
sbit D0 = dst^0;
sbit D1 = dst^1;
sbit D2 = dst^2;
sbit D3 = dst^3;
sbit D4 = dst^4;
sbit D5 = dst^5;
sbit D6 = dst^6;
sbit D7 = dst^7;

/*
 * Reverse the bit order of mr.
 *
 * The argument is stored in src, every bit of src is copied to the mirrored
 * bit of dst, and dst is returned. All eight bits of dst are rewritten on
 * each call, so nothing from a previous call survives.
 */
unsigned char invertir_byte(unsigned char mr)
{
    src = mr;

    /* Bit n of src goes to bit 7-n of dst. */
    D7 = S0;
    D6 = S1;
    D5 = S2;
    D4 = S3;
    D3 = S4;
    D2 = S5;
    D1 = S6;
    D0 = S7;

    return dst;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
// Cost: 35 machine cycles. Program Size: data=11.0 xdata=0 code=61
How fast/slow is this?
unsigned char bdata src; sbit S0=src^0; sbit S1=src^1; sbit S2=src^2; sbit S3=src^3; sbit S4=src^4; sbit S5=src^5; sbit S6=src^6; sbit S7=src^7; unsigned char bdata dst; sbit D0=dst^0; sbit D1=dst^1; sbit D2=dst^2; sbit D3=dst^3; sbit D4=dst^4; sbit D5=dst^5; sbit D6=dst^6; sbit D7=dst^7; unsigned char invertir_byte (unsigned char mr) { src=mr; D0=S7; D1=S6; D2=S5; D3=S4; D4=S3; D5=S2; D6=S1; D7=S0; return(dst); } void main() { while(1) { P1=invertir_byte(0x33); } }
Jon
#include <reg52.h>

/* tab[n] is the 4-bit value n with its four bits mirrored (e.g. 0x1 -> 0x8). */
unsigned char code tab[16] = {
    0x00, 0x08, 0x04, 0x0c,
    0x02, 0x0a, 0x06, 0x0e,
    0x01, 0x09, 0x05, 0x0d,
    0x03, 0x0b, 0x07, 0x0f
};

/*
 * Reverse the bit order of dat using two nibble look-ups.
 *
 * The mirrored low nibble becomes the high nibble of the result and the
 * mirrored high nibble becomes the low nibble.
 */
unsigned char invertir_byte(unsigned char dat)
{
    return (unsigned char)((tab[dat & 0x0f] << 4) | tab[dat >> 4]);
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
// Cost: 26 machine cycles. Program Size: data=9.0 xdata=0 code=63
#include <reg52.h>

/*
 * Reverse the bit order of val with a plain eight-step loop.
 *
 * On step pos the accumulator is shifted left one place and bit pos of val
 * is appended at the bottom, so bit 0 of val ends up in bit 7 of the result
 * and bit 7 of val ends up in bit 0.
 */
unsigned char invertir_byte(unsigned char val)
{
    unsigned char reversed = 0x00;
    unsigned char pos;

    for (pos = 0; pos < 8; pos++) {
        reversed = (reversed << 1) | ((val >> pos) & 0x01);
    }

    return reversed;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
287 cycles. Program Size: data=9.0 xdata=0 code=57
#include <reg52.h>

/* Result byte (bdata); D0..D7 name its individual bits 0..7. */
unsigned char bdata temp;
sbit D0 = temp^0;
sbit D1 = temp^1;
sbit D2 = temp^2;
sbit D3 = temp^3;
sbit D4 = temp^4;
sbit D5 = temp^5;
sbit D6 = temp^6;
sbit D7 = temp^7;

/*
 * Reverse the bit order of mr.
 *
 * Each bit of mr is isolated with a mask and assigned to the mirrored bit
 * of temp; temp is then returned. All eight bits of temp are written on
 * every call.
 *
 * NOTE(review): this relies on assignment to an sbit treating any non-zero
 * masked value as 1 -- presumably standard C51 behaviour; confirm against
 * the compiler documentation.
 */
unsigned char invertir_byte(unsigned char mr)
{
    D0 = mr & 0x80;
    D1 = mr & 0x40;
    D2 = mr & 0x20;
    D3 = mr & 0x10;
    D4 = mr & 0x08;
    D5 = mr & 0x04;
    D6 = mr & 0x02;
    D7 = mr & 0x01;

    return temp;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=59. Cost: 35 machine cycles.
As noted earlier, the original "beautiful code" is really great on the correct platform.
It likes barrel shifters, where each shift operation takes a fixed number of clock cycles independent of the number of shift steps.
It likes multiple ALUs, allowing the operations to be performed concurrently before the final merge of the results.
It is free from conditional jumps, avoiding branch prediction failures in high-end processors.
It does not require the processor to have special bit instructions to operate on single bits, like the 8051 has.
A normal 8051 doesn't have a barrel shifter. And it doesn't have multiple concurrent ALUs. And it does not have a pipeline with advanced branch prediction, where a failed prediction may cost many in-flight instructions. Even the fast one-clockers see only limited loss from a branch prediction failure.
Since you've started experimenting, why don't you try another approach: use the bit addressing capabilities of the 8051 and convert the byte bit-by-bit? You can use bdata and sbit keywords to achieve this.
#include <reg52.h>

/*
 * Reverse the bit order of mr, one bit per loop iteration.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits mirrored
 *
 * Each pass takes the lowest remaining bit of mr and pushes it into temp
 * from the right, so the first bit taken ends up as the top bit.
 *
 * Changes from the posted version:
 *  - the parameter now has an explicit type; the old-style `invertir_byte (mr)`
 *    left it untyped (implicit int), and the file-scope `unsigned char mr;`
 *    was only an unused global hidden by the parameter, so it is dropped;
 *  - temp starts at 0 so the first shift never reads an indeterminate value
 *    (the old bits are shifted out after eight passes either way).
 */
unsigned char invertir_byte(unsigned char mr)
{
    bit tempb;
    unsigned char count;
    unsigned char temp = 0;

    for (count = 8; count; count--) {
        tempb = mr & 0x01;      /* lowest remaining bit of the input */
        mr >>= 1;
        temp <<= 1;             /* make room at the bottom of the result */
        temp = temp | tempb;
    }

    return temp;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}

/*
 * Figures posted with the original version of this snippet:
 * Program Size: data=12.1 xdata=0 code=64. It takes 175 clock cycles.
 */
#include <reg52.h>

/*
 * Reverse the bit order of a byte with three mask-and-shift swaps.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits mirrored
 *
 * The parameter now has an explicit type. The old-style definition
 * `invertir_byte (mr)` left it untyped (implicit int), and the file-scope
 * `unsigned char mr;` was only an unused global hidden by the parameter,
 * so it is dropped.
 *
 * NOTE(review): with an int parameter the swaps were presumably compiled as
 * 16-bit operations on C51 -- confirm against the toolchain documentation.
 * The size/cycle figures quoted for this snippet were measured on the
 * original untyped version.
 */
unsigned char invertir_byte(unsigned char mr)
{
    mr = ((mr & 0x0F) << 4) | ((mr & 0xF0) >> 4);   /* swap the two nibbles          */
    mr = ((mr & 0x33) << 2) | ((mr & 0xCC) >> 2);   /* swap bit pairs in each nibble */
    mr = ((mr & 0x55) << 1) | ((mr & 0xAA) >> 1);   /* swap neighbouring bits        */
    return mr;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=123. It takes 121 clock cycles.
#include <reg52.h>

/*
 * Reverse the bit order of mr by testing each input bit and setting the
 * mirrored output bit.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits mirrored
 *
 * Changes from the posted version:
 *  - temp is initialised to 0. It was previously left uninitialised and only
 *    ever OR-ed into, so any output bit whose input bit was clear kept
 *    whatever value temp happened to hold on entry;
 *  - the parameter now has an explicit type; the old-style `invertir_byte (mr)`
 *    left it untyped (implicit int), and the file-scope `unsigned char mr;`
 *    was only an unused global hidden by the parameter, so it is dropped.
 */
unsigned char invertir_byte(unsigned char mr)
{
    unsigned char temp = 0;     /* must start cleared: bits are only ever set below */

    if (mr & 0x80) { temp |= 0x01; }
    if (mr & 0x40) { temp |= 0x02; }
    if (mr & 0x20) { temp |= 0x04; }
    if (mr & 0x10) { temp |= 0x08; }
    if (mr & 0x08) { temp |= 0x10; }
    if (mr & 0x04) { temp |= 0x20; }
    if (mr & 0x02) { temp |= 0x40; }
    if (mr & 0x01) { temp |= 0x80; }

    return temp;
}

/* Loop forever, writing invertir_byte(0x33) to P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=85. It takes 42 clock cycles.
Shows that beauty is in the eye of the beholder.
yes, it is elegant in C, but the resulting assembler with C51 (no barrel shifter) hardly is.
Erik