elegante inversion.
/*------------------------------------------------------------------------------
  8-BIT BYTE REVERSAL, LSB -> MSB
------------------------------------------------------------------------------*/

/*
 * Reverse the bit order of a byte: bit 0 becomes bit 7, bit 1 becomes bit 6,
 * and so on.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits in the opposite order
 *
 * The parameter type is spelled out in the prototype instead of being left
 * to default to int.
 */
unsigned char invertir_byte(unsigned char mr)
{
    mr = ((mr & 0x0F) << 4) | ((mr & 0xF0) >> 4);   /* swap the two nibbles      */
    mr = ((mr & 0x33) << 2) | ((mr & 0xCC) >> 2);   /* swap bit pairs in nibbles */
    mr = ((mr & 0x55) << 1) | ((mr & 0xAA) >> 1);   /* swap neighbouring bits    */
    return mr;
}
Yes, it is elegant in C, but the resulting assembly code with C51 (no barrel shifter) hardly is.
Erik
Shows that beauty is in the eye of the beholder.
#include <reg52.h>

/*
 * Reverse the bit order of a byte (bit 0 <-> bit 7, bit 1 <-> bit 6, ...)
 * using three mask-and-shift swap steps.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits in the opposite order
 *
 * The parameter type is spelled out in the prototype instead of being left
 * to default to int.
 */
unsigned char invertir_byte(unsigned char mr)
{
    mr = ((mr & 0x0F) << 4) | ((mr & 0xF0) >> 4);   /* swap the two nibbles      */
    mr = ((mr & 0x33) << 2) | ((mr & 0xCC) >> 2);   /* swap bit pairs in nibbles */
    mr = ((mr & 0x55) << 1) | ((mr & 0xAA) >> 1);   /* swap neighbouring bits    */
    return mr;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=123. It takes 121 clock cycles.
#include <reg52.h>

/*
 * Reverse the bit order of a byte by testing each source bit and setting
 * the mirrored bit in the result.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits in the opposite order
 */
unsigned char invertir_byte(unsigned char mr)
{
    /* Must start cleared: bits are only ever OR-ed into the result. */
    unsigned char temp = 0;

    if (mr & 0x80) { temp |= 0x01; }
    if (mr & 0x40) { temp |= 0x02; }
    if (mr & 0x20) { temp |= 0x04; }
    if (mr & 0x10) { temp |= 0x08; }
    if (mr & 0x08) { temp |= 0x10; }
    if (mr & 0x04) { temp |= 0x20; }
    if (mr & 0x02) { temp |= 0x40; }
    if (mr & 0x01) { temp |= 0x80; }

    return temp;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=85. It takes 42 clock cycles.
#include <reg52.h>

/*
 * Reverse the bit order of a byte with an 8-iteration loop: each pass takes
 * the lowest bit of the source and shifts it into the bottom of the result.
 *
 * mr       byte to reverse
 * returns  mr with its eight bits in the opposite order
 */
unsigned char invertir_byte(unsigned char mr)
{
    bit tempb;
    unsigned char count;
    unsigned char temp = 0;   /* initialised so the first shift reads a defined value */

    for (count = 8; count; count--) {
        tempb = mr & 0x01;    /* lowest remaining source bit */
        mr >>= 1;
        temp <<= 1;           /* make room at the bottom of the result */
        temp = temp | tempb;
    }
    return temp;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    while (1) {
        P1 = invertir_byte(0x33);
    }
}

/* Reported for the code as originally posted:
   Program Size: data=12.1 xdata=0 code=64; it takes 175 clock cycles. */
Since you've started experimenting, why don't you try another approach: use the bit addressing capabilities of the 8051 and convert the byte bit-by-bit? You can use bdata and sbit keywords to achieve this.
As noted earlier, the original "beautiful code" is really great on the correct platform.
It likes barrel shifters, where each shift operation takes a fixed number of clock cycles independent of the number of shift steps.
It likes multiple ALUs, allowing the operations to be performed concurrently before the final merge of the results.
It is free from conditional jumps, avoiding branch prediction failures in high-end processors.
It does not require the processor to have special bit instructions to operate on single bits, like the 8051 has.
A normal 8051 doesn't have a barrel shifter. And it doesn't have multiple concurrent ALUs. And it does not have a pipeline with advanced branch prediction, where a failed prediction may cost many in-flight instructions. Even the fast one-clockers see only a limited loss from a branch prediction failure.
#include <reg52.h>

/* Result byte declared with bdata so its individual bits can be named. */
unsigned char bdata temp;

/* One sbit alias per bit of temp, D0 = bit 0 ... D7 = bit 7. */
sbit D0 = temp^0;
sbit D1 = temp^1;
sbit D2 = temp^2;
sbit D3 = temp^3;
sbit D4 = temp^4;
sbit D5 = temp^5;
sbit D6 = temp^6;
sbit D7 = temp^7;

/*
 * Reverse the bit order of a byte by writing each masked source bit to the
 * mirrored bit of temp.
 *
 * mr       byte to reverse
 * returns  the contents of temp after all eight bits have been written
 *
 * NOTE(review): relies on the compiler storing 1 in an sbit for any non-zero
 * right-hand side -- confirm against the C51 documentation.
 */
unsigned char invertir_byte(unsigned char mr)
{
    D0 = mr & 0x80;
    D1 = mr & 0x40;
    D2 = mr & 0x20;
    D3 = mr & 0x10;
    D4 = mr & 0x08;
    D5 = mr & 0x04;
    D6 = mr & 0x02;
    D7 = mr & 0x01;

    return temp;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
Program Size: data=10.0 xdata=0 code=59. It costs 35 machine cycles.
#include <reg52.h>

/*
 * Reverse the bit order of a byte by collecting the source bits from bit 0
 * upwards and shifting each one into the bottom of an accumulator.
 *
 * val      byte to reverse
 * returns  val with its eight bits in the opposite order
 */
unsigned char invertir_byte(unsigned char val)
{
    unsigned char reversed = 0x00;
    unsigned char pos;

    /* Shift first, then insert: after eight passes bit 0 of val sits in bit 7. */
    for (pos = 0; pos < 8; pos++) {
        reversed = (reversed << 1) | ((val >> pos) & 0x01);
    }

    return reversed;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
287 cycles. Program Size: data=9.0 xdata=0 code=57
#include <reg52.h>

/* tab[n] is the 4-bit value n with its bit order reversed (e.g. tab[1] == 0x08). */
unsigned char code tab[16] = {
    0x00, 0x08, 0x04, 0x0c,
    0x02, 0x0a, 0x06, 0x0e,
    0x01, 0x09, 0x05, 0x0d,
    0x03, 0x0b, 0x07, 0x0f
};

/*
 * Reverse the bit order of a byte with two nibble lookups: the reversed
 * high nibble becomes the low nibble of the result and vice versa.
 *
 * dat      byte to reverse
 * returns  dat with its eight bits in the opposite order
 */
unsigned char invertir_byte(unsigned char dat)
{
    unsigned char from_high = tab[dat >> 4];      /* reversed upper nibble */
    unsigned char from_low  = tab[dat & 0x0f];    /* reversed lower nibble */

    return (from_low << 4) | from_high;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
// costs 26 machine cycles // Program Size: data=9.0 xdata=0 code=63
How fast/slow is this?
unsigned char bdata src; sbit S0=src^0; sbit S1=src^1; sbit S2=src^2; sbit S3=src^3; sbit S4=src^4; sbit S5=src^5; sbit S6=src^6; sbit S7=src^7; unsigned char bdata dst; sbit D0=dst^0; sbit D1=dst^1; sbit D2=dst^2; sbit D3=dst^3; sbit D4=dst^4; sbit D5=dst^5; sbit D6=dst^6; sbit D7=dst^7; unsigned char invertir_byte (unsigned char mr) { src=mr; D0=S7; D1=S6; D2=S5; D3=S4; D4=S3; D5=S2; D6=S1; D7=S0; return(dst); } void main() { while(1) { P1=invertir_byte(0x33); } }
Jon
#include <reg52.h>

/* Source byte declared with bdata so its individual bits can be named. */
unsigned char bdata src;
sbit S0 = src^0;
sbit S1 = src^1;
sbit S2 = src^2;
sbit S3 = src^3;
sbit S4 = src^4;
sbit S5 = src^5;
sbit S6 = src^6;
sbit S7 = src^7;

/* Destination byte, likewise declared with bdata and aliased bit by bit. */
unsigned char bdata dst;
sbit D0 = dst^0;
sbit D1 = dst^1;
sbit D2 = dst^2;
sbit D3 = dst^3;
sbit D4 = dst^4;
sbit D5 = dst^5;
sbit D6 = dst^6;
sbit D7 = dst^7;

/*
 * Reverse the bit order of a byte by loading it into src and copying each
 * source bit to the mirrored bit of dst.
 *
 * mr       byte to reverse
 * returns  the contents of dst after all eight bits have been copied
 */
unsigned char invertir_byte(unsigned char mr)
{
    src = mr;

    D7 = S0;
    D6 = S1;
    D5 = S2;
    D4 = S3;
    D3 = S4;
    D2 = S5;
    D1 = S6;
    D0 = S7;

    return dst;
}

/* Test driver: continuously writes the reversed value of 0x33 to port P1. */
void main()
{
    for (;;) {
        P1 = invertir_byte(0x33);
    }
}
// costs 35 machine cycles // Program Size: data=11.0 xdata=0 code=61
Here is my last byte inverter for you.
/*
 * Full 256-entry lookup-table variant.
 *
 * Table is indexed by a byte value; each entry appears to be the index with
 * its bit order reversed (e.g. Table[0x01] is 0x80 and Table[0x33] is 0xCC).
 * There is no invertir_byte() function here: main() indexes the table
 * directly and continuously writes Table[0x33] to port P1.
 */
#include <reg52.h> #include <stdio.h> #include <stdlib.h> unsigned char code Table[]= { 0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0, 0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, 0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8, 0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, 0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4, 0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, 0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC, 0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC, 0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2, 0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2, 0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA, 0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA, 0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6, 0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6, 0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE, 0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE, 0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1, 0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1, 0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9, 0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9, 0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5, 0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5, 0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED, 0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD, 0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3, 0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3, 0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB, 0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB, 0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7, 0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7, 0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF, 0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF, }; void main(void) { while(1) { P1=Table[0x33]; } }
11 cycles 280 bytes code
There is one more approach. If there are two 8-bit GPIO ports to spare, one could implement byte inversion by wiring the ports appropriately. One would then write a byte to one port and read the inverted value from the other. On low pin count MCUs, the chip manufacturers could even do the wiring internally for ports that are not pinned out. Go on, use the idea, I'm not planning to patent it :-)
An even better approach would have the processor support a configurable bit mapping between a write and a read address, allowing nibble swaps, bit reversals, ...
That was the kind of thing you could do with a Triscend.
I guess a PSoC could?
For ARM, Actel now seem to have taken up from where Triscend left off...