This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Fast Long Roll

Here is a solution looking for a problem. It is a faster alternative to the Keil _lrol_ function: on average it requires only 30% of the time taken by _lrol_. _lrol_ is quicker when shifting by one or two bits, but this function is faster on average.

It is on my Keil wish list that _lrol_ should produce in-line code (as promised in the manual) when performing a shift of exactly one (which is very common when implementing LFSRs etc.) and should call a faster function like this one when the shift is longer or unknown.

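fast_long_roll() takes more code space than _lrol_, but deals with shifts in both directions. Unlike _lrol_, it is not affected by rolls greater than 31: only the low five bits of the roll count are examined, so the count is effectively taken modulo 32.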

/*****************************************************************************
 *
 *  Fast Long Roll
 *
 *  Trigger:    Call by any process.
 *
 *  Input:      l       -   32-bit word to be rolled.
 *              n       -   number of bits to roll: positive for left,
 *                          negative for right.
 *
 *  Output:     returns l rolled by n bits.
 *
 *  Function:   returns 32-bit word in rolled state.
 *
 *****************************************************************************/

long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
{

    ACC = n;

    #pragma ASM

        ; Rolls the 32-bit value held in R4..R7 left by (n AND 31) bits,
        ; in three independent stages:
        ;   stage 1 - bits 0..2 of n: roll by 1..7 bits using MUL AB with a
        ;             power-of-two multiplier taken from a small table;
        ;   stage 2 - bit 3 of n:     roll by 8  (pure byte shuffle);
        ;   stage 3 - bit 4 of n:     roll by 16 (pure byte shuffle).
        ; Bits 5..7 of n are never examined, so the count is effectively
        ; modulo 32; a negative n (two's complement) therefore behaves as a
        ; right roll, e.g. n = -1 has low five bits 31 = roll left 31.
        ;
        ; Registers written here: A, B, R0, R1, R2 and the flags.
        ;
        ; NOTE(review): this code assumes the compiler keeps l in R4..R7
        ; (R4 = most significant byte) on entry to this block and returns it
        ; from the same registers - confirm in the generated listing.
        ;
        ; Trace comments: each letter is one nibble of l (upper case =
        ; original value, lower case = after the bit roll); "nn" is whatever
        ; scratch byte is currently parked in a register or in A.

        MOV     R2,A                ; R2 = n, kept for stages 2 and 3
        ANL     A,#07               ; A = n AND 7 = bit count for stage 1
        ADD     A,#2                ; +2 steps over the 2-byte SJMP below
        MOVC    A,@A+PC             ; A = table[n AND 7] (PC points at SJMP)
        SJMP    ?table_end          ; jump over the table data
        DB      0,2,4,8,16,32,64,128 ; 2^k for k = 1..7; entry 0 is 0 = "skip"
?table_end:
        JZ      ?roll_x_end         ; multiplier 0: no bit roll needed
                                    ;
        MOV     R1,A                ; R1 = multiplier (2^k), reloaded into B
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        MOV     B,R1                ; A = GH l = ABCDEFnn
        MUL     AB                  ; A = gh l = ABCDEFnn  (B = bits out of GH)
        XCH     A,R7                ; A = nn l = ABCDEFgh
        MOV     R0,B                ; R0 = bits shifted out of GH
        XCH     A,R6                ; A = EF l = ABCDnngh
        MOV     B,R1                ; A = EF l = ABCDnngh
        MUL     AB                  ; A = ef l = ABCDnngh  (B = bits out of EF)
        ADD     A,R0                ; A = ef l = ABCDnngh  (merge bits from GH)
        XCH     A,R6                ; A = nn l = ABCDefgh
        MOV     R0,B                ; R0 = bits shifted out of EF
        XCH     A,R5                ; A = CD l = ABnnefgh
        MOV     B,R1                ; A = CD l = ABnnefgh
        MUL     AB                  ; A = cd l = ABnnefgh  (B = bits out of CD)
        ADD     A,R0                ; A = cd l = ABnnefgh  (merge bits from EF)
        XCH     A,R5                ; A = nn l = ABcdefgh
        MOV     R0,B                ; R0 = bits shifted out of CD
        XCH     A,R4                ; A = AB l = nncdefgh
        MOV     B,R1                ; A = AB l = nncdefgh
        MUL     AB                  ; A = ab l = nncdefgh  (B = bits out of AB)
        ADD     A,R0                ; A = ab l = nncdefgh  (merge bits from CD)
        XCH     A,R4                ; A = nn l = abcdefgh
                                    ; wrap-around: bits out of AB go to the
                                    ; bottom of the least significant byte
        XCH     A,R7                ; A = gh l = abcdefnn
        ADD     A,B                 ; A = gh l = abcdefnn  (merge bits from AB)
        XCH     A,R7                ; A = nn l = abcdefgh
                                    ;
?roll_x_end:                        ;
        MOV     A,R2                ; A = n again, for the bit 3 / bit 4 tests
                                    ;
        JNB     Acc.3,?roll_x8_end  ;roll eight
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        XCH     A,R6                ; A = EF l = ABCDGHnn
        XCH     A,R5                ; A = CD l = ABEFGHnn
        XCH     A,R4                ; A = AB l = CDEFGHnn
        XCH     A,R7                ; A = nn l = CDEFGHAB
?roll_x8_end:                       ; A still holds n on both paths
                                    ;roll sixteen
        JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                    ;        l = ABCDEFGH
        XCH     A,R4                ; A = AB l = nnCDEFGH
        XCH     A,R6                ; A = EF l = nnCDABGH
        XCH     A,R4                ; A = nn l = EFCDABGH
        XCH     A,R5                ; A = CD l = EFnnABGH
        XCH     A,R7                ; A = GH l = EFnnABCD
        XCH     A,R5                ; A = nn l = EFGHABCD
                                    ;
?roll_x16_end:                      ;

    #pragma ENDASM

    return( l );
}

0