This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Fast Long Roll

Here is a solution looking for a problem. It is a faster alternative to the Keil _lrol_ function: on average it requires only 30% of the time taken by _lrol_. _lrol_ is quicker when shifting by one or two bits, but this function is faster on average.

It is on my Keil wish list that _lrol_ should produce in-line code (as promised in the manual) when performing a shift of exactly one bit (which is very common when implementing LFSRs etc.), and should call a faster function like this one when the shift is longer or unknown.

fast_long_roll() takes more code space than _lrol_, but deals with shifts in both directions. Unlike _lrol_, it is not affected by rolls greater than 31.

/*****************************************************************************
 *
 *  Fast Long Roll
 *
 *  Trigger:    Call by any process.
 *
 *  Input:      l       -   32-bit word to be rolled.
 *              n       -   number of bits to roll positive for left,
 *                          negative for right.
 *
 *  Output:     returns l
 *
 *  Function:   returns 32-bit word in rolled state.
 *
 *****************************************************************************/

long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
{

    ACC = n;

    #pragma ASM

        MOV     R2,A                ;
        ANL     A,#07               ;
        ADD     A,#2                ;
        MOVC    A,@A+PC             ;
        SJMP    ?table_end
        DB      0,2,4,8,16,32,64,128
?table_end:
        JZ      ?roll_x_end         ;
                                    ;
        MOV     R1,A                ;
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        MOV     B,R1                ; A = GH l = ABCDEFnn
        MUL     AB                  ; A = gh l = ABCDEFnn
        XCH     A,R7                ; A = nn l = ABCDEFgh
        MOV     R0,B                ;
        XCH     A,R6                ; A = EF l = ABCDnngh
        MOV     B,R1                ; A = EF l = ABCDnngh
        MUL     AB                  ; A = ef l = ABCDnngh
        ADD     A,R0                ; A = ef l = ABCDnngh
        XCH     A,R6                ; A = nn l = ABCDefgh
        MOV     R0,B                ;
        XCH     A,R5                ; A = CD l = ABnnefgh
        MOV     B,R1                ; A = CD l = ABnnefgh
        MUL     AB                  ; A = cd l = ABnnefgh
        ADD     A,R0                ; A = cd l = ABnnefgh
        XCH     A,R5                ; A = nn l = ABcdefgh
        MOV     R0,B                ;
        XCH     A,R4                ; A = AB l = nncdefgh
        MOV     B,R1                ; A = AB l = nncdefgh
        MUL     AB                  ; A = ab l = nncdefgh
        ADD     A,R0                ; A = ab l = nncdefgh
        XCH     A,R4                ; A = nn l = abcdefgh
                                    ;
        XCH     A,R7                ; A = gh l = abcdefnn
        ADD     A,B                 ; A = gh l = abcdefnn
        XCH     A,R7                ; A = nn l = abcdefgh
                                    ;
?roll_x_end:                        ;
        MOV     A,R2                ;
                                    ;
        JNB     Acc.3,?roll_x8_end  ;roll eight
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        XCH     A,R6                ; A = EF l = ABCDGHnn
        XCH     A,R5                ; A = CD l = ABEFGHnn
        XCH     A,R4                ; A = AB l = CDEFGHnn
        XCH     A,R7                ; A = nn l = CDEFGHAB
?roll_x8_end:                       ;
                                    ;roll sixteen
        JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                    ;        l = ABCDEFGH
        XCH     A,R4                ; A = AB l = nnCDEFGH
        XCH     A,R6                ; A = EF l = nnCDABGH
        XCH     A,R4                ; A = nn l = EFCDABGH
        XCH     A,R5                ; A = CD l = EFnnABGH
        XCH     A,R7                ; A = GH l = EFnnABCD
        XCH     A,R5                ; A = nn 1 = EFGHABCD
                                    ;
?roll_x16_end:                      ;

    #pragma ENDASM

    return( l );
}

Parents
  • Slightly improved version:

    /*****************************************************************************
     *
     *  Fast Long Roll
     *
     *  Trigger:    Call by any process.
     *
     *  Input:      l       -   32-bit word to be rolled.
     *              n       -   number of bits to roll positive for left,
     *                          negative for right.  Only bits 0..4 of n are
     *                          tested, so the roll is taken modulo 32.
     *
     *  Output:     returns l
     *
     *  Function:   returns 32-bit word in rolled state.  Bits 3 and 4 of n
     *              roll l by whole bytes (8 and 16 bits); bits 0..2 select a
     *              power-of-two multiplier that rolls l by the remaining
     *              0..7 bits.
     *
     *  Registers:  l is processed in R4 (most significant byte) to R7 (least
     *              significant byte).  A, B, PSW, R0 and R1 are also modified;
     *              the $REGUSE list below must name every one of them.
     *
     *****************************************************************************/

    #pragma ASM

    $REGUSE _fast_long_roll( A, B, PSW, R0, R1, R4, R5, R6, R7 )

    #pragma ENDASM

    long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
    {

        /* NOTE(review): assumes the compiler loads n into A without        */
        /* disturbing R4..R7 - confirm against the generated SRC listing.   */
        ACC = n;

        #pragma ASM
                                        ;
            JNB     Acc.3,?roll_x8_end  ;roll eight if bit 3 of n is set
                                        ;            R4R5R6R7
                                        ; A = nn l = ABCDEFGH
            XCH     A,R7                ; A = GH l = ABCDEFnn
            XCH     A,R6                ; A = EF l = ABCDGHnn
            XCH     A,R5                ; A = CD l = ABEFGHnn
            XCH     A,R4                ; A = AB l = CDEFGHnn
            XCH     A,R7                ; A = nn l = CDEFGHAB
    ?roll_x8_end:                       ;
                                        ;roll sixteen if bit 4 of n is set
            JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                        ;        l = ABCDEFGH
            XCH     A,R4                ; A = AB l = nnCDEFGH
            XCH     A,R6                ; A = EF l = nnCDABGH
            XCH     A,R4                ; A = nn l = EFCDABGH
            XCH     A,R5                ; A = CD l = EFnnABGH
            XCH     A,R7                ; A = GH l = EFnnABCD
            XCH     A,R5                ; A = nn l = EFGHABCD
                                        ;
    ?roll_x16_end:                      ;

            ANL     A,#07               ; A = n AND 7 = remaining bit count
            ADD     A,#2                ; step over the 2-byte SJMP after MOVC
            MOVC    A,@A+PC             ; A = mm, multiplier from the table
            SJMP    ?table_end
            DB      0,2,4,8,16,32,64,128
    ?table_end:

            JZ      ?roll_x_end         ; mm = 0: bit count was 0, nothing to do
                                        ;
            MOV     R1,A                ; R1 = mm, reloaded into B for each MUL
                                        ;            R4R5R6R7
                                        ; A = mm l = ABCDEFGH
            XCH     A,R7                ; A = GH l = ABCDEFmm
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = gh, B = bits shifted out of GH
            XCH     A,R7                ; A = mm l = ABCDEFgh
            MOV     R0,B                ; R0 = carry into EF
            XCH     A,R6                ; A = EF l = ABCDmmgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = ef, B = bits shifted out of EF
            ADD     A,R0                ; merge carry from GH (bits never overlap)
            XCH     A,R6                ; A = mm l = ABCDefgh
            MOV     R0,B                ; R0 = carry into CD
            XCH     A,R5                ; A = CD l = ABmmefgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = cd, B = bits shifted out of CD
            ADD     A,R0                ; merge carry from EF
            XCH     A,R5                ; A = mm l = ABcdefgh
            MOV     R0,B                ; R0 = carry into AB
            XCH     A,R4                ; A = AB l = mmcdefgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = ab, B = bits shifted out of AB
            ADD     A,R0                ; merge carry from CD
            XCH     A,R4                ; A = mm l = abcdefgh
                                        ;
            XCH     A,R7                ; A = gh l = abcdefmm
            ADD     A,B                 ; wrap bits shifted out of AB into gh
            XCH     A,R7                ; A = mm l = abcdefgh
                                        ;
    ?roll_x_end:                        ;
                                        ;

        #pragma ENDASM

        return( l );
    }
    

Reply
  • Slightly improved version:

    /*****************************************************************************
     *
     *  Fast Long Roll
     *
     *  Trigger:    Call by any process.
     *
     *  Input:      l       -   32-bit word to be rolled.
     *              n       -   number of bits to roll positive for left,
     *                          negative for right.  Only bits 0..4 of n are
     *                          tested, so the roll is taken modulo 32.
     *
     *  Output:     returns l
     *
     *  Function:   returns 32-bit word in rolled state.  Bits 3 and 4 of n
     *              roll l by whole bytes (8 and 16 bits); bits 0..2 select a
     *              power-of-two multiplier that rolls l by the remaining
     *              0..7 bits.
     *
     *  Registers:  l is processed in R4 (most significant byte) to R7 (least
     *              significant byte).  A, B, PSW, R0 and R1 are also modified;
     *              the $REGUSE list below must name every one of them.
     *
     *****************************************************************************/

    #pragma ASM

    $REGUSE _fast_long_roll( A, B, PSW, R0, R1, R4, R5, R6, R7 )

    #pragma ENDASM

    long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
    {

        /* NOTE(review): assumes the compiler loads n into A without        */
        /* disturbing R4..R7 - confirm against the generated SRC listing.   */
        ACC = n;

        #pragma ASM
                                        ;
            JNB     Acc.3,?roll_x8_end  ;roll eight if bit 3 of n is set
                                        ;            R4R5R6R7
                                        ; A = nn l = ABCDEFGH
            XCH     A,R7                ; A = GH l = ABCDEFnn
            XCH     A,R6                ; A = EF l = ABCDGHnn
            XCH     A,R5                ; A = CD l = ABEFGHnn
            XCH     A,R4                ; A = AB l = CDEFGHnn
            XCH     A,R7                ; A = nn l = CDEFGHAB
    ?roll_x8_end:                       ;
                                        ;roll sixteen if bit 4 of n is set
            JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                        ;        l = ABCDEFGH
            XCH     A,R4                ; A = AB l = nnCDEFGH
            XCH     A,R6                ; A = EF l = nnCDABGH
            XCH     A,R4                ; A = nn l = EFCDABGH
            XCH     A,R5                ; A = CD l = EFnnABGH
            XCH     A,R7                ; A = GH l = EFnnABCD
            XCH     A,R5                ; A = nn l = EFGHABCD
                                        ;
    ?roll_x16_end:                      ;

            ANL     A,#07               ; A = n AND 7 = remaining bit count
            ADD     A,#2                ; step over the 2-byte SJMP after MOVC
            MOVC    A,@A+PC             ; A = mm, multiplier from the table
            SJMP    ?table_end
            DB      0,2,4,8,16,32,64,128
    ?table_end:

            JZ      ?roll_x_end         ; mm = 0: bit count was 0, nothing to do
                                        ;
            MOV     R1,A                ; R1 = mm, reloaded into B for each MUL
                                        ;            R4R5R6R7
                                        ; A = mm l = ABCDEFGH
            XCH     A,R7                ; A = GH l = ABCDEFmm
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = gh, B = bits shifted out of GH
            XCH     A,R7                ; A = mm l = ABCDEFgh
            MOV     R0,B                ; R0 = carry into EF
            XCH     A,R6                ; A = EF l = ABCDmmgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = ef, B = bits shifted out of EF
            ADD     A,R0                ; merge carry from GH (bits never overlap)
            XCH     A,R6                ; A = mm l = ABCDefgh
            MOV     R0,B                ; R0 = carry into CD
            XCH     A,R5                ; A = CD l = ABmmefgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = cd, B = bits shifted out of CD
            ADD     A,R0                ; merge carry from EF
            XCH     A,R5                ; A = mm l = ABcdefgh
            MOV     R0,B                ; R0 = carry into AB
            XCH     A,R4                ; A = AB l = mmcdefgh
            MOV     B,R1                ; B = mm
            MUL     AB                  ; A = ab, B = bits shifted out of AB
            ADD     A,R0                ; merge carry from CD
            XCH     A,R4                ; A = mm l = abcdefgh
                                        ;
            XCH     A,R7                ; A = gh l = abcdefmm
            ADD     A,B                 ; wrap bits shifted out of AB into gh
            XCH     A,R7                ; A = mm l = abcdefgh
                                        ;
    ?roll_x_end:                        ;
                                        ;

        #pragma ENDASM

        return( l );
    }
    

Children
No data