This discussion has been locked.

You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Fast Long Roll

Here is a solution looking for a problem. It is a faster alternative to the Keil _lrol_ function; on average it requires only 30% of the time taken by _lrol_. _lrol_ is quicker when shifting by one or two bits, but this function is faster on average.

It is on my Keil wish list that _lrol_ should produce in-line code (as promised in the manual) when performing a shift of exactly one (which is very common when implementing LFSRs etc.) and should call a faster function like this one when the shift is longer or unknown.

fast_long_roll() takes more code space than _lrol_, but deals with shifts in both directions. Unlike _lrol_, it is not affected by rolls greater than 31.

/*****************************************************************************
 *
 *  Fast Long Roll
 *
 *  Trigger:    Call by any process.
 *
 *  Input:      l       -   32-bit word to be rolled.
 *              n       -   number of bits to roll: positive for left,
 *                          negative for right.  Only bits 0..4 of n are
 *                          examined (bits 0..2 by the table lookup, bit 3
 *                          and bit 4 by the JNB tests), so the roll count
 *                          is effectively taken modulo 32.
 *
 *  Output:     returns l rolled by n bit positions.
 *
 *  Function:   returns 32-bit word in rolled state.
 *
 *              The assembly treats R4 as the most significant byte of l
 *              and R7 as the least significant byte, and works in three
 *              independent stages:
 *
 *              1. Roll by (n & 7) bits.  A multiplier is fetched from the
 *                 in-line table (0,2,4,...,128).  "ADD A,#2" steps the
 *                 MOVC index over the two-byte SJMP that sits between the
 *                 MOVC and the table.  Each byte, from R7 up to R4, is
 *                 multiplied by that value; the low byte of the product
 *                 stays in place and the high byte (B) is added into the
 *                 next more significant byte.  The high byte produced by
 *                 R4 is added into R7, which makes it a roll rather than
 *                 a shift.  The table entry for 0 is 0, so JZ skips the
 *                 whole stage when no sub-byte roll is needed.
 *              2. If bit 3 of n is set, roll by one whole byte using a
 *                 chain of XCH instructions.
 *              3. If bit 4 of n is set, exchange the upper and lower
 *                 16-bit halves (roll by sixteen).
 *
 *  Registers:  A, B, R0 (carry byte between multiplies), R1 (multiplier),
 *              R2 (saved copy of n) and R4..R7 are written.
 *
 *****************************************************************************/

long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
{

    /* Put the roll count in the accumulator for the assembly below. */
    ACC = n;

    #pragma ASM

        MOV     R2,A                ;
        ANL     A,#07               ;
        ADD     A,#2                ;
        MOVC    A,@A+PC             ;
        SJMP    ?table_end
        DB      0,2,4,8,16,32,64,128
?table_end:
        JZ      ?roll_x_end         ;
                                    ;
        MOV     R1,A                ;
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        MOV     B,R1                ; A = GH l = ABCDEFnn
        MUL     AB                  ; A = gh l = ABCDEFnn
        XCH     A,R7                ; A = nn l = ABCDEFgh
        MOV     R0,B                ;
        XCH     A,R6                ; A = EF l = ABCDnngh
        MOV     B,R1                ; A = EF l = ABCDnngh
        MUL     AB                  ; A = ef l = ABCDnngh
        ADD     A,R0                ; A = ef l = ABCDnngh
        XCH     A,R6                ; A = nn l = ABCDefgh
        MOV     R0,B                ;
        XCH     A,R5                ; A = CD l = ABnnefgh
        MOV     B,R1                ; A = CD l = ABnnefgh
        MUL     AB                  ; A = cd l = ABnnefgh
        ADD     A,R0                ; A = cd l = ABnnefgh
        XCH     A,R5                ; A = nn l = ABcdefgh
        MOV     R0,B                ;
        XCH     A,R4                ; A = AB l = nncdefgh
        MOV     B,R1                ; A = AB l = nncdefgh
        MUL     AB                  ; A = ab l = nncdefgh
        ADD     A,R0                ; A = ab l = nncdefgh
        XCH     A,R4                ; A = nn l = abcdefgh
                                    ;
        XCH     A,R7                ; A = gh l = abcdefnn
        ADD     A,B                 ; A = gh l = abcdefnn
        XCH     A,R7                ; A = nn l = abcdefgh
                                    ;
?roll_x_end:                        ;
        MOV     A,R2                ;
                                    ;
        JNB     Acc.3,?roll_x8_end  ;roll eight
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        XCH     A,R6                ; A = EF l = ABCDGHnn
        XCH     A,R5                ; A = CD l = ABEFGHnn
        XCH     A,R4                ; A = AB l = CDEFGHnn
        XCH     A,R7                ; A = nn l = CDEFGHAB
?roll_x8_end:                       ;
                                    ;roll sixteen
        JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                    ;        l = ABCDEFGH
        XCH     A,R4                ; A = AB l = nnCDEFGH
        XCH     A,R6                ; A = EF l = nnCDABGH
        XCH     A,R4                ; A = nn l = EFCDABGH
        XCH     A,R5                ; A = CD l = EFnnABGH
        XCH     A,R7                ; A = GH l = EFnnABCD
        XCH     A,R5                ; A = nn 1 = EFGHABCD
                                    ;
?roll_x16_end:                      ;

    #pragma ENDASM

    /* The assembly rolled R4..R7 in place.  NOTE(review): this relies on the
       compiler holding l in R4..R7 both on entry and at this return; confirm
       against the generated SRC listing. */
    return( l );
}

0 Graham Cole over 22 years ago

Slightly improved version:

/*****************************************************************************
 *
 *  Fast Long Roll
 *
 *  Trigger:    Call by any process.
 *
 *  Input:      l       -   32-bit word to be rolled.
 *              n       -   number of bits to roll positive for left,
 *                          negative for right.
 *
 *  Output:     returns l
 *
 *  Function:   returns 32-bit word in rolled state.
 *
 *****************************************************************************/

#pragma	ASM

$REGUSE _fast_long_roll( A, B, R4, R5, R6, R7 )

#pragma ENDASM

long unsigned int fast_long_roll( long unsigned int l, signed char n ) small
{

	ACC = n;

    #pragma ASM
                                    ;
        JNB     Acc.3,?roll_x8_end  ;roll eight
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        XCH     A,R6                ; A = EF l = ABCDGHnn
        XCH     A,R5                ; A = CD l = ABEFGHnn
        XCH     A,R4                ; A = AB l = CDEFGHnn
        XCH     A,R7                ; A = nn l = CDEFGHAB
?roll_x8_end:                       ;
                                    ;roll sixteen
        JNB     Acc.4,?roll_x16_end ;            R4R5R6R7
                                    ;        l = ABCDEFGH
        XCH     A,R4                ; A = AB l = nnCDEFGH
        XCH     A,R6                ; A = EF l = nnCDABGH
        XCH     A,R4                ; A = nn l = EFCDABGH
        XCH     A,R5                ; A = CD l = EFnnABGH
        XCH     A,R7                ; A = GH l = EFnnABCD
        XCH     A,R5                ; A = nn 1 = EFGHABCD
                                    ;
?roll_x16_end:                      ;

        ANL     A,#07               ;
        ADD     A,#2                ;
        MOVC    A,@A+PC             ;
        SJMP    ?table_end
        DB      0,2,4,8,16,32,64,128
?table_end:

        JZ      ?roll_x_end         ;
                                    ;
        MOV     R1,A                ;
                                    ;            R4R5R6R7
                                    ; A = nn l = ABCDEFGH
        XCH     A,R7                ; A = GH l = ABCDEFnn
        MOV     B,R1                ; A = GH l = ABCDEFnn
        MUL     AB                  ; A = gh l = ABCDEFnn
        XCH     A,R7                ; A = nn l = ABCDEFgh
        MOV     R0,B                ;
        XCH     A,R6                ; A = EF l = ABCDnngh
        MOV     B,R1                ; A = EF l = ABCDnngh
        MUL     AB                  ; A = ef l = ABCDnngh
        ADD     A,R0                ; A = ef l = ABCDnngh
        XCH     A,R6                ; A = nn l = ABCDefgh
        MOV     R0,B                ;
        XCH     A,R5                ; A = CD l = ABnnefgh
        MOV     B,R1                ; A = CD l = ABnnefgh
        MUL     AB                  ; A = cd l = ABnnefgh
        ADD     A,R0                ; A = cd l = ABnnefgh
        XCH     A,R5                ; A = nn l = ABcdefgh
        MOV     R0,B                ;
        XCH     A,R4                ; A = AB l = nncdefgh
        MOV     B,R1                ; A = AB l = nncdefgh
        MUL     AB                  ; A = ab l = nncdefgh
        ADD     A,R0                ; A = ab l = nncdefgh
        XCH     A,R4                ; A = nn l = abcdefgh
                                    ;
        XCH     A,R7                ; A = gh l = abcdefnn
        ADD     A,B                 ; A = gh l = abcdefnn
        XCH     A,R7                ; A = nn l = abcdefgh
                                    ;
?roll_x_end:                        ;
									;

    #pragma ENDASM

    return( l );
}

0 Drew Davis over 22 years ago

Well, I happen to use _lrol_ in my code to calculate an MD5 hash, so there's at least one existing problem to match your solution. And I'd use the LFSR code if I didn't already have a PRNG in hardware.

I'm a bit desperate for space instead of speed, so I'll have to investigate closely.

Thanks for posting the code.
Cancel
Vote up 0 Vote down

Cancel
0 Graham Cole over 22 years ago in reply to Drew Davis

Drew: interestingly, an MD5 hash was my original motivation for this code. If you want to discuss MD5 etc., e-mail me at graham.cole@bt.com
Cancel
Vote up 0 Vote down

Cancel
0 Graham Cole over 22 years ago in reply to Graham Cole
Oops, that should have been:

#pragma ASM $REGUSE _fast_long_roll( A, B, R0, R4, R5, R6, R7 ) #pragma ENDASM
Cancel
Vote up 0 Vote down

Cancel
0 Graham Cole over 22 years ago in reply to Graham Cole

A collection of fast (non-iterative) 32 and 16-bit shift and roll functions can now be found at:

http://www.programmersheaven.com/zone5/cat27/31937.htm
Cancel
Vote up 0 Vote down

Cancel