<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="https://community.arm.com/utility/feedstylesheets/rss.xsl" media="screen"?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:wfw="http://wellformedweb.org/CommentAPI/"><channel><title>Fast 32-bit and 16-bit Division</title><link>https://community.arm.com/developer/tools-software/tools/f/keil-forum/42240/fast-32-bit-and-16-bit-division</link><description> Here are two assembly language functions which perform division of 32-bit and 16-bit numbers by values in the range 1…16 inclusive. 
 
Although the range of the divisor is very limited, these functions are significantly faster than conventional division</description><dc:language>en-US</dc:language><generator>Telligent Community 10</generator><item><title>RE: Fast 32-bit and 16-bit Division</title><link>https://community.arm.com/thread/110689?ContentTypeID=1</link><pubDate>Tue, 27 Jan 2004 07:59:11 GMT</pubDate><guid isPermaLink="false">dd9e70c8-6d3c-4c71-b136-2456382a7b5c:e1000165-4b78-4daf-9351-5f369e46950e</guid><dc:creator>Graham Cole</dc:creator><description>&lt;p&gt;I have posted updated versions of these functions here:&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://www.programmersheaven.com/zone5/cat27/32144.htm" target="_blank"&gt;http://www.programmersheaven.com/zone5/cat27/32144.htm&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
The versions posted on the Keil forum contain a minor bug in that the remainder is not returned correctly. The versions at Programmers Heaven are correct and include a radix conversion function.&lt;br /&gt;
&lt;br /&gt;
I have posted some other nuggets which can be found here:&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://www.programmersheaven.com/zone5/cat27/index.htm" target="_blank"&gt;http://www.programmersheaven.com/zone5/cat27/index.htm&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
Each of my contributions has the epithet &lt;b&gt;&lt;i&gt;fast&lt;/i&gt;&lt;/b&gt;.&lt;/p&gt;&lt;div style="clear:both;"&gt;&lt;/div&gt;</description></item><item><title>RE: Fast 32-bit and 16-bit Division</title><link>https://community.arm.com/thread/102834?ContentTypeID=1</link><pubDate>Fri, 09 Jan 2004 12:48:30 GMT</pubDate><guid isPermaLink="false">dd9e70c8-6d3c-4c71-b136-2456382a7b5c:a52606df-0880-4bfc-9d03-d86f4763daf2</guid><dc:creator>Graham Cole</dc:creator><description>&lt;p&gt;And a test harness:&lt;br /&gt;
&lt;pre&gt;

extern unsigned int fast_int_divide( unsigned int dividend,
                                     unsigned char divisor ) small;

extern unsigned long int fast_long_divide( unsigned long int dividend,
                                           unsigned char divisor ) small;

unsigned int compiler_int_divide( unsigned int dividend,
                                  unsigned char divisor )
{
    return( dividend / divisor );
}

unsigned int new_int_divide( unsigned int dividend,
                             unsigned char divisor )
{
    return( fast_int_divide( dividend, divisor ) );
}

unsigned long int compiler_long_divide( unsigned long int dividend,
                                        unsigned char divisor )
{
    return( dividend / divisor );
}

unsigned long int new_long_divide( unsigned long int dividend,
                                   unsigned char divisor )
{
    return( fast_long_divide( dividend, divisor ) );
}

unsigned long int random()
{
    static unsigned int x;

    x = ( ( 31 * x ) + 123 );

    if( x == 0xFFFF ) x = 0;

    return(x);
}


void main( void )
{
    unsigned int      loop;
    unsigned int      d;
    unsigned int      sd;
    unsigned int      i;
    unsigned long int l;
    unsigned long int lr;
    unsigned char     dummy;

    dummy = 0;

    lr = new_long_divide( new_long_divide(  0x12345678, 6 ), 10 );

    for( loop = 0; loop &amp;lt; 0xFFFF; loop++ )
    {
        i = random();
        d = random();

        if( d == 0 ) d = 1;

        l = ( (unsigned long int)
              ( random() &amp;amp; 0xFF ) &amp;lt;&amp;lt; ( random() % 24 ) );

        sd = random() % 17;   // small divisor always in range 1...16

        if( sd == 0 ) sd = 1;

        if( new_int_divide( i, sd ) != compiler_int_divide( i, sd ) )
        {
            dummy++;
        }

        if( new_long_divide( l, sd ) != compiler_long_divide( l, sd ) )
        {
            dummy++;
        }
    }
}
&lt;/pre&gt;&lt;/p&gt;&lt;div style="clear:both;"&gt;&lt;/div&gt;</description></item><item><title>RE: Fast 32-bit and 16-bit Division</title><link>https://community.arm.com/thread/72197?ContentTypeID=1</link><pubDate>Fri, 09 Jan 2004 12:47:29 GMT</pubDate><guid isPermaLink="false">dd9e70c8-6d3c-4c71-b136-2456382a7b5c:8a8ddef1-6791-48b7-9358-990da1c65ad5</guid><dc:creator>Graham Cole</dc:creator><description>&lt;p&gt;&lt;pre&gt;
//
//  fast_int_divide()
//
//  This function will divide an unsigned 16-value in registers
//  R6/R7 by a value in the range 1...16 in register R5. The
//  function uses the currently selected register bank.
//
//  This function requires about 60% of the processing time compared
//  with the already well optimised Keil compiler unsigned long
//  divide function and should be much faster than most general
//  purpose 16-bit by 16-bit divides.
//
//  This function also returns the remainder in R4/R5 (R4 will
//  always be zero.
//

#pragma ASM

    $REGUSE _fast_int_divide( R0, R1, R3, R4, R5, R6, R7, A, B )

#pragma ENDASM

unsigned int fast_int_divide( unsigned int dividend,
                              unsigned char divisor ) small
{
    // dividend in:           R6,R7
    // divisor in:            R5
    // uses:                  R0, R1, R3
    // quotient returned in:  R6,R7
    // remainder returned in: R4,R5

    divisor = divisor;

    #pragma ASM

?fast_int_divide:                       ;
                                        ;
        MOV     A,R5                    ;
        JZ      ?tiny_divisor_end       ;if divide by zero: terminate.
                                        ;
        MOV     A,PSW                   ;Get the base address of
        ANL     A,#0x18                 ; the current register bank.
        ORL     A,#0x06                 ;Point to register R6.
        MOV     R0,A                    ;R0 is a pointer to R6.
                                        ;
        MOV     R1,#02                  ;load loop counter.
                                        ;
        MOV     R4,#00                  ;Clear the running remainder.
                                        ;
?fast_int_divide_loop:                  ;
                                        ;
        MOV     A,@R0                   ;Get the MS nibble of the dividend
        SWAP    A                       ; into the accumulator.
        ANL     A,#0x0F                 ;
        ORL     A,R4                    ;
                                        ;
        MOV     B,R5                    ;Divide MS nibble by the divisor.
        DIV     AB                      ;
                                        ;
        SWAP    A                       ;Save partial result (which must be in
        MOV     R3,A                    ; the range 0...15) shifted 4 bits.
                                        ;
        MOV     A,B                     ;Save remainder (which must be in
        SWAP    A                       ; the range 0...15) shifted 4 bits left.
        MOV     R4,A                    ;
                                        ;
        MOV     A,@R0                   ;Get next nibble of the dividend
        ANL     A,#0x0F                 ; into the accumulator.
        ORL     A,R4                    ;Or in the remainder from previous
                                        ; partial division.
        MOV     B,R5                    ;Divide by the divisor.
        DIV     AB                      ;
                                        ;
        ORL     A,R3                    ;Or in the previously saved partial result.
        MOV     @R0,A                   ;Save the MSB of the quotient in R6/R7
                                        ; ready to be returned.
        MOV     A,B                     ;Save remainder shifted 4 bits left.
        SWAP    A                       ;
        MOV     R4,A                    ;
                                        ;
        INC     R0                      ;

        DJNZ    R1,?fast_int_divide_loop

        CLR     A                       ;Put remainder in R4/R5
        XCH     A,R4                    ;
        MOV     R5,A                    ;
                                        ;
?tiny_divisor_end:                      ;
                                        ;
        RET                             ;
                                        ;
    #pragma ENDASM

    return( dividend );

}
&lt;/pre&gt;&lt;/p&gt;&lt;div style="clear:both;"&gt;&lt;/div&gt;</description></item><item><title>RE: Fast 32-bit and 16-bit Division</title><link>https://community.arm.com/thread/41496?ContentTypeID=1</link><pubDate>Fri, 09 Jan 2004 12:46:34 GMT</pubDate><guid isPermaLink="false">dd9e70c8-6d3c-4c71-b136-2456382a7b5c:eaabccbb-fcbe-4059-81f7-bee2830d3271</guid><dc:creator>Graham Cole</dc:creator><description>&lt;p&gt;&lt;pre&gt;
//
//  fast_long_divide()
//
//  This function will divide an unsigned 32-value in registers
//  R4...R7 by a value in the range 1...16 in register R3. The
//  function uses the currently selected register bank.
//
//  For Keil parameter passing compatibility, the divisor is loaded
//  into R3 from memory.
//
//  This function requires about 25% of the processing time compared
//  with the already well optimised Keil compiler unsigned long
//  divide function and should be much faster than any general
//  purpose 32-bit by 32-bit divide.
//
//  DPL is used, but its value is preserved on the stack. This is
//  not essential, but is frequently helpful for compiler
//  optimisation.
//
//  This function also returns the remainder in R0...R3
//  (R0...R2) will always be zero.
//

#pragma ASM

    $REGUSE _fast_long_divide( R0, R1, R2, R3, R4, R5, R6, R7, A, B )

#pragma ENDASM

unsigned long int fast_long_divide( unsigned long int dividend,
                                    unsigned char divisor ) small
{
    divisor = divisor;

    #pragma ASM

        MOV     R3,?_fast_long_divide?BYTE+4

?fast_long_divide:                      ;

    // dividend in:           R4,R5,R6,R7
    // divisor in:            R3
    // uses:                  R0, R1, R2, A, B and DPL
    // quotient returned in:  R4,R5,R6,R7
    // remainder returned in: R0,R1,R2,R3

        PUSH    DPL                     ;
                                        ;
        MOV     A,PSW                   ;Get the base address of
        ANL     A,#0x18                 ; the current register bank.
        ORL     A,#0x04                 ;Point to register R4.
        MOV     R0,A                    ;R0 is a pointer to R4.
                                        ;
        MOV     DPL,#04                 ;load loop counter.
                                        ;
        MOV     R2,#00                  ;Clear the running remainder.
                                        ;
?fast_long_divide_loop:                 ;
                                        ;
        MOV     A,@R0                   ;Get the MS nibble of the
        SWAP    A                       ; dividend into the accumulator.
        ANL     A,#0x0F                 ;
        ORL     A,R2                    ;
                                        ;
        MOV     B,R3                    ;Divide MS nibble by the divisor.
        DIV     AB                      ;
                                        ;
        SWAP    A                       ;Save partial result (which must be
        MOV     R1,A                    ; in the range 0...15) shifted 4 bits.
                                        ;
        MOV     A,B                     ;Save remainder (which must be in
        SWAP    A                       ; the range 0...15) shifted 4 bits left.
        MOV     R2,A                    ;
                                        ;
        MOV     A,@R0                   ;Get next nibble of the dividend into
        ANL     A,#0x0F                 ; the accumulator.
        ORL     A,R2                    ;Or in the remainder from previous
                                        ; partial division.
        MOV     B,R3                    ;Divide by the divisor.
        DIV     AB                      ;
                                        ;
        ORL     A,R1                    ;Or in the previously saved partial result.
        MOV     @R0,A                   ;Save the MSB of the quotient ready
                                        ; to be returned.
        MOV     A,B                     ;Save remainder shifted 4 bits left.
        SWAP    A                       ;
        MOV     R2,A                    ;
                                        ;
        INC     R0                      ;

        DJNZ    DPL,?fast_long_divide_loop

        CLR     A                       ;Sort out the registers so that
        MOV     R0,A                    ; the remainder ends up in
        MOV     R1,A                    ; R3 with the other registers
        XCH     A,R2                    ; (R0,R1,R2) being zero.
        MOV     R3,A                    ;
                                        ;
        POP     DPL                     ;
                                        ;
        RET                             ;

    #pragma ENDASM

    return( dividend );

}
&lt;/pre&gt;&lt;/p&gt;&lt;div style="clear:both;"&gt;&lt;/div&gt;</description></item></channel></rss>