Fast 32-bit and 16-bit Division

Here are two assembly language functions which perform division of 32-bit and 16-bit numbers by values in the range 1…16 inclusive.

Although the range of the divisor is very limited, these functions are significantly faster than conventional division functions. The 32-bit division requires about 25% of the time required by C51's 32-bit division and the 16-bit division requires about 60% of the time required by C51's 16-bit division. (Even though the Keil division functions seem to be well optimised.)

The 32-bit division function is sufficiently fast that it may called more than once to divide a 32-bit value by a large value that has convenient factors. For example, dividing by 60 by first dividing by ten and then by six. This will still be significantly faster than conventional division.

The functions also return a remainder and may find useful application in radix conversion by repeated division by the radix.

These functions use the interesting trick of deriving a pointer into the current register bank. This allows the functions to loop through the dividend making the code much more compact although at the slight cost of speed.

Parents
  • //
    //  fast_long_divide()
    //
    //  This function will divide an unsigned 32-value in registers
    //  R4...R7 by a value in the range 1...16 in register R3. The
    //  function uses the currently selected register bank.
    //
    //  For Keil parameter passing compatibility, the divisor is loaded
    //  into R3 from memory.
    //
    //  This function requires about 25% of the processing time compared
    //  with the already well optimised Keil compiler unsigned long
    //  divide function and should be much faster than any general
    //  purpose 32-bit by 32-bit divide.
    //
    //  DPL is used, but its value is preserved on the stack. This is
    //  not essential, but is frequently helpful for compiler
    //  optimisation.
    //
    //  This function also returns the remainder in R0...R3
    //  (R0...R2) will always be zero.
    //
    
    #pragma ASM
    
        $REGUSE _fast_long_divide( R0, R1, R2, R3, R4, R5, R6, R7, A, B )
    
    #pragma ENDASM
    
    unsigned long int fast_long_divide( unsigned long int dividend,
                                        unsigned char divisor ) small
    {
        divisor = divisor;
    
        #pragma ASM
    
            MOV     R3,?_fast_long_divide?BYTE+4
    
    ?fast_long_divide:                      ;
    
        // dividend in:           R4,R5,R6,R7
        // divisor in:            R3
        // uses:                  R0, R1, R2, A, B and DPL
        // quotient returned in:  R4,R5,R6,R7
        // remainder returned in: R0,R1,R2,R3
    
            PUSH    DPL                     ;
                                            ;
            MOV     A,PSW                   ;Get the base address of
            ANL     A,#0x18                 ; the current register bank.
            ORL     A,#0x04                 ;Point to register R4.
            MOV     R0,A                    ;R0 is a pointer to R4.
                                            ;
            MOV     DPL,#04                 ;load loop counter.
                                            ;
            MOV     R2,#00                  ;Clear the running remainder.
                                            ;
    ?fast_long_divide_loop:                 ;
                                            ;
            MOV     A,@R0                   ;Get the MS nibble of the
            SWAP    A                       ; dividend into the accumulator.
            ANL     A,#0x0F                 ;
            ORL     A,R2                    ;
                                            ;
            MOV     B,R3                    ;Divide MS nibble by the divisor.
            DIV     AB                      ;
                                            ;
            SWAP    A                       ;Save partial result (which must be
            MOV     R1,A                    ; in the range 0...15) shifted 4 bits.
                                            ;
            MOV     A,B                     ;Save remainder (which must be in
            SWAP    A                       ; the range 0...15) shifted 4 bits left.
            MOV     R2,A                    ;
                                            ;
            MOV     A,@R0                   ;Get next nibble of the dividend into
            ANL     A,#0x0F                 ; the accumulator.
            ORL     A,R2                    ;Or in the remainder from previous
                                            ; partial division.
            MOV     B,R3                    ;Divide by the divisor.
            DIV     AB                      ;
                                            ;
            ORL     A,R1                    ;Or in the previously saved partial result.
            MOV     @R0,A                   ;Save the MSB of the quotient ready
                                            ; to be returned.
            MOV     A,B                     ;Save remainder shifted 4 bits left.
            SWAP    A                       ;
            MOV     R2,A                    ;
                                            ;
            INC     R0                      ;
    
            DJNZ    DPL,?fast_long_divide_loop
    
            CLR     A                       ;Sort out the registers so that
            MOV     R0,A                    ; the remainder ends up in
            MOV     R1,A                    ; R3 with the other registers
            XCH     A,R2                    ; (R0,R1,R2) being zero.
            MOV     R3,A                    ;
                                            ;
            POP     DPL                     ;
                                            ;
            RET                             ;
    
        #pragma ENDASM
    
        return( dividend );
    
    }
    

Reply
  • //
    //  fast_long_divide()
    //
    //  This function will divide an unsigned 32-value in registers
    //  R4...R7 by a value in the range 1...16 in register R3. The
    //  function uses the currently selected register bank.
    //
    //  For Keil parameter passing compatibility, the divisor is loaded
    //  into R3 from memory.
    //
    //  This function requires about 25% of the processing time compared
    //  with the already well optimised Keil compiler unsigned long
    //  divide function and should be much faster than any general
    //  purpose 32-bit by 32-bit divide.
    //
    //  DPL is used, but its value is preserved on the stack. This is
    //  not essential, but is frequently helpful for compiler
    //  optimisation.
    //
    //  This function also returns the remainder in R0...R3
    //  (R0...R2) will always be zero.
    //
    
    #pragma ASM
    
        $REGUSE _fast_long_divide( R0, R1, R2, R3, R4, R5, R6, R7, A, B )
    
    #pragma ENDASM
    
    unsigned long int fast_long_divide( unsigned long int dividend,
                                        unsigned char divisor ) small
    {
        divisor = divisor;
    
        #pragma ASM
    
            MOV     R3,?_fast_long_divide?BYTE+4
    
    ?fast_long_divide:                      ;
    
        // dividend in:           R4,R5,R6,R7
        // divisor in:            R3
        // uses:                  R0, R1, R2, A, B and DPL
        // quotient returned in:  R4,R5,R6,R7
        // remainder returned in: R0,R1,R2,R3
    
            PUSH    DPL                     ;
                                            ;
            MOV     A,PSW                   ;Get the base address of
            ANL     A,#0x18                 ; the current register bank.
            ORL     A,#0x04                 ;Point to register R4.
            MOV     R0,A                    ;R0 is a pointer to R4.
                                            ;
            MOV     DPL,#04                 ;load loop counter.
                                            ;
            MOV     R2,#00                  ;Clear the running remainder.
                                            ;
    ?fast_long_divide_loop:                 ;
                                            ;
            MOV     A,@R0                   ;Get the MS nibble of the
            SWAP    A                       ; dividend into the accumulator.
            ANL     A,#0x0F                 ;
            ORL     A,R2                    ;
                                            ;
            MOV     B,R3                    ;Divide MS nibble by the divisor.
            DIV     AB                      ;
                                            ;
            SWAP    A                       ;Save partial result (which must be
            MOV     R1,A                    ; in the range 0...15) shifted 4 bits.
                                            ;
            MOV     A,B                     ;Save remainder (which must be in
            SWAP    A                       ; the range 0...15) shifted 4 bits left.
            MOV     R2,A                    ;
                                            ;
            MOV     A,@R0                   ;Get next nibble of the dividend into
            ANL     A,#0x0F                 ; the accumulator.
            ORL     A,R2                    ;Or in the remainder from previous
                                            ; partial division.
            MOV     B,R3                    ;Divide by the divisor.
            DIV     AB                      ;
                                            ;
            ORL     A,R1                    ;Or in the previously saved partial result.
            MOV     @R0,A                   ;Save the MSB of the quotient ready
                                            ; to be returned.
            MOV     A,B                     ;Save remainder shifted 4 bits left.
            SWAP    A                       ;
            MOV     R2,A                    ;
                                            ;
            INC     R0                      ;
    
            DJNZ    DPL,?fast_long_divide_loop
    
            CLR     A                       ;Sort out the registers so that
            MOV     R0,A                    ; the remainder ends up in
            MOV     R1,A                    ; R3 with the other registers
            XCH     A,R2                    ; (R0,R1,R2) being zero.
            MOV     R3,A                    ;
                                            ;
            POP     DPL                     ;
                                            ;
            RET                             ;
    
        #pragma ENDASM
    
        return( dividend );
    
    }
    

Children
More questions in this forum