;; quad
;;
;; Average square of four pixels to single pixel.
;; Produces NxM pixel image from 2Nx2M pixel image.
;; Each loop iteration reads 32 bytes from each of two input rows and
;; generates 16 output pixels. The loop body always runs at least once.
;; May over-read by up to 63 bytes.
;; May over-write by up to 15 bytes.
;;
;; On entry:
;;   r0 = Input line start address
;;   r1 = Input line width in bytes
;;        (added to r0 to locate the second input row)
;;   r2 = Input line total size in bytes
;;        (added to the second-row address to form the loop end address)
;;   r3 = Output line start address
;;
;; On exit:
;;   r0, r1 and r3 have been advanced past the data processed,
;;   r2 holds the computed end address,
;;   Q0-Q3 and the condition flags have been overwritten.
quad FUNC
;; Compute start of second line (r1) and end address (r2).
;; r2 is measured from the start of the second line, so it is compared
;; against the second-row pointer r1 at the bottom of the loop.
        ADD     r1,r1,r0
        ADD     r2,r2,r1
1
;; Load 32 pixels from each of two rows, post-incrementing both pointers
        VLD1.8  {Q0,Q1},[r0]!
        VLD1.8  {Q2,Q3},[r1]!
;; Sum neighbouring 8-bit pixels in each row into 16-bit lanes
        VPADDL.U8 Q0,Q0
        VPADDL.U8 Q1,Q1
        VPADDL.U8 Q2,Q2
        VPADDL.U8 Q3,Q3
;; Sum 16-bit values vertically: each lane now holds a 2x2 pixel total
        VADD.U16 Q0,Q0,Q2
        VADD.U16 Q1,Q1,Q3
;; Divide each sum of four pixels by 4 (truncating) and narrow to 8 bits.
;; The results land in D0 and D1, which together form Q0.
        VSHRN.U16 D0,Q0,#2
        VSHRN.U16 D1,Q1,#2
;; Store 16 pixels of resized image
        VST1.8  {Q0},[r3]!
;; Loop while the second-row pointer is still below the end address.
;; Addresses are unsigned, so the unsigned LO condition is used rather
;; than a signed one. The comparison is strict so that reaching the end
;; address exactly terminates the loop; looping on equality would process
;; one more block lying entirely past the end of the input and write
;; 16 bytes beyond the output, exceeding the over-write bound above.
        CMP     r1,r2
        BLO     %b1
;; Return from function
        BX      lr
        ENDFUNC