/*
 * Vector Dot Product
 *
 * Computes the dot product of two hard-wired input buffers (data0 and
 * data1) of 128 samples each.  The samples are 16-bit values treated as
 * 1.15 signed fractional numbers.
 *
 * Register roles:
 *   I0, I1  - read pointers into data0 / data1
 *   R0, R1  - current sample pair from each buffer (two 16-bit samples
 *             per 32-bit load)
 *   A0, A1  - partial sums of the low-half / high-half products
 *   P4, LC0 - hardware loop count
 */
# mach: bfin

.include "testutils.inc"
	start

	// Point I0 / I1 at the two input buffers.
	loadsym I0, data0;
	loadsym I1, data1;

	// Loop control.  Every dual MAC consumes one sample pair from each
	// buffer, so 128 samples take 64 dual MACs: 63 inside the hardware
	// loop plus the one that follows it.
	P4 = 63;
	LSETUP (loop1, loop1) LC0 = P4;

	// Clear both accumulators.
	A1 = A0 = 0;

	// Prime R0 / R1 with the first sample pair of each buffer.  These
	// two loads are issued one after the other; inside the loop the
	// loads for the next pair ride along with the dual MAC.
	R0 = [I0++];
	R1 = [I1++];

	// Single-instruction loop body: accumulate the current pair and, in
	// the same issue slot, fetch the next one.
loop1:
	A1 += R0.H * R1.H, A0 += R0.L * R1.L || R0 = [I0++] || R1 = [I1++];

	// The pair fetched by the last loop pass has not been accumulated
	// yet; do that here.
	A1 += R0.H * R1.H, A0 += R0.L * R1.L;

	// Fold the two partial sums together and move the result to R0.
	R0 = (A0 += A1);

	// Expected result is 0x00545600 (about 0.002574 as a fraction);
	// each half of R0 is checked separately.
	DBGA (R0.L, 0x5600);
	DBGA (R0.H, 0x0054);

	pass

	.data

	// First input vector: 128 halfwords, 0x00 through 0xFE in steps of 2.
data0:
	.dw 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E
	.dw 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E
	.dw 0x20, 0x22, 0x24, 0x26, 0x28, 0x2A, 0x2C, 0x2E
	.dw 0x30, 0x32, 0x34, 0x36, 0x38, 0x3A, 0x3C, 0x3E
	.dw 0x40, 0x42, 0x44, 0x46, 0x48, 0x4A, 0x4C, 0x4E
	.dw 0x50, 0x52, 0x54, 0x56, 0x58, 0x5A, 0x5C, 0x5E
	.dw 0x60, 0x62, 0x64, 0x66, 0x68, 0x6A, 0x6C, 0x6E
	.dw 0x70, 0x72, 0x74, 0x76, 0x78, 0x7A, 0x7C, 0x7E
	.dw 0x80, 0x82, 0x84, 0x86, 0x88, 0x8A, 0x8C, 0x8E
	.dw 0x90, 0x92, 0x94, 0x96, 0x98, 0x9A, 0x9C, 0x9E
	.dw 0xA0, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAC, 0xAE
	.dw 0xB0, 0xB2, 0xB4, 0xB6, 0xB8, 0xBA, 0xBC, 0xBE
	.dw 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE
	.dw 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE
	.dw 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE
	.dw 0xF0, 0xF2, 0xF4, 0xF6, 0xF8, 0xFA, 0xFC, 0xFE

	// Second input vector: same 128 values as data0.
data1:
	.dw 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E
	.dw 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E
	.dw 0x20, 0x22, 0x24, 0x26, 0x28, 0x2A, 0x2C, 0x2E
	.dw 0x30, 0x32, 0x34, 0x36, 0x38, 0x3A, 0x3C, 0x3E
	.dw 0x40, 0x42, 0x44, 0x46, 0x48, 0x4A, 0x4C, 0x4E
	.dw 0x50, 0x52, 0x54, 0x56, 0x58, 0x5A, 0x5C, 0x5E
	.dw 0x60, 0x62, 0x64, 0x66, 0x68, 0x6A, 0x6C, 0x6E
	.dw 0x70, 0x72, 0x74, 0x76, 0x78, 0x7A, 0x7C, 0x7E
	.dw 0x80, 0x82, 0x84, 0x86, 0x88, 0x8A, 0x8C, 0x8E
	.dw 0x90, 0x92, 0x94, 0x96, 0x98, 0x9A, 0x9C, 0x9E
	.dw 0xA0, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAC, 0xAE
	.dw 0xB0, 0xB2, 0xB4, 0xB6, 0xB8, 0xBA, 0xBC, 0xBE
	.dw 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE
	.dw 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE
	.dw 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE
	.dw 0xF0, 0xF2, 0xF4, 0xF6, 0xF8, 0xFA, 0xFC, 0xFE