c c radar.fx - radar benchmark program (2D distribution) c c David O'Hallaron, Carnegie Mellon University, Nov 1993 c c Adapted from G. Shaw et al, "Multiprocessors for Radar Signal Processing", c Lincoln Laboratories, MIT, Lexington, MA, Tech Report 961, 17 Nov 1992. c #define ITERS 50 /* number of data sets */ #define NCHAN 4 /* number of antenna channels */ #define NRANGE 10 /* number of range elements */ #define NDOP 512 /* number of doppler elements */ #define MDOP 9 /* log2 of doppler elements */ #define NTARGETS 4 /* number of targets */ #define K0 1 /* Doppler window is K0..K1 */ #define K1 40 #define CHANPROCS NCHAN /* processors for channel dimension */ #define RANGEPROCS NRANGE /* processors for range dimension */ program radar c major distributed arrays real inpr(2,NDOP,NRANGE,NCHAN) ! input sensor array (before c.t.) integer det(NDOP,NRANGE,NCHAN) ! output detect array c precomputed FFT data structures real w(NDOP) ! hamming window integer brt(NDOP) ! bit reverse table real twiddles(2,NDOP/2) ! coefficients c target ranges and frequencies integer range0(NTARGETS) integer doppler0(NTARGETS) c misc items integer det2(K1-K0+1,NRANGE) integer k, iter real sigma0, thresh, const2, const1 real pi, a0 real cos, real c timing variables integer fclocks, tclocks, rclocks, dclocks integer fusecs, tusecs, rusecs, dusecs, usecs integer fpercent, tpercent, rpercent, dpercent common /timing/ fclocks, tclocks, rclocks, dclocks c data layout statements template tr(NCHAN) template td(NRANGE,NCHAN) align inpr(l,k,j,i) with tr(i) align det(k,j,i) with td(j,i) distribute tr(BLOCK(CHANPROCS)) distribute td(BLOCK(RANGEPROCS),BLOCK(CHANPROCS)) c initialize constants pi = 3.141592653589793 sigma0 = 1.13 thresh = 2.421 const2 = thresh/(NRANGE*(K1-K0+1)-1+thresh) const1 = 2*pi/NDOP c target frequencies doppler0(1) = 6 doppler0(2) = 16 doppler0(3) = 26 doppler0(4) = 36 c target ranges range0(1) = 3 range0(2) = 5 range0(3) = 7 range0(4) = 9 c onetime setup for FFT call gen_bit_reverse_table(brt,NDOP) call gen_W_table(twiddles,NDOP,MDOP,NDOP/2) c precompute the hamming window a0 = 0.5*(NDOP-1) do k = 1,NDOP w(k) = 0.54 + 0.46*cos(const1*(k-1-a0)) enddo c create the input data set (one channel per node) call dgen(inpr, range0, doppler0, sigma0) c c The radar benchmark c tclocks = 0 ! transpose time fclocks = 0 ! fft/mag time rclocks = 0 ! reduce time dclocks = 0 ! detect time do iter = 1,ITERS call sample(inpr) call compute(inpr, det, brt, twiddles, w, const2) enddo c c print results for the first channel c det2 = det(K0:K1,:,1) tusecs = (tclocks/20)/ITERS fusecs = (fclocks/20)/ITERS rusecs = (rclocks/20)/ITERS dusecs = (dclocks/20)/ITERS usecs = tusecs+fusecs+rusecs+dusecs tpercent = (real(tusecs)/real(usecs))*100.0 fpercent = (real(fusecs)/real(usecs))*100.0 rpercent = (real(rusecs)/real(usecs))*100.0 dpercent = (real(dusecs)/real(usecs))*100.0 print *, "2D Distribution" print *, NCHAN, " channels", NRANGE, " range cells" print *, "tpose :", tusecs, tpercent print *, "fft/mag :", fusecs, fpercent print *, "reduce :", rusecs, rpercent print *, "detect :", dusecs, dpercent print *, "total(us) :", usecs print *, "" print *, det2(5,2), det2(6,2), det2(7,2), $ det2(8,2), det2(9,2) print *, det2(5,3), det2(6,3), det2(7,3), $ det2(8,3), det2(9,3) print *, det2(15,5), det2(16,5), det2(17,5), $ det2(18,5), det2(19,5) print *, det2(25,7), det2(26,7), det2(27,7), $ det2(28,7), det2(29,7) print *, det2(35,9), det2(36,9), det2(37,9), $ det2(38,9), det2(39,9) print *, det2(35,10), det2(36,10), det2(37,10), $ det2(38,10), det2(39,10) end c c sample - sample the input sensor data c subroutine sample(inpr) real inpr(2,NDOP,NRANGE,NCHAN) template tr(NCHAN) align inpr(l,k,j,i) with tr(i) distribute tr(BLOCK(CHANPROCS)) nocheck inpr return end c c compute - process the input samples c subroutine compute(inpr, det, brt, twiddles, w, const2) real inpr(2,NDOP,NRANGE,NCHAN) ! sensor inputs (before corner turn) integer det(NDOP,NRANGE,NCHAN) ! detect array integer brt(NDOP) ! bit reverse table real twiddles(2,NDOP/2) ! coefficients real w(NDOP) ! hamming window real const2 ! precomputed threshhold constant c intermediate distributed arrays real inpd(2,NDOP,NRANGE,NCHAN) ! sensor inputs (after corner turn) real mag(NDOP,NRANGE,NCHAN) ! magnitude array c data layout statements template tr(NCHAN) template td(NRANGE,NCHAN) align inpr(l,k,j,i) with tr(i) align inpd(l,k,j,i) with td(j,i) align mag(k,j,i) with td(j,i) align det(k,j,i) with td(j,i) distribute tr(BLOCK(CHANPROCS)) distribute td(BLOCK(RANGEPROCS),BLOCK(CHANPROCS)) c intermediate replicated arrays real x(2,NDOP) ! windowed FFT input real y(2,NDOP) ! scaled FFT output real localthresh(NCHAN) ! threshholds c misc integer i, j, k integer timer_stop real left, right, sqrt real tmpmag(K1-K0+1), magsum(NCHAN) integer fclocks, tclocks, rclocks, dclocks common /timing/ fclocks, tclocks, rclocks, dclocks c corner turn (transpose) the input data call timer_start() idxperm(inpd,1,4,3,2) = idxperm(inpr,1,4,3,2) tclocks = tclocks + timer_stop() c windowing and Doppler processing call timer_start() pdo (j = 1:NRANGE, i=1:NCHAN) pin inpd(:,:,j,i) pout mag(:,j,i) x(1,:) = inpd(1,:,j,i)*w x(2,:) = inpd(2,:,j,i)*w call fft(x,brt,twiddles,NDOP,MDOP,NDOP/2) y = x/NDOP tmpmag = y(1,K0:K1)*y(1,K0:K1) + $ y(2,K0:K1)*y(2,K0:K1) mag(K0:K1,j,i) = sqrt(tmpmag) endpdo fclocks = fclocks + timer_stop() c Doppler subselection and reduction call timer_start() pdo (j=1:NRANGE, i=1:NCHAN) pin mag(:,j,i) pmvars magsum pcommutative pinit magsum = 0.0 pbody do k = K0,K1 magsum(i) = magsum(i) + mag(k,j,i) enddo pmerge magsum = left(magsum) + right(magsum) endpdo rclocks = rclocks + timer_stop() c constant false alarm rate (CFAR) detection call timer_start() pdo (j=1:NRANGE, i=1:NCHAN) pin mag(:,j,i) pout det(:,j,i) localthresh(i) = const2*magsum(i) do k = K0,K1 if (mag(k,j,i) .gt. localthresh(i)) then det(k,j,i) = 1 else det(k,j,i) = 0 endif enddo endpdo dclocks = dclocks + timer_stop() return end c c dgen - generate the input data set c subroutine dgen(inpr, range0, doppler0, sigma0) real inpr(2,NDOP,NRANGE,NCHAN) integer range0(NTARGETS), doppler0(NTARGETS) real sigma0 integer i, j, k, m, range, doppler real arg1, arg2, pi, gauss1, cos, sin template tr(NCHAN) align inpr(l,k,j,i) with tr(i) distribute tr(BLOCK(CHANPROCS)) pi = 3.141592653589793 arg1 = 2.0*pi/NDOP c add noise to the jth range vector pdo i = 1,NCHAN pout inpr(:,:,:,i) do j = 1,NRANGE do k = 1,NDOP inpr(1,k,j,i) = gauss1(0.0,sigma0) inpr(2,k,j,i) = gauss1(0.0,sigma0) enddo enddo endpdo c add a signal to selected range vectors do m = 1, NTARGETS range = range0(m) doppler = doppler0(m) pdo i = 1, NCHAN pin inpr(:,:,:,i) pout inpr(:,:,:,i) do k = 1,NDOP arg2 = arg1*(k-1)*doppler inpr(1,k,range,i) = inpr(1,k,range,i) + cos(arg2) inpr(2,k,range,i) = inpr(2,k,range,i) + sin(arg2) enddo endpdo enddo end c c gauss1 - crude approximation of normal distribution formed c from sum of 12 uniforms. c real function gauss1(mean,sigma) real mean, sigma real x,random1,real integer i real x x = 0.0 do i=1,12 x = x + real(random1()) enddo gauss1 = mean + sigma*(x - 6.0) end c c random1 - uniformly distributed [0,1) real c real function random1() integer a, m, m1, b, a0, a1, b0, b1 integer mod real real save a data a/11111111/ m = 100000000 m1 = 10000 b = 31415821 a1 = a/m1 a0 = mod(a,m1) b1 = b/m1 b0 = mod(b,m1) a0 = mod((a0*b1 + a1*b0),m1)*m1 + a0*b0 a = mod((a0 + 1),m) random1 = real(a)/m end c c gen_bit_reverse_table c Initialize bit reverse table (it actually differed by 1). c c Postcondition: br(i) = bit-reverse(i-1) + 1 c subroutine gen_bit_reverse_table(brt,n) integer n integer brt(n) integer i, j, k, nv2 nv2 = n/2 j = 1 brt(1) = j do i = 2, n k = nv2 6 continue if (k .lt. j) then j = j - k k = k/2 goto 6 endif j = j + k brt(i) = j enddo return end c c gen_W_table: generate powers of W. c c Postcondition: W(i) = W**(i-1) c subroutine gen_W_table(W,n,m,ndv2) integer n,m,ndv2 real W(2,ndv2) integer i real pi real wr,wi real ptr, pti real cos, sin pi = 3.141592653589793 wr = cos(pi/ndv2) wi = -sin(pi/ndv2) W(1,1) = 1.0 W(2,1) = 0.0 ptr = 1.0 pti = 0.0 do i = 2,ndv2 W(1,i) = ptr*wr - pti*wi W(2,i) = ptr*wi + pti*wr ptr = W(1,i) pti = W(2,i) enddo return end subroutine fft(a, brt, W, n, m, ndv2) integer n,m,ndv2 real a(2,n) integer brt(n) real W(2,ndv2) integer i,j integer powerOfW integer sPowerOfW integer ijDiff integer stage integer stride integer first real pwr,pwi,tr,ti real ir, ii, jr, ji nocheck c c bit reverse step c do i = 1,n j = brt(i) if (i .lt. j) then tr = a(1,j) ti = a(2,j) a(1,j) = a(1,i) a(2,j) = a(2,i) a(1,i) = tr a(2,i) = ti endif enddo c c butterfly computations c ijDiff = 1 stride = 2 sPowerOfW = ndv2 do stage = 1,m c Invariant: stride = 2 ** stage c Invariant: ijDiff = 2 ** (stage - 1) first = 1 do powerOfW = 1, ndv2, sPowerOfW pwr = W(1,powerOfW) pwi = W(2,powerOfW) c Invariant: pwr + sqrt(-1)*pwi = W**(powerOfW - 1) do i = first, n, stride j = i + ijDiff jr = a(1,j) ji = a(2,j) ir = a(1,i) ii = a(2,i) tr = jr*pwr - ji*pwi ti = jr*pwi + ji*pwr a(1,j) = ir - tr a(2,j) = ii - ti a(1,i) = ir + tr a(2,i) = ii + ti enddo first = first + 1 enddo ijDiff = stride stride = stride * 2 sPowerOfW = sPowerOfW / 2 enddo return end