% This is the NESL code for the Parallel Tree Multipole Algorithm (PMTA). The algorithm was proposed by the Scientific Computing Group at the Dept. of Electrical Engineering, Duke University. For a full decription, see Technical Report 94-002, Duke Univaersity, 1994. % % ------ PARALLEL MULTIPOLE TREE ALGORITHM ------- % % #terms in the multipole expansion can be adjusted. % P = 3; % The parameter that determines accuracy % Alpha = 0.8; % adjust the number of particles in each leaf node % m = 80; %-----------------------------------------------------------------------% % The function that generates randomly distributed points in 3-D. Modify this function for running the algorithm on a different distribution. % % uncharged distrib with mass = 1 % function generate_point(n) = let pos_vec = {(rand(i)-5.0,rand(i)-5.0,rand(i)-5.0): i in dist(10.0,n)}; vel_vec = {(rand(i)-5.0,rand(i)-5.0,rand(i)-5.0): i in dist(10.0,n)}; mass_vec = dist(1.0,n); in {[x,y,z,vx,vy,vz,mass_vec]: (x,y,z) in pos_vec ; (vx,vy,vz) in vel_vec; mass_vec} $ function rpts(n) = generate_point(n) $ %-----------------------------------------------------------------------% % data-types and related functions for PMTA % datatype vect (float,float,float); datatype box (vect,vect); datatype cmass (float,vect); Empty_pvect = [](float,float,float,float) $ function real(a,b) = a $ function cpos(cmass(m,pos)) = pos $ function cm_mass(cmass(m,pos)) = m $ function pos(coords,particle_index) = coords $ function particle_pos(x,y,z,rest) = (x,y,z) $ function particle_mass(x,y,z,m) = m $ function get_leaf_center(d,cm,rest) = let vect(v) = cpos(cm) in v $ function get_leaf_pvect(d,c,pvect,rest) = pvect $ function mass((pos,m)) = m $ function indx(coords,particle_index) = particle_index $ function size(box(coords,vect(sz))) = sz $ function get_center(box(vect(coords),sz)) = coords $ function kids(d,cm,pvect,mpole,cell_box,kd) = kd $ function make_box((x0,y0,z0),(dx,dy,dz)) = box(vect(x0,y0,z0),vect(dx,dy,dz)) $ function negv(vect(x,y,z)) = vect(-x, -y, -z) $ function addv(vect(x1,y1,z1),vect(x2,y2,z2)) = vect((x1+x2),(y1+y2),(z1+z2)) $ function subv(vect(x1,y1,z1),vect(x2,y2,z2)) = vect((x1-x2),(y1-y2),(z1-z2)) $ function mulvs(vect(x1,y1,z1),s) = vect((x1*s),(y1*s),(z1*s)) $ function dotvp(vect(x1,y1,z1),vect(x2,y2,z2)) = x1*x2 + y1*y2 + z1*z2 $ function mulmv((r1,r2,r3),v) = vect(dotvp(r1,v),dotvp(r2,v),dotvp(r3,v)) $ function get_zero_cmass(ignore) = cmass(0.0,vect(0.0,0.0,0.0)) $ function max3(a,b,c) = max(a,max(b,c)) $ function unzip3(v) = let (xs,yzs) = unzip(v); (ys,zs) = unzip(yzs); in (xs,ys,zs) $ function sumv(vec) = let (vecx,vecy,vecz) = unzip3(vec); in (sum(vecx),sum(vecy),sum(vecz)) $ function nested_get(a,i) = let lens = {#i:i}; vals = a->flatten(i) in partition(vals,lens) $ % added for mpole % function add_c((a,b),(c,d)) = (a+c,b+d) $ function mult_r_c(r,(a,b)) = (r*a,r*b) $ function mult_c_c((a,b),(c,d)) = (a*c-b*d,a*d+b*c) $ function get_r((r,th,ph),mass) = r $ function get_th((r,th,ph),mass) = th $ function get_ph((r,th,ph),mass) = ph $ function sum_c(v) = let (v1,v2) = unzip(v) in (sum(v1),sum(v2)) $ function sum_2_terms(term_1,term_2) = if (#term_1==0) then term_2 else if (#term_2==0) then term_1 else let lens = {#term : term in term_1 }; t1 = flatten(term_1); t2 = flatten(term_2); sum_12 = {add_c(t1,t2) : t1; t2}; in partition(sum_12,lens) $ function sum_terms(term_vec) = let filt_vec = {term : term in term_vec | #term>0 }; in %if (#filt_vec == 0) then Zero_Exp else% if (#filt_vec == 0) then []([(float,float)]) else let lens = {#term : term in filt_vec[0] }; flat_terms = {flatten(filt_vec) : filt_vec}; num = #flat_terms[0]; seq = iseq(0,1,num); vects = {{elt(flat_terms,i):flat_terms}: i in seq}; sums = {sum_c(vects) : vects}; in partition(sums,lens) $ %-----------------------------------------------------------------------% % Values that depend on P % Num_Terms = (p+1)*(p+2)/2; Index_Vector = flatten({{(n,m): m in iseq(0,1,n+1)}: n in iseq(0,1,P+1)}); Lens = let temp_vect = {{(n,m): m in iseq(0,1,n+1)}: n in iseq(0,1,P+1)}; in {#temp_vect: temp_vect}; Zero_Exp = {{(0.0,0.0): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)} $ Half_Zero_Exp = {{(0.0,0.0): m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)} $ function factorial(i) = if (i<=1) then 1.0 else float(i)*factorial(i-1) $ function odd_factorial(i) = if (i <= 1) then 1.0 else float(i)*odd_factorial(i-2) $ Fact = {factorial(i) : i in iseq(0,1,2*P+1)}; Odd_Fact = {odd_factorial(2*i-1) : i in iseq(0,1,P+1)}; NM_term = {{sqrt(Fact[n-abs(m)]/Fact[n+abs(m)]) : m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)}; %-----------------------------------------------------------------------% % Math functions used in the PMTA algorithm % NM_term = {{sqrt(Fact[n-abs(m)]/Fact[n+abs(m)]) : m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)}; function conjugate(a,b) = (a,-b) $ function get_root_term(v,n,m) = if (abs(m) > n) then 0.0 else v[n][m] $ function minus_one_raised_to(i) = if evenp(i) then 1.0 else -1.0 $ function get_pos(n,m) = n*(n+1)/2 + m $ function get_A(u,v) = if ((u < abs(v))or(u > P)) then 1.0 else minus_one_raised_to(u)/(sqrt(Fact[u-v]*Fact[u+v])) $ function get_J1(u,v) = if (u*v>=0) then 1.0 else minus_one_raised_to(min(abs(u),abs(v))) $ function get_J2(u,v,w) = let coeff = minus_one_raised_to(w); in if (u*v<=0) then coeff else coeff * minus_one_raised_to(min(abs(u),abs(v))) $ function get_term(v,n,m) = if ((abs(m) > n) or (n>P)) then (0.0,0.0) else if (m < 0) then conjugate(v[n][-m]) else v[n][m] $ % expt(0.0,0.0) is not defined!! % function expt1(a,b) = if (b==0.0) then 1.0 else if (a==0.0) then 0.0 else expt(a,b) $ function conjugate(a,b) = (a,-b) $ function minus_one_raised_to(i) = if evenp(i) then 1.0 else -1.0 $ function get_root_term(v,n,m) = if (abs(m) > n) then 0.0 else v[n][m] $ function atan2(x,y) = if (x==0.0) then if (y < 0.0) then -pi/2.0 else pi/2.0 else if (x>0.0) then atan(y/x) else if (y>=0.0) then atan(y/x) + pi else atan(y/x) - pi $ function new_coords((x,y,z),(x0,y0,z0)) = let dx = x - x0; dy = y - y0; dz = z - z0; r = sqrt(dx^2 + dy^2 + dz^2); % th = if (r==0.0) then 0.0 else acos(dz/r); % th = if (r==0.0) then pi/2.0 else acos(dz/r); ph = atan2(dx,dy); in (r,th,ph) $ function vect_new_coords(center,v) = {new_coords((x,y,z),center) : (x,y,z,m) in v} $ function atan2(x,y) = if (x==0.0) then if (y < 0.0) then -pi/2.0 else pi/2.0 else if (x>0.0) then atan(y/x) else if (y>=0.0) then atan(y/x) + pi else atan(y/x) - pi $ function new_coords2((x,y,z),(x0,y0,z0)) = let dx = x - x0; dy = y - y0; dz = z - z0; r = sqrt(dx^2 + dy^2 + dz^2); th = if (r==0.0) then 0.0 else acos(dz/r); ph = if ((dx==0.0) and (dy==0.0)) then 0.0 else atan2(dx,dy); in vect(r,th,ph) $ function sum_terms(term_vec) = let filt_vec = {term : term in term_vec | #term>0 }; in if (#filt_vec == 0) then []([(float,float)]) else let lens = {#term : term in filt_vec[0] }; flat_terms = {flatten(filt_vec) : filt_vec}; num = #flat_terms[0]; seq = iseq(0,1,num); vects = {{elt(flat_terms,i):flat_terms}: i in seq}; sums = {sum_c(vects) : vects}; in partition(sums,lens) $ function legendre(n,m,x) = if (n n) or (n > P) or (n<0)) then (0.0,0.0) else if (m<0) then conjugate(Y_term(n,-m,th,ph)) else let m_fl = float(m); in mult_r_c(get_root_term(NM_term,n,m) * legendre(n,abs(m),cos(th)), (cos(m_fl*ph),sin(m_fl*ph))) $ % finding mpole expansion of one particle % function M_term(n,m,body) = mult_r_c(mass(body)*(get_r(body)^n),Y_term(n,-m,get_th(body),get_ph(body))) $ function get_M_terms(particle) = {{ M_term(n,m,particle): m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)} $ function first_M_terms(pmass) = rep(Zero_Exp,[(pmass,0.0)],0) $ % finding m_pole exp in serial % function serial_legendre(result,ind_vect,x) = if (#ind_vect==0) then result else let ((n,m),rest) = head_rest(ind_vect); curr_result = if (n==m) then minus_one_raised_to(m)*Odd_Fact[m]*expt1((1.0-x*x),float(m)/2.0) else if (n==m+1) then x*float(2*m+1)*result[get_pos(m,m)] else (x*float(2*n-1)*result[get_pos(n-1,m)] - float(n+m-1)*result[get_pos(n-2,m)])/float(n-m); result = rep(result,curr_result,get_pos(n,m)); in serial_legendre(result,rest,x) $ function ser_legendre(x) = let result = dist(0.0,Num_Terms); ind_vect = index_vector; in serial_legendre(result,ind_vect,x) $ function M_contrib(n,m,r,mass,ph,legend) = let m_ph = float(m)*ph; real_coeff = get_root_term(NM_term,n,m)*legend*(r^n)*mass; in mult_r_c(real_coeff,(cos(m_ph),-sin(m_ph))) $ function Y_contrib(n,m,ph,legend) = let m_ph = float(m)*ph; real_coeff = get_root_term(NM_term,n,m)*legend; in mult_r_c(real_coeff,(cos(m_ph),sin(m_ph))) $ function serial_M_terms(result,particles) = if (#particles==0) then result else let (head,rest) = head_rest(particles); legend = ser_legendre(cos(get_th(head))); mss = mass(head); r = get_r(head); ph = get_ph(head); head_vect = { M_contrib(n,m,r,mss,ph,legend) : legend; (n,m) in Index_Vector}; result = { add_c(head_vect,result) : result; head_vect}; in serial_M_terms(result,rest) $ function ser_get_M_terms(particles) = if (#particles==0) then []([(float,float)]) else let result = dist((0.0,0.0),Num_Terms); flat_result = serial_M_terms(result, particles); in partition(flat_result,Lens) $ % to translate mpole % function get_new_M(j,k,old_M,(rho,alpha,beta)) = let temp_vect = {{mult_r_c(get_J1(m,k-m)* get_A(n,m)* get_A(j-n,k-m)* (rho^n)/get_A(j,k), mult_c_c(get_term(old_M,j-n,k-m),Y_term(n,-m,alpha,beta))) : m in iseq(-n,1,n+1)}: n in iseq(0,1,j+1)}; in sum_c({sum_c(v): v in temp_vect}) $ % disp is vector from parent center as origin to child center % function trans_mpole(old_M,disp) = if (#old_M==0) then []([(float,float)]) else {{ get_new_M(j,k,old_m,disp) : k in iseq(0,1,j+1)} : j in iseq(0,1,P+1)} $ % to translate mpole in serial % function get_inds(a,b) = if ((a<=P) and (abs(b)<=a)) then (a,abs(b)) else (0,0) $ function get_flags(a,b) = if ((a>P) or (abs(b)>a)) then -1 else if (b<0) then 0 else 1 $ function correct((a,b),flag) = if (flag == -1) then (0.0,0.0) else if (flag == 0) then (a,-b) else (a,b) $ function get_flags2(a,b) = if (b>a) then f else t $ function correct2(a,flag) = if (flag) then a else 0.0 $ function serial_M_trans(old_M,Y_terms,result,ind_vect) = if (#ind_vect==0) then result else let ((j,k),rest) = head_rest(ind_vect); A_jk = get_A(j,k); M_inds = {{get_inds(j-n,k-m) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; M_flags = {{get_flags(j-n,k-m) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; M_pos = {{get_pos(M_inds) : M_inds} : M_inds}; old_M_terms = nested_get(flatten(old_M),M_pos); old_M_terms = {{correct(old_M_terms,M_flags): old_M_terms; M_flags}: old_M_terms; M_flags}; coeff_vect = {{get_J1(m,k-m)*get_A(j-n,k-m)/A_jk : m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; temp_vect = {{mult_r_c(coeff_vect,mult_c_c(Y_terms, old_M_terms)) : old_M_terms; Y_terms; coeff_vect}: old_M_terms; Y_terms; coeff_vect}; curr_result = sum_c({sum_c(v): v in temp_vect}); result = rep(result, curr_result, get_pos(j,k)); in serial_M_trans(old_M,Y_terms,result,rest) $ function Y_conj(legendre,n,m,ph) = let m_ph = float(m)*ph in mult_r_c(get_root_term(NM_term,n,m) * legendre,(cos(m_ph),-sin(m_ph))) $ function ser_trans_mpole(old_M,(r,th,ph)) = if (#old_M==0) then []([(float,float)]) else let legend = partition(ser_legendre(cos(th)),Lens); inds = {{get_pos(n,abs(m)) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; full_lens = {#inds: inds}; r_terms = mult_scan(dist(r,P+1)); Y_temp = {{ mult_r_c((r_terms)*get_A(n,m),Y_conj(legend,n,m,ph)) : legend; m in iseq(0,1,n+1)}: r_terms; legend; n in iseq(0,1,P+1)}; Y_terms = flatten(Y_temp) -> flatten(inds); indices = flatten({{(n,m): m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}); Y_terms = {if (m<0) then conjugate(Y) else Y : Y in Y_terms; (n,m) in indices}; Y_terms = partition(Y_terms,full_lens); result = dist((0.0,0.0),Num_Terms); result = serial_M_trans(old_M,Y_terms,result,Index_Vector); in partition(result,Lens) $ function serial_M_to_L(sym_M_terms,Y_terms,result,ind_vect,rho,A_nm) = if (#ind_vect==0) then result else let ((j,k),rest) = head_rest(ind_vect); A_jk = get_A(j,k); Y_inds = {{j+n,m-k: m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; filt_inds = {{ get_inds(a,b) : (a,b) in Y_inds}: Y_inds}; flags = {{ get_flags(a,b) : (a,b) in Y_inds}: Y_inds}; offsets = {{get_pos(filt_inds): filt_inds}: filt_inds}; Y_seq = nested_get(flatten(Y_terms),offsets); Y_seq = {{correct(Y_seq,flags): Y_seq;flags} : Y_seq;flags}; temp_vect = {{ mult_r_c((get_J2(k,m,n)* A_nm * A_jk) / (get_A(j+n,m-k) * (rho^(j+n+1))), mult_c_c(sym_M_terms,Y_seq)) : A_nm; m in iseq(-n,1,n+1);sym_M_terms; Y_seq} : A_nm; n in iseq(0,1,P+1);sym_M_terms; Y_seq}; current_result = sum_c({sum_c(v): v in temp_vect}); result = rep(result,current_result,get_pos(j,k)); in serial_M_to_L(sym_M_terms,Y_terms,result,rest,rho,A_nm) $ function ser_l_from_m (M_terms,vect(r,th,ph)) = if (#M_terms==0) then []([(float,float)]) else let legend = ser_legendre(cos(th)); Y_terms = partition({mult_r_c(get_root_term(NM_term,n,m)*legend, (cos(float(m)*ph),sin(float(m)*ph))): (n,m) in Index_Vector; legend},Lens); sym_M_terms = {{get_term(M_terms,n,m): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; A_nm = {{get_A(n,m): m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; result = dist((0.0,0.0),Num_Terms); result = serial_M_to_L(sym_M_terms,Y_terms,result,Index_Vector,r,A_nm); in partition(result,Lens) $ % finding potential % function potential(vect(r,th,ph),mpole_exp) = if (#mpole_exp==0) then 0.0 else let temp_vect = {{mult_r_c(1.0/r^(n+1),mult_c_c(get_term(mpole_exp,n,m),Y_term(n,m,th,ph))): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; in real(sum_c({sum_c(temp_vect):temp_vect})) $ % finding gradient to get force % function r_grad((r,th,ph),(real_L,imag_L),(n,m),(real_Y,imag_Y)) = if (n==0) then 0.0 else let coeff = if (m==0) then 1.0 else 2.0; in coeff * float(n) * (real_Y*real_L - imag_Y*imag_L) *(r^(n-1)) $ % function th_ph_grad((r,th,ph),(real_M,imag_M),n,m) = if (n==0) then (0.0,0.0) else let coeff = if (m==0) then 1.0 else 2.0; m_fl = float(m); abs_m = abs(m); cos_th = cos(th); sin_th = sin(th); sin_square = 1.0-cos_th^2; common_term = get_root_term(NM_term,n,m)*(r^(n-1)); legendre_1 = legendre(n,abs_m,cos_th); legendre_2 = legendre(n,abs_m+1,cos_th); coeff1 = common_term*sin_th*((-cos_th*abs(m_fl)*legendre_1/sin_square) -legendre_2/sqrt(sin_square)); m_ph = m_fl*ph; cos_term = cos(m_ph); sin_term = sin(m_ph); th_term = coeff * -coeff1 * (cos_term*real_M - sin_term*imag_M); coeff2 = m_fl*legendre_1*common_term/sin_th; ph_term = coeff * coeff2 * (- sin_term*real_M - cos_term*imag_M); in (th_term,ph_term) $ % function th_ph_grad((r,th,ph),(real_M,imag_M),(n,m),(legendre_1,legendre_2)) = if (n==0) then (0.0,0.0) else %if (th==0.0) then (0.0,0.0) else% let coeff = if (m==0) then 1.0 else 2.0; m_fl = float(m); abs_m = abs(m); cos_th = cos(th); sin_th = sin(th); sin_square = 1.0-cos_th^2; common_term = get_root_term(NM_term,n,m)*(r^(n-1)); coeff1 = common_term*sin_th*((-cos_th*abs(m_fl)*legendre_1/sin_square) -legendre_2/sqrt(sin_square)); m_ph = m_fl*ph; cos_term = cos(m_ph); sin_term = sin(m_ph); th_term = coeff * -coeff1 * (cos_term*real_M - sin_term*imag_M); coeff2 = m_fl*legendre_1*common_term/sin_th; ph_term = coeff * coeff2 * (- sin_term*real_M - cos_term*imag_M); in (th_term,ph_term) $ function get_xyz_acc((r_comp,th_comp,ph_comp),(th,ph)) = let costheta = cos(th); sintheta = sin(th); cosphi = cos(ph); sinphi = sin(ph); x_acc = r_comp*sintheta*cosphi + th_comp*costheta*cosphi - ph_comp*sinphi; y_acc = r_comp*sintheta*sinphi + th_comp*costheta*sinphi + ph_comp*cosphi; z_acc = r_comp*costheta - th_comp*sintheta; in (x_acc,y_acc,z_acc) $ % to find grad in serial % %function serial_r_grad(result,coords,local_exp,ind_vect,curr_ind,Y_terms) = if (#ind_vect==0) then result else % function serial_r_grad(result,coords,l_i_y,curr_ind) = if (#l_i_y==0) then result else let % (h_l,r_l) = head_rest(local_exp); (h_i,r_i) = head_rest(ind_vect); (h_y,r_y) = head_rest(Y_terms); curr_res = r_grad(coords,h_l,h_i,h_y); % (h_liy,r_liy) = head_rest(l_i_y); curr_res = r_grad(coords,h_liy); result = rep(result,curr_res,curr_ind); in serial_r_grad(result,coords,r_liy,curr_ind+1) $ function serial_th_ph_grad(result,coords,mpole_exp,ind_vect,curr_ind,legend) = if (#ind_vect==0) then result else let (h_m,r_m) = head_rest(mpole_exp); (h_i,r_i) = head_rest(ind_vect); (h_l,r_l) = head_rest(legend); curr_res = th_ph_grad(coords,h_m,h_i,h_l); result = rep(result,curr_res,curr_ind) in serial_th_ph_grad(result,coords,r_m,r_i,curr_ind+1,r_l) $ function serial_gradient(mpole_exp,coords) = if (#mpole_exp == 0) then (0.0,0.0,0.0) else let (r,th,ph) = coords; grad_r = dist(0.0,Num_Terms); legend = ser_legendre(cos(th)); Y_terms = {Y_contrib(n,m,ph,legend): legend; (n,m) in Index_Vector}; temp_arg = zip(flatten(mpole_exp),zip(Index_vector,Y_terms)); % temp_vect1=partition(serial_r_grad(grad_r,coords,flatten(mpole_exp),Index_Vector,0,Y_terms),Lens); % temp_vect1=partition(serial_r_grad(grad_r,coords,temp_arg,0),Lens); grad_th_ph = dist((0.0,0.0),Num_Terms); legend_ind1 = {get_pos(Index_Vector) : Index_Vector}; legend_ind2 = {(n,m+1) : (n,m) in Index_Vector}; flags = {get_flags2(legend_ind2) : legend_ind2}; legend_ind2 = {get_inds(legend_ind2) : legend_ind2}; legend_1 = legend -> legend_ind1; legend_2 = legend -> {get_pos(legend_ind2) : legend_ind2}; legend_2_flags = zip(legend_2,flags); legend_2 = {correct2(ind_flag) : ind_flag in legend_2_flags}; temp_vect2=partition(serial_th_ph_grad(grad_th_ph,coords,flatten(mpole_exp),Index_Vector,0,zip(legend_1,legend_2)),Lens); temp_vect3 = {zip(temp_vect1,temp_vect2):temp_vect1;temp_vect2}; temp_vect4 = sumv({sumv(temp_vect3) : temp_vect3}); in get_xyz_acc(temp_vect4,th,ph) $ %-----------------------------------------------------------------------% % The PMTA algorithm % % to measure #interactions in 3-d % function cm_pos(cmass(mass,vect(pos))) = pos $ function unzip3(v) = let (xs,yzs) = unzip(v); (ys,zs) = unzip(yzs); in (xs,ys,zs) $ function sumv(vec) = let (vecx,vecy,vecz) = unzip3(vec); in (sum(vecx),sum(vecy),sum(vecz)) $ %function sub3((x1,y1,z1),(x2,y2,z2)) = ((x1-x2),(y1-y2),(z1-z2)) $% function make_quads(v,x_axis,y_axis) = let flags = {x >= x_axis : (x,y,rest) in v }; lr = split(v,flags); flag2314 = {{ y > y_axis : (x,y,rest) in lr } : lr}; quad3241 = {split(lr,fl): lr ; fl in flag2314}; in flatten(quad3241) $ function max3(a,b,c) = max(a,max(b,c)) $ function make_centers(x0,y0,z0,qdx,qdy,qdz) = let x_minus = x0-qdx; y_minus = y0-qdy; z_minus = z0-qdz; x_plus = x0+qdx; y_plus = y0+qdy; z_plus = z0+qdz; centers = dist((0.0,0.0,0.0),8); centers = rep(centers,(x_minus,y_minus,z_minus),0); centers = rep(centers,(x_minus,y_plus,z_minus),1); centers = rep(centers,(x_plus,y_minus,z_minus),2); centers = rep(centers,(x_plus,y_plus,z_minus),3); centers = rep(centers,(x_minus,y_minus,z_plus),4); centers = rep(centers,(x_minus,y_plus,z_plus),5); centers = rep(centers,(x_plus,y_minus,z_plus),6); centers = rep(centers,(x_plus,y_plus,z_plus),7); in centers $ function get_geo_center(v) = let mass = sum({ abs(m) : (x,y,z,m) in v }); (mxs,mys,mzs) = unzip3({ (abs(m)*x,abs(m)*y,abs(m)*z) : (x,y,z,m) in v }); pos = (sum(mxs)/mass,sum(mys)/mass,sum(mzs)/mass); in pos,mass $ function make_tree(v,cell_box) = if (#v <= m) then let center = get_center(cell_box); (gpos,gmass) = get_geo_center(v); coords_m = {new_coords((x,y,z),gpos),m : (x,y,z,m) in v}; mpole = ser_get_M_terms(coords_m); in [(1,cmass(gmass,vect(gpos)),v,mpole,cell_box,[-1])] else % > m particles % let center = get_center(cell_box); (x0,y0,z0) = get_center(cell_box); (dx,dy,dz) = size(cell_box); hdx = dx/2.0; hdy = dy/2.0; hdz = dz/2.0; flags_up_down = { z >= z0 : (x,y,z,rest) in v }; down_up = split(v,flags_up_down); kids = flatten({ make_quads(down_up,x0,y0) : down_up }); kid_centers = make_centers(x0,y0,z0,hdx/2.0,hdy/2.0,hdz/2.0); quadlist = {kids,make_box(kid_centers,(hdx,hdy,hdz)) : kids ; kid_centers | #kids > 0}; desc = {make_tree(qd) : qd in quadlist }; children = { d[0] : d in desc }; child_num = #children; desc_nos = {desc_no : (desc_no,rest) in children}; child_pos = plus_scan(desc_nos); child_pos = {cs + 1 : cs in child_pos }; desc_no = sum(desc_nos) + 1; total_mass = sum({ abs(cm_mass(cm)) : (d,cm,rest) in children }); (mxs,mys,mzs) = unzip3({ (abs(m)*x,abs(m)*y,abs(m)*z) : (d,cmass(m,vect(x,y,z)),rest) in children}); (mx,my,mz) = ((sum(mxs))/total_mass,sum(mys)/total_mass,sum(mzs)/total_mass); (child_mpoles,child_centers) = unzip({(mpole,cm_pos(cm)): (d,cm,pvect,mpole,rest) in children}); child_mpoles_disp = zip(child_mpoles,{new_coords(child_centers,(mx,my,mz)): child_centers}); mpole = sum_terms({ser_trans_mpole(child_mpoles_disp): child_mpoles_disp}); flat_desc = flatten(desc); in [(desc_no,cmass(total_mass,vect(mx,my,mz)),Empty_pvect,mpole,cell_box,child_pos)] ++ flat_desc $ % the append is very expensive % function make_main_tree(v0) = let v = {(x,y,z,m) : (x,y,z,vx,vy,vz,m) in v0}; (x_coords,y_coords,z_coords) = unzip3({(x,y,z) : (x,y,z,rest) in v}); x_min = min_val(x_coords); y_min = min_val(y_coords); z_min = min_val(z_coords); dx = max_val(x_coords) - x_min; dy = max_val(y_coords) - y_min; dz = max_val(z_coords) - z_min; in make_tree(v,make_box((x_min+dx/2.0,y_min+dy/2.0,z_min+dz/2.0),(dx,dy,dz))) $ function well_separated(d,cell_centroid,cell_box,leaf_centroid,leaf_box) = let % dr = subv(vect(cell_centroid),vect(leaf_centroid)); dsq = dotvp(dr,dr); cell_size = max3(size(cell_box)); in if (cell_size < Alpha*sqrt(dsq)) then (t,t) else if (d==1) then (t,f) else (f,f) $ % dr = subv(vect(get_center(cell_box)),vect(get_center(leaf_box))); r1 = max3(size(cell_box))/2.0; r2 = max3(size(leaf_box))/2.0; dist = sqrt(dotvp(dr,dr)) - sqrt(3.0)*(r1+r2); in if (2.0*r1 < Alpha*dist) then (t,t) else if (d==1) then (t,f) else (f,f) $ function dir_force((x0,y0,z0),(x1,y1,z1,m1)) = let dx = x1-x0; dy = y1-y0; dz = z1-z0; rsq = (dx^2 + dy^2 + dz^2); mrcubed = m1/(rsq*sqrt(rsq)) in dx*mrcubed,dy*mrcubed,dz*mrcubed $ function dir_acc((x0,y0,z0),v) = let v1 = {(x1,y1,z1,m1) : (x1,y1,z1,m1) in v | (x1 /= x0) or (y1 /= y0) or (z1 /= z0)}; forces = {dir_force((x0,y0,z0),v1): v1}; in sumv(forces) $ function leaf_leaf_force(pvect,leaf_vect,pi) = (pi,{dir_acc((x,y,z),pvect): (x,y,z,m) in leaf_vect}) $ function cell_leaf_force(cm,mpole,center,pi) = let disp = new_coords2(cm_pos(cm),center); lexp = ser_l_from_m(mpole,disp); in (pi,lexp) $ function serial_add(vect, result) = if (#vect == 0) then result else let (head,rest) = head_rest(vect); result = {head + result : head; result}; in serial_add(rest,result) $ function sum_accs(acc_vect) = let xyz_acc = {unzip3(acc_vect) : acc_vect}; (xacc,yacc,zacc) = unzip3(xyz_acc); num_terms = #xacc[0]; result = dist(0.0,num_terms); x_res = serial_add(xacc,result); y_res = serial_add(yacc,result); z_res = serial_add(zacc,result); in zip3(x_res,y_res,z_res) $ function nested_get(a,i) = let lens = {#i:i}; vals = a->flatten(i) in partition(vals,lens) $ function get_force(curr_level,accs,lexps,tr,num_leafs,num_leaf_vect,intrs) = let (flags1,flags2) = unzip({ well_separated(d,cm_pos(centroid), cell_box,cm_pos(leaf_centroid),leaf_box) : ((d,centroid,pvect,mpole,cell_box,kids), ((ld, leaf_centroid,lvect,leaf_box),li),cell_ind) in curr_level}); divided = split(zip(curr_level,flags2),flags1); (next_inds,waste) = unzip(divided[0]); % discard 2nd flag % indices = {{i+cell_ind : i in kids(tree_cell) } : (tree_cell,leaf,cell_ind) in next_inds}; nested_kids = nested_get(tr,indices); nested_next_level = {{nested_kids,leaf,indices : indices;nested_kids} : (tree_cell,leaf,cell_ind) in next_inds; nested_kids; indices}; next_level = flatten(nested_next_level); this_level = divided[1]; divided = split(unzip(this_level)); intrs = intrs + #this_level; forces1 = {cell_leaf_force(centroid,mpole,get_leaf_center(leaf),l_ind) : ((d,centroid,pvect,mpole,cell_box,rest), (leaf,l_ind),cell_ind) in divided[1]}; forces2 = { leaf_leaf_force(pvect,get_leaf_pvect(leaf),l_ind) : ((d,centroid,pvect,mpole,cell_box,rest),(leaf,l_ind),cell_ind) in divided[0]}; collected1 = int_collect(forces1); collected2 = int_collect(forces2); new_lexps = dist([]([(float,float)]),num_leafs); d_lexp = { i,sum_terms(v) : (i,v) in collected1 }; new_lexps = new_lexps <- d_lexp; % check if better to allocate zero-vects instead of nulls % lexps = {sum_2_terms(lexps,new_lexps) : lexps; new_lexps}; d_accs = {(i,sum_accs(accs)) : (i,accs) in collected2}; new_accs = {dist((0.0,0.0,0.0),num_leaf_vect) : num_leaf_vect}; new_accs = new_accs <- d_accs; accs = {{addv(vect(new_accs),accs) : new_accs; accs} : new_accs; accs}; in if (#next_level == 0) then (lexps,accs,intrs) else get_force(next_level,accs,lexps,tr,num_leafs,num_leaf_vect,intrs) $ function newpt((x,y,z,vx,vy,vz,m),(ax,ay,az)) = let tm = 0.15; nvx = vx + ax*tm; nvy = vy + ay*tm; nvz = vz + az*tm; nx = x + (vx + nvx)*tm/2.0; ny = y + (vy + nvy)*tm/2.0; nz = z + (vz + nvz)*tm/2.0; in (nx,ny,nz,nvx,nvy,nvz,m) $ function get_blk_force(start,size,tr,acc,leafs,tot_num_leafs,tot_intrs) = let end = start+size; end = if (end > tot_num_leafs) then tot_num_leafs else end; sub_leafs = subseq(leafs,start,end); num_leafs = end - start; start = end; root = tr[0]; leaf_indices = plus_scan(dist(1,num_leafs)); init_level = {root,(leaf,li),0: leaf in sub_leafs; li in leaf_indices}; pvect = {vect_new_coords(cm_pos(centroid),pvect) : (d,centroid,pvect,cbox) in sub_leafs}; num_leaf_vect = {#pvect : pvect}; pmvect = {pvect: (d,centroid,pvect,cbox) in sub_leafs}; acc0 = {dist(vect(0.0,0.0,0.0),num_leaf_vect) : num_leaf_vect}; lexps = dist([]([(float,float)]),num_leafs); (blk_lexp,blk_acc,intrs) = get_force(init_level,acc0,lexps,tr,num_leafs, num_leaf_vect,0); blk_acc2 = {{serial_gradient(blk_lexp,pvect): pvect}: pvect; blk_lexp}; tot_acc = {{addv(blk_acc,vect(blk_acc2)) : blk_acc; blk_acc2}: blk_acc; blk_acc2}; acc = acc ++ flatten({zip(pmvect,tot_acc): tot_acc; pmvect}); tot_intrs = tot_intrs + intrs; w = if (rem(end,100)==0) then print_string(@end++" ") else t; in if (start==tot_num_leafs) then (acc,tot_intrs) else get_blk_force(start,size,tr,acc,leafs,tot_num_leafs,tot_intrs) $ function blkwise_force(n,tr,block) = let acc = []((float,float,float,float),vect); leafs = {(d,centroid,pvect,cbox) :(d,centroid,pvect,mpole,cbox,rest) in tr | d==1}; tot_num_leafs = #leafs; w = print_string("LEAFS = "++@tot_num_leafs++" "); in get_blk_force(0,block,tr,acc,leafs,tot_num_leafs,0) $ function get_new_pts(particles,block) = let tr = make_main_tree(particles); n = #particles; indices = plus_scan(dist(1,n)); (acc,intr) = blkwise_force(n,tr,block); acc = {a,b : a,vect(b) in acc}; in (acc,intr) $ function cv1(v) = {a[0],a[1],a[2],a[3],a[4],a[5],a[6]: a in v} $ % Main function call : count is # particles, blk is the number of leaves to traverse the tree for at a time. The results are the total #interactions, and the original position, charge and 3D force on each particle. The vector is [(x,y,z,charge), (fx, fy, fz)] % function pmta(count,blk) = let v0 = rpts(count); v = cv1(v0); (particle_forces,intr) = get_new_pts(v,blk); in print_string("Particle-vect after one iteration ("++@intr++ " iterations : "++@particle_forces) $