% This is the Nesl source code for Greengard's 3D Fast Multipole Method (FMM). Read "The rapid evaluation of potential fields in particle sysrtems" by L. Greengard, MIT press, for a full description of this algorithm. % % This file includes two source code files: ;; ;; 1. types.nesl : definitions for data types and functions to access their ;; fields, and some constants. ;; ;; 2. math.nesl : contains all the math functions used for 3D creation and ;; translations of multipole expansions with P terms. ;; ;; 3. main.nesl : contains the code for creating the FMM tree with the ;; multipole expansions, travsing up and down the tree and calculating the ;; force on each particle for one iteration. ;; ;; Note : Before you can load this code, you need to define ;; the number of terms in the multipole expansion P (= 1,2,3...). ;; ;; The FMM algorithm is invoked using the function FMM(vect,level) where ;; vect is the vector of particles and level is an integer specifying the ;; number of levels that the uniform FMM tree should have. ;; The vector of particles is of the type ;; ((float,float,float),(float,float,float),float) ;; where the first three values are the particle's X, Y and Z displacements, ;; the next three values are its X, Y and Z velocities and the last value is ;; its mass. ;; This function returns a vector of pairs (particle,force) where particle ;; is the particle that was given in the input of the type described above, ;; and force if (float,float,float) which is the force on the particle ;; in 3D. % %---------------------- types.nesl ---------------------------% % The number of terms in the multipole expansions : the files should be reloaded if its value is changed % P = 2; % Some constants commonly used % Num_Terms = (p+1)*(p+2)/2; Index_Vector = flatten({{(n,m): m in iseq(0,1,n+1)}: n in iseq(0,1,P+1)}); Lens = let temp_vect = {{(n,m): m in iseq(0,1,n+1)}: n in iseq(0,1,P+1)}; in {#temp_vect: temp_vect}; Zero_Exp = {{(0.0,0.0): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)} $ Half_Zero_Exp = {{(0.0,0.0): m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)} $ % data type definitions % datatype vect (float,float,float); datatype particle (vect,float); % functions to access the fields of the datatypes % function mass((pos,m)) = m $ function get_r(vect(a,b,c),m) = a $ function get_th(vect(a,b,c),m) = b $ function get_ph(vect(a,b,c),m) = c $ function real(a,b) = a $ function imag(a,b) = b $ % functions for complex numbers, represented as a pair of floats % function conjugate(a,b) = (a,-b) $ function add_c((a,b),(c,d)) = (a+c,b+d) $ function mult_r_c(r,(a,b)) = (r*a,r*b) $ function mult_c_c((a,b),(c,d)) = (a*c-b*d,a*d+b*c) $ function sum_c(v) = let (v1,v2) = unzip(v) in (sum(v1),sum(v2)) $ % functions for 3D vectors defined above % function add_vect(v1,v2) = {(a1+a2,b1+b2,c1+c2) : (a1,b1,c1) in v1;(a2,b2,c2) in v2}; function unzip3(v) = let (xs,yzs) = unzip(v); (ys,zs) = unzip(yzs); in (xs,ys,zs) $ function sumv(vec) = let (vecx,vecy,vecz) = unzip3(vec); in (sum(vecx),sum(vecy),sum(vecz)) $ function zero_vect(num) = let null = Zero_Exp; in dist(null,num^3) $ function empty_vect(num) = let null = []([(float,float)]); in dist(null,num^3) $ % functions for manipulating multipole expansions of type [[(float,float)]] % function sum_2_terms(term_1,term_2) = if (#term_1==0) then term_2 else if (#term_2==0) then term_1 else let lens = {#term : term in term_1 }; t1 = flatten(term_1); t2 = flatten(term_2); sum_12 = {add_c(t1,t2) : t1; t2}; in partition(sum_12,lens) $ function sum_terms(term_vec) = let filt_vec = {term : term in term_vec | #term>0 }; in if (#filt_vec == 0) then []([(float,float)]) else let lens = {#term : term in filt_vec[0] }; flat_terms = {flatten(filt_vec) : filt_vec}; num = #flat_terms[0]; seq = iseq(0,1,num); vects = {{elt(flat_terms,i):flat_terms}: i in seq}; sums = {sum_c(vects) : vects}; in partition(sums,lens) $ % some more useful functions % function min3(a,b,c) = min(a,min(b,c)) $ function max3(a,b,c) = max(a,max(b,c)) $ function nested_get(a,i) = let lens = {#i:i}; vals = a->flatten(i) in partition(vals,lens) $ function nested_unzip(a) = let lens = {#a:a}; (v1,v2) = unzip(flatten(a)); in (partition(v1,lens),partition(v2,lens)) $ %---------------------- math.nesl ---------------------------% function factorial(i) = if (i<=1) then 1.0 else float(i)*factorial (i-1) $ function odd_factorial(i) = if (i <= 1) then 1.0 else float(i)*odd_factorial(i-2) $ % some constant vectors % Fact = {factorial(i) : i in iseq(0,1,2*P+1)}; Odd_Fact = {odd_factorial(2*i-1) : i in iseq(0,1,P+1)}; NM_term = {{sqrt(Fact[n-abs(m)]/Fact[n+abs(m)]) : m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)}; % auxiliary functions for the math routines % function expt1(a,b) = if (b==0.0) then 1.0 else if (a==0.0) then 0.0 else expt(a,b) $ function expt2(a,b) = if (b<0) then 1.0/(a^b) else a^b $ function get_pos(n,m) = n*(n+1)/2 + m $ function get_root_term(v,n,m) = if (abs(m) > n) then 0.0 else v[n][m] $ function get_term(v,n,m) = if ((abs(m) > n) or (n>P)) then (0.0,0.0) else if (m < 0) then conjugate(v[n][-m]) else v[n][m] $ function minus_one_raised_to(i) = if evenp(i) then 1.0 else -1.0 $ % resets indices not within bounds % function get_inds(a,b) = if ((a<=P) and (abs(b)<=a)) then (a,abs(b)) else (0,0) $ function get_flags(a,b) = if ((a>P) or (abs(b)>a)) then -1 else if (b<0) then 0 else 1 $ function correct((a,b),flag) = if (flag == -1) then (0.0,0.0) else if (flag == 0) then (a,-b) else (a,b) $ % Calculating the terms A, J1, J2, J3 used in calculating and translating % % the multipole expansions % function get_A(u,v) = if ((u < abs(v))or(u > P)) then 1.0 else minus_one_raised_to(u)/(sqrt(Fact[u-v]*Fact[u+v])) $ function get_J1(u,v) = if (u*v>=0) then 1.0 else minus_one_raised_to(min(abs(u),abs(v))) $ function get_J2(u,v,w) = let coeff = minus_one_raised_to(w); in if (u*v<=0) then coeff else coeff * minus_one_raised_to(min(abs(u),abs(v))) $ function get_J3(u,v,w) = let coeff = minus_one_raised_to(u); in if (v*w < 0) then coeff*minus_one_raised_to(v) else if ((v*w>0) and (abs(w) n) or (n > P) or (n<0)) then (0.0,0.0) else if (m<0) then conjugate(Y_term(n,-m,th,ph)) else let m_fl = float(m); in mult_r_c(get_root_term(NM_term,n,m) * legendre(n,abs(m),cos(th)), (cos(m_fl*ph),sin(m_fl*ph))) $ % Calculates Y(n,-m) given P(n,m), n and m % function Y_conj(legendre,n,m,ph) = let m_ph = float(m)*ph in mult_r_c(get_root_term(NM_term,n,m) * legendre,(cos(m_ph),-sin(m_ph))) $ function Y_term_trans(n,m,th,ph) = if ((abs(m) > n) or (n > P)) then (0.0,0.0) else let m_fl = float(m); in mult_r_c(get_root_term(NM_term,n,m) * legendre(n,abs(m),cos(th)), (cos(m_fl*ph),sin(m_fl*ph))) $ % The multipole expansion term % function M_term(n,m,particles) = sum_c({mult_r_c(mass(body) * (get_r(body)^n),Y_term(n,-m,get_th(body),get_ph(body))): body in particles}) $ function M_contrib(n,m,r,mass,ph,legend) = let m_ph = float(m)*ph; real_coeff = get_root_term(NM_term,n,m)*legend*(r^n)*mass; in mult_r_c(real_coeff,(cos(m_ph),-sin(m_ph))) $ % calculates multipole expansion with P terms % function get_M_terms(particles) = if (#particles==0) then []([(float,float)]) else {{ M_term(n,m,particles): m in iseq(0,1,n+1)} : n in iseq(0,1,P+1)} $ function serial_M_terms(result,particles) = if (#particles==0) then result else let (head,rest) = head_rest(particles); legend = ser_legendre(cos(get_th(head))); mss = mass(head); r = get_r(head); ph = get_ph(head); head_vect = { M_contrib(n,m,r,mss,ph,legend) : legend; (n,m) in Index_Vector}; result = { add_c(head_vect,result) : result; head_vect}; in serial_M_terms(result,rest) $ % Calculates the multipole expansion of "particles" % function ser_get_M_terms(particles) = if (#particles==0) then []([(float,float)]) else let result = dist((0.0,0.0),Num_Terms); flat_result = serial_M_terms(result, particles); in partition(flat_result,Lens) $ % Translates the M(j,k) term of a multipole expansion % function get_new_M(j,k,old_M,vect(rho,alpha,beta)) = let temp_vect = {{mult_r_c(get_J1(m,k-m)* get_A(n,m)* get_A(j-n,k-m)* (rho^n)/get_A(j,k), mult_c_c(get_term(old_M,j-n,k-m),Y_term(n,-m,alpha,beta))) : m in iseq(-n,1,n+1)}: n in iseq(0,1,j+1)}; in sum_c({sum_c(v): v in temp_vect}) $ % "disp" is vector from parent center as origin to child center % function trans_mpole(old_M,disp) = if (#old_M==0) then []([(float,float)]) else {{ get_new_M(j,k,old_m,disp) : k in iseq(0,1,j+1)} : j in iseq(0,1,P+1)} $ % translates serially to save memory due to virtual processors % function serial_M_trans(old_M,Y_terms,result,ind_vect) = if (#ind_vect==0) then result else let ((j,k),rest) = head_rest(ind_vect); A_jk = get_A(j,k); M_inds = {{get_inds(j-n,k-m) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; M_flags = {{get_flags(j-n,k-m) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; M_pos = {{get_pos(M_inds) : M_inds} : M_inds}; old_M_terms = nested_get(flatten(old_M),M_pos); old_M_terms = {{correct(old_M_terms,M_flags): old_M_terms; M_flags}: old_M_terms; M_flags}; coeff_vect = {{get_J1(m,k-m)*get_A(j-n,k-m)/A_jk : m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; temp_vect = {{mult_r_c(coeff_vect,mult_c_c(Y_terms, old_M_terms)) : old_M_terms; Y_terms; coeff_vect}: old_M_terms; Y_terms; coeff_vect}; curr_result = sum_c({sum_c(v): v in temp_vect}); result = rep(result, curr_result, get_pos(j,k)); in serial_M_trans(old_M,Y_terms,result,rest) $ function ser_trans_mpole(old_M,vect(r,th,ph)) = if (#old_M==0) then []([(float,float)]) else let legend = partition(ser_legendre(cos(th)),Lens); inds = {{get_pos(n,abs(m)) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; full_lens = {#inds: inds}; r_terms = mult_scan(dist(r,P+1)); Y_temp = {{ mult_r_c((r_terms)*get_A(n,m),Y_conj(legend,n,m,ph)) : legend; m in iseq(0,1,n+1)}: r_terms; legend; n in iseq(0,1,P+1)}; Y_terms = flatten(Y_temp) -> flatten(inds); indices = flatten({{(n,m): m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}); Y_terms = {if (m<0) then conjugate(Y) else Y : Y in Y_terms; (n,m) in indices}; Y_terms = partition(Y_terms,full_lens); result = dist((0.0,0.0),Num_Terms); result = serial_M_trans(old_M,Y_terms,result,Index_Vector); in partition(result,Lens) $ % Conversion of a multipole expansion to a local expansion % function get_L_from_M(j,k,M_terms,vect(rho,alpha,beta)) = let %temp_vect = {{ mult_r_c(get_J2(k,m,j)* get_A(n,m)*get_A(j,k) /% temp_vect = {{ mult_r_c((get_J2(k,m,n)* get_A(n,m)*get_A(j,k)) / (get_A(j+n,m-k) * (rho^(j+n+1))), mult_c_c(get_term(M_terms,n,m),Y_term(j+n,m-k,alpha,beta))) : m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; %(t1,t2) = temp_vect[0][0]; w = if (t1==0.0) then print_string("L_FROM_M "++@temp_vect++" "++@j++" "++@k++" "++@rho++" "++@alpha++" "++@beta++" "++@M_terms++" ") else f;% in sum_c({sum_c(v): v in temp_vect}) $ % disp is vector from center of local box to center of i-list box % function m_to_l(M_terms,disp) = if (#M_terms==0) then []([(float,float)]) else {{ get_L_from_M(j,k,M_terms,disp) : k in iseq(0,1,j+1)} : j in iseq(0,1,P+1)} $ function serial_M_to_L(sym_M_terms,Y_terms,result,ind_vect,rho,A_nm) = if (#ind_vect==0) then result else let ((j,k),rest) = head_rest(ind_vect); A_jk = get_A(j,k); Y_inds = {{j+n,m-k: m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; filt_inds = {{ get_inds(a,b) : (a,b) in Y_inds}: Y_inds}; flags = {{ get_flags(a,b) : (a,b) in Y_inds}: Y_inds}; offsets = {{get_pos(filt_inds): filt_inds}: filt_inds}; Y_seq = nested_get(flatten(Y_terms),offsets); Y_seq = {{correct(Y_seq,flags): Y_seq;flags} : Y_seq;flags}; temp_vect = {{ mult_r_c((get_J2(k,m,n)* A_nm * A_jk) / (get_A(j+n,m-k) * (rho^(j+n+1))), mult_c_c(sym_M_terms,Y_seq)) : A_nm; m in iseq(-n,1,n+1);sym_M_terms; Y_seq} : A_nm; n in iseq(0,1,P+1);sym_M_terms; Y_seq}; current_result = sum_c({sum_c(v): v in temp_vect}); result = rep(result,current_result,get_pos(j,k)); in serial_M_to_L(sym_M_terms,Y_terms,result,rest,rho,A_nm) $ function ser_l_from_m (M_terms,vect(r,th,ph)) = if (#M_terms==0) then []([(float,float)]) else let legend = ser_legendre(cos(th)); Y_terms = partition({mult_r_c(get_root_term(NM_term,n,m)*legend, (cos(float(m)*ph),sin(float(m)*ph))): (n,m) in Index_Vector; legend},Lens); sym_M_terms = {{get_term(M_terms,n,m): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; A_nm = {{get_A(n,m): m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; result = dist((0.0,0.0),Num_Terms); result = serial_M_to_L(sym_M_terms,Y_terms,result,Index_Vector,r,A_nm); in partition(result,Lens) $ % translation of a local expansion % function get_new_L(j,k,old_L,vect(rho,alpha,beta)) = let temp_vect = {{mult_r_c(get_J3_new(n,m,k)* get_A(j,k)*get_A(n,m)* (rho^(n))/get_A(n+j,m+k), mult_c_c(get_term(old_L,n+j,m+k),Y_term(n,m,alpha,beta))) : m in iseq(-n,1,n+1)} : n in iseq(0,1,P-j+1)}; in sum_c({sum_c(v): v in temp_vect}) $ % disp is vector from child center as origin to parent center % function trans_local(old_L,disp,if_bodies) = if ((#old_L==0)or(not(if_bodies))) then []([(float,float)]) else {{ get_new_L(j,k,old_L,disp) : k in iseq(0,1,j+1)} : j in iseq(0,1,P+1)} $ function serial_L_trans(old_L,Y_terms,result,ind_vect) = if (#ind_vect==0) then result else let ((j,k),rest) = head_rest(ind_vect); A_jk = get_A(j,k); L_inds = {{get_inds(j+n,k+m) : m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; L_flags = {{get_flags(j+n,k+m): m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; L_offsets = {{get_pos(a,b) : (a,b) in L_inds}: L_inds}; L_terms = nested_get(flatten(old_L),L_offsets); L_terms = {{correct(L_terms,L_flags) : L_terms; L_flags}: L_terms; L_flags}; coeff_vect = {{get_J3_new(n,m,k)* A_jk/get_A(n+j,m+k) : m in iseq(-n,1,n+1)}: n in iseq(0,1,P+1)}; temp_vect = {{mult_r_c(coeff_vect,mult_c_c(Y_terms, L_terms)) : L_terms; Y_terms; coeff_vect}: L_terms; Y_terms; coeff_vect}; curr_result = sum_c({sum_c(v): v in temp_vect}); result = rep(result, curr_result, get_pos(j,k)); in serial_L_trans(old_L,Y_terms,result,rest) $ function ser_trans_local(old_L,vect(r,th,ph),if_bodies) = if ((#old_L==0)or(not(if_bodies))) then []([(float,float)]) else let legend = partition(ser_legendre(cos(th)),Lens); inds = {{get_pos(n,abs(m)) : m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}; full_lens = {#inds: inds}; Y_temp = {{mult_r_c(get_A(n,m)*(r^(n))*get_root_term(NM_term,n,m)* legend, (cos(float(m)*ph),sin(float(m)*ph))) : legend; m in iseq(0,1,n+1)}:legend; n in iseq(0,1,P+1)}; Y_terms = flatten(Y_temp) -> flatten(inds); indices = flatten({{(n,m): m in iseq(-n,1,n+1)}:n in iseq(0,1,P+1)}); Y_terms = {if (m<0) then conjugate(Y) else Y : Y in Y_terms; (n,m) in indices}; Y_terms = partition(Y_terms,full_lens); result = dist((0.0,0.0),Num_Terms); result = serial_L_trans(old_L,Y_terms,result,Index_Vector); in partition(result,Lens) $ % redefines arctan as needed % function atan2(x,y) = if (x==0.0) then if (y < 0.0) then -pi/2.0 else pi/2.0 else if (x>0.0) then atan(y/x) else if (y>=0.0) then atan(y/x) + pi else atan(y/x) - pi $ % calculates the displacement vector between two points in space % function new_coords2((x,y,z),(x0,y0,z0)) = let dx = x - x0; dy = y - y0; dz = z - z0; r = sqrt(dx^2 + dy^2 + dz^2); th = if (r==0.0) then 0.0 else acos(dz/r); ph = if ((dx==0.0) and (dy==0.0)) then 0.0 else atan2(dx,dy); in vect(r,th,ph) $ function new_coords((x,y,z),(x0,y0,z0)) = let dx = x - x0; dy = y - y0; dz = z - z0; r = sqrt(dx^2 + dy^2 + dz^2); th = if (r==0.0) then 0.0 else acos(dz/r); ph = atan2(dx,dy); in vect(r,th,ph) $ function find_offsets(new_centre,points) = {new_coords(points,new_centre) : points} $ function find_offsets2(new_centre,points) = {new_coords2(points,new_centre) : points} $ function wrt_cell_centres((coords,m),centre) = (new_coords(coords,centre),m) $ % finding potential from a local expansion % function potential((vect(r,th,ph),m),local_exp) = if (#local_exp==0) then 0.0 else let temp_vect = {{mult_r_c(r^n,mult_c_c(get_term(local_exp,n,m),Y_term(n,m,th,ph))): m in iseq(-n,1,n+1)} : n in iseq(0,1,P+1)}; in real(sum_c({sum_c(temp_vect):temp_vect})) $ % potential of a set of particles due to a local axpansion % function get_cell_pot(particles,local) = {potential(body,local): body in particles} $ function get_pot(cells,locals) = {get_cell_pot(cells,locals): cells;locals} $ % finding gradient of the potential to get the force % function r_grad((r,th,ph),(real_L,imag_L),n,m) = if (n==0) then 0.0 else let (real_Y,imag_Y) = Y_term(n,m,th,ph); coeff = if (m==0) then 1.0 else 2.0; in coeff * float(n) * (r^(n-1))* (real_Y*real_L - imag_Y*imag_L) $ function th_ph_grad((r,th,ph),(real_L,imag_L),n,m) = if (n==0) then (0.0,0.0) else let coeff = if (m==0) then 1.0 else 2.0; m_fl = float(m); abs_m = abs(m); cos_th = cos(th); sin_th = sin(th); sin_square = 1.0-cos_th^2; common_term = get_root_term(NM_term,n,m)*(r^(n-1)); legendre_1 = legendre(n,abs_m,cos_th); legendre_2 = legendre(n,abs_m+1,cos_th); %coeff1 = common_term *sin_th*((cos_th*abs(m_fl)*legendre_1/sin_square) -legendre_2/sqrt(sin_square));% coeff1 = common_term*sin_th*((-cos_th*abs(m_fl)*legendre_1/sin_square) -legendre_2/sqrt(sin_square)); m_ph = m_fl*ph; cos_term = cos(m_ph); sin_term = sin(m_ph); th_term = coeff * -coeff1 * (cos_term*real_L - sin_term*imag_L); coeff2 = m_fl*legendre_1*common_term/sin_th; ph_term = coeff * coeff2 * (- sin_term*real_L - cos_term*imag_L); in (th_term,ph_term) $ % gets the acceleration in xyz coordinates from those in spherical coords % function get_xyz_acc((r_comp,th_comp,ph_comp),(th,ph)) = let costheta = cos(th); sintheta = sin(th); cosphi = cos(ph); sinphi = sin(ph); x_acc = r_comp*sintheta*cosphi + th_comp*costheta*cosphi - ph_comp*sinphi; y_acc = r_comp*sintheta*sinphi + th_comp*costheta*sinphi + ph_comp*cosphi; z_acc = r_comp*costheta - th_comp*sintheta; in (x_acc,y_acc,z_acc) $ % gradient in xyz coords due to a local expansion on a particle at a given ;; point with a given mass % function gradient(local_exp,(vect(coords),mass)) = if (#local_exp==0) then (0.0,0.0,0.0) else let (r,th,ph) = coords; temp_vect1 = {{r_grad(coords,local_term,n,m), th_ph_grad(coords,local_term,n,m) : m in iseq(0,1,n+1);local_term in local_exp}: n in iseq(0,1,P+1);local_exp}; temp_vect2 = sumv({sumv(temp_vect1) : temp_vect1}); in get_xyz_acc(temp_vect2,th,ph) $ function serial_r_grad(coords,local_exp,ind_vect) = if (#ind_vect==0) then [](float) else let (h_l,r_l) = head_rest(local_exp); (h_i,r_i) = head_rest(ind_vect); curr_res = r_grad(coords,h_l,h_i); in [curr_res]++serial_r_grad(coords,r_l,r_i) $ function serial_th_ph_grad(coords,local_exp,ind_vect) = if (#ind_vect==0) then []((float,float)) else let (h_l,r_l) = head_rest(local_exp); (h_i,r_i) = head_rest(ind_vect); curr_res = th_ph_grad(coords,h_l,h_i); in [curr_res]++serial_th_ph_grad(coords,r_l,r_i) $ function serial_gradient(local_exp,(vect(coords),mass)) = if (#local_exp==0) then (0.0,0.0,0.0) else let (r,th,ph) = coords; temp_vect1=partition(serial_r_grad(coords,flatten(local_exp),Index_vector),Lens); temp_vect2=partition(serial_th_ph_grad(coords,flatten(local_exp),Index_vector),Lens); temp_vect3 = {zip(temp_vect1,temp_vect2):temp_vect1;temp_vect2}; temp_vect4 = sumv({sumv(temp_vect3) : temp_vect3}); in get_xyz_acc(temp_vect4,th,ph) $ function ser_gradient(local_exp,(vect(coords),mass)) = if (#local_exp==0) then (0.0,0.0,0.0) else let (r,th,ph) = coords; temp_vect1 = {{r_grad(coords,local_term,n,m): m in iseq(0,1,n+1); local_term in local_exp}: n in iseq(0,1,P+1);local_exp}; temp_vect2 = {{ th_ph_grad(coords,local_term,n,m) : m in iseq(0,1,n+1); local_term in local_exp}: n in iseq(0,1,P+1);local_exp}; temp_vect3 = {zip(temp_vect1,temp_vect2):temp_vect1;temp_vect2}; temp_vect4 = sumv({sumv(temp_vect3) : temp_vect3}); in get_xyz_acc(temp_vect4,th,ph) $ % calculates force (gradient of potential) for a set of particles % function gradient_at_leaf(locals,particles) = {serial_gradient(locals,body): body in particles} $ % new displacement & velocity of a particle due to given acceleration % function new_particle(((x,y,z),(vx,vy,vz),m),(ax,ay,az)) = let tm = 0.15; nvx = vx + ax*tm; nvy = vy + ay*tm; nvz = vz + az*tm; nx = x + (vx + nvx)*tm/2.0; ny = y + (vy + nvy)*tm/2.0; nz = z + (vz + nvz)*tm/2.0; in ((nx,ny,nz),(nvx,nvy,nvz),m) $ function update(particles,accs) = {new_particle(particles,accs) : particles;accs} $ % disp is vector from center of local box to center of i-list box % %------------------------- main.nesl -----------------------------% % The code for the steps of Fast Multipole Method in 3-dimensions % %-----------------------------------------------------------------% % some auxiliary functions used by the algorithm % function get_coord(i,(x,y,z),rest) = if (i==0) then x else if (i==1) then y else z $ function centre(min,size,a) = min + size*(float(a)+0.5) $ function sort_coord(particles,min,max,number,coord) = let M = max-min; size = (M+1.0)/float(number); labels= {(trunc((get_coord(coord,body)-min)/size),body) : body in particles}; empty_vect = dist([]((float,float,float),(float,float,float),float),number); in empty_vect <- int_collect(labels); % places particles into cells given the min & max coordinates and the number of cells along a dimension % function place_into_cells(particles,min,max,number) = let x_sorted = sort_coord(particles,min,max,number,0); y_sorted = flatten({sort_coord(x_sorted,min,max,number,1): x_sorted}); in flatten({sort_coord(y_sorted,min,max,number,2): y_sorted}) $ % finds centres of cells at a given level % function get_centres(min,max,number) = let size = (max-min+1.0)/float(number); seq = iseq(0,1,number); centres = flatten(flatten({{{(centre(min,size,i),centre(min,size,j), centre(min,size,k)) : k in seq} : j in seq} : i in seq})); in centres $ % finds spherical coordinates of particles wrt the centres of the cells they are in % function shift_coords(particles,min,max,number) = let centres = get_centres(min,max,number); in {{wrt_cell_centres(particles,centres): particles}: particles;centres} $ function not_close(i,j,k) = not((abs(i) <= 2) and (abs(j) <= 2) and (abs(k) <= 2)) $ function not_zero(i,j,k) = not((i == 0) and (j == 0) and (k == 0)) $ % determines if the indices for the cell are within bounds % function in_bounds(x,y,z,num) = (x>=0) and (y>=0) and (z>=0) and (x head_o; head_md = zip(head_m,head_d); filt_head = {(hm, hd) : (hm, hd) in head_md | #hm >0}; (head_f,rest_f) = head_rest(if_bodies); curr_result = parallel_ilist_m_to_l(filt_head,head_f); num_done = num_done + 1; w=if (rem(num_done,8) == 0) then print_string(@num_done++" ") else t; in [curr_result]++par_ser_m_to_l(mpoles,rest_d,rest_f,rest_o,num_done) $ % recursively sums terms of a multipole expansion % bch = 12/P; function ser_sum_terms(terms) = if (#terms==0) then []([[(float, float)]]) else let num = #terms; (head,rest) = if (num <= bch) then (terms,[]([[[(float, float)]]])) else (take(terms,bch),drop(terms,bch)); curr_result = {sum_terms(head) : head}; in curr_result ++ ser_sum_terms(rest) $ % creates multipole expansions for each cell % function form_mpoles(particles) = {ser_get_M_terms(particles):particles} $ % calculates the neighbours for a cell at a given level % function get_neighbours((x,y,z),number) = let seq = iseq(-2,1,3); res = flatten(flatten({{{(x+i,y+j,z+k): k in seq |not_zero(i,j,k) and in_bounds(x+i,y+j,z+k,number)}: j in seq} : i in seq })) ; in res $ % neighbours including the cell itself % function get_incl_neighbours((x,y,z),number) = let seq = iseq(-2,1,3); in flatten(flatten({{{(x+i,y+j,z+k): k in seq |in_bounds(x+i,y+j,z+k,number)}: j in seq} : i in seq })) $ % calculates the set of cells in the interaction list of a non-empty cell % function ilist_cell((x,y,z),number,if_bodies) = if (not(if_bodies)) then []((int,int,int)) else let (px,py,pz) = (x/2,y/2,z/2); seq1 = iseq(0,1,2); par_neighbours = get_neighbours((px,py,pz),number/2); ilist = flatten(flatten(flatten({{{{(2*pnx+i,2*pny+j,2*pnz+k): k in seq1 | not_close(2*pnx+i-x,2*pny+j-y,2*pnz+k-z) } : j in seq1}:i in seq1}: (pnx,pny,pnz) in par_neighbours}))); in ilist $ % create the list of cells as (x,y,z) indices at a givem level % function get_seq(number) = let seq = iseq(0,1,number); in flatten(flatten({{{(i,j,k):k in seq} : j in seq} : i in seq})) $ % neighbours at a given level % function get_near_inds(number) = let cell_seq = get_seq(number); in {get_incl_neighbours(cell_seq,number) : cell_seq} $ % I-list cells at a given level % function get_ilist_inds(indices,number,if_bodies) = {ilist_cell(indices,number,if_bodies) : indices;if_bodies} $ % offsets of a cell with indices (x,y,z) in the flattened list of cells % function get_nested_offsets(v,number) = {{(i*number^2 + j*number + k) : (i,j,k) in v} : v} $ % given the multipole expansions at a given level, get_l makes an upward pass followed by a downward pass to calculate the local expansions at that level % function get_l(mpoles,number,min,max) = if (number<=2) then empty_vect(number) else let limit = number/2; if_bodies = {#mpoles>0: mpoles}; curr_indices = get_seq(number); par_indices = get_seq(limit); child_indices = {flatten(flatten({{{(x,y,z): z in [2*k,2*k+1]} : y in [2*j,2*j+1]}: x in [2*i,2*i+1]})): (i,j,k) in par_indices}; child_offsets = get_nested_offsets(child_indices,number); parent_centres = get_centres(min,max,limit); kid_centres = get_centres(min,max,number); % paired to do one nested get % kid_centres_mpoles = zip(kid_centres,mpoles); nested_centres_mpoles = nested_get(kid_centres_mpoles,child_offsets); (nested_kid_centres,nested_kid_mpoles) = nested_unzip(nested_centres_mpoles); parent_to_child_offsets = {find_offsets(parent_centres,nested_kid_centres) : parent_centres;nested_kid_centres}; parent_mpoles = {sum_terms({ser_trans_mpole(kid_mpole,disp1): kid_mpole in nested_kid_mpoles; disp1 in parent_to_child_offsets}): nested_kid_mpoles; parent_to_child_offsets}; % recursive call to get local expansions % par_locals = get_l(parent_mpoles,limit, min,max); % paired to do one nested get % par_centres_locals = zip(parent_centres,par_locals); parent_indices = {(x/2,y/2,z/2) : (x,y,z) in curr_indices}; parent_offsets = {(x*limit^2 + y*limit + z): (x,y,z) in parent_indices}; (distr_par_centres_locals) = par_centres_locals -> parent_offsets; (distr_par_centres,distributed_locals) = unzip(distr_par_centres_locals); child_to_par_offsets = { new_coords(distr_par_centres, kid_centres) : distr_par_centres; kid_centres}; sub_locals = {ser_trans_local(distributed_locals,disp,if_bodies) : disp in child_to_par_offsets; distributed_locals;if_bodies}; ilist_inds = get_ilist_inds(curr_indices,number,if_bodies); ilist_offsets = get_nested_offsets(ilist_inds,number); ilist_centres = nested_get(kid_centres,ilist_offsets); ilist_disp = {find_offsets2(kid_centres,ilist_centres) : ilist_centres; kid_centres}; ilist_locals = par_ser_m_to_l(mpoles,ilist_disp,if_bodies, ilist_offsets, 0); locals = {sum_2_terms(ilist_locals,sub_locals) : sub_locals; ilist_locals}; in locals $ % finds the bounding box for the particles % function find_min_max(particles) = let coords = {coords : (coords,mass) in particles}; (xs,ys,zs) = unzip3(coords); minimum = min3(min_val(xs),min_val(ys),min_val(zs)); maximum = max3(max_val(xs),max_val(ys),max_val(zs)); in (minimum,maximum) $ % acceleration at (x0,y0,z0) due to patricle ((x,y,z),m) % function get_acc(((x0,y0,z0),m0),((x,y,z),m)) = if ((x0==x) and (y0==y) and (z0==z)) then (0.0,0.0,0.0) else let dx = x-x0; dy = y-y0; dz = z-z0; rsq = (dx^2 + dy^2 + dz^2); mrcubed = m/(rsq*sqrt(rsq)) in dx*mrcubed,dy*mrcubed,dz*mrcubed $ % acceleration on a particle due to a given set of other particles % function ser_particle_cell_force(particle,other_bodies) = let acc_vec = {get_acc(particle,other_bodies): other_bodies}; in sumv(acc_vec) $ % calculates the direct forces on particles in a leaf cell due to its neighbours, for "blk" neighbours at a time % function ser_cell_force(cell,neighbours,blk) = if (#neighbours <= blk) then {ser_particle_cell_force(cell,neighbours) : cell} else let head = take(neighbours,blk); rest = drop(neighbours,blk); temp = {ser_particle_cell_force(cell,head) : cell}; in add_vect(temp, ser_cell_force(cell,rest,blk)) $ Limit = 15000; % limits number of direct interactions occuring in parallel % % calculates recursively the direct forces at the leaf level % function serial_forces(cell_neighbours,cell_list) = if (#cell_neighbours==0) then []([(float,float,float)]) else let ((cell,near_inds),rest) = head_rest(cell_neighbours); near_cells = cell_list -> near_inds; curr_result = if (#cell==0) then [](float,float,float) else ser_cell_force(cell,flatten(near_cells),Limit/(#cell)); rest_res = serial_forces(rest,cell_list); in [curr_result]++rest_res $ function ser_get_dir_forces(cells,number,min,max) = let near_inds = get_near_inds(number); near_offsets = get_nested_offsets(near_inds,number); temp_vect = {(cells,near_offsets) : cells; near_offsets}; in serial_forces(temp_vect,cells) $ % The main function call to invoke FMM % % -------------------------------------% function FMM(particle_vect,levels) = let seq = iseq(0,1,#particle_vect); temp_vect = {(i,particle_vect): i in seq; particle_vect}; number = 2^(levels-1); (min,max) = find_min_max(particle_vect); particles_in_cells = place_into_cells(particle_vect,min,max,number); cells = {{(coords,m):(coords,vel,m) in p_in_c} : p_in_c in particles_in_cells}; particles = shift_coords(cells,min,max,number); leaf_mpoles = form_mpoles(particles); leaf_locals = get_l(leaf_mpoles,number,min,max); grad_forces = {gradient_at_leaf(leaf_locals,particles): leaf_locals;particles}; dir_forces = ser_get_dir_forces(cells,number,min,max); total_forces = {add_vect(grad_forces,dir_forces) : grad_forces; dir_forces}; in zip(flatten(particles_in_cells),flatten(total_forces)) $