\documentstyle[psfig,slide]{article}
\setlength{\textheight}{9.5in}
%\setlength{\columnsep}{1.8pc}
\setlength{\textwidth}{6.5in}
\setlength{\footheight}{0.0in}
\setlength{\topmargin}{-.5in}
\setlength{\headheight}{0.0in}
\setlength{\headsep}{0.0in}
\setlength{\oddsidemargin}{0in}
\setlength{\parindent}{1pc}

%\nologo
\pagestyle{headings}
\font\bigex=cmex10 scaled \magstep 5
\input epsf

\renewcommand{\footstring}{\sc Dinda, 1994}

\begin{document}

\slidehead{}

\begin{center}
{\large Traffic Characteristics of Parallel Programs}

{\em P. A. Dinda \hspace{.5in} B. M. Garcia \hspace{.5in} K. S. Leung}

5/3/95
\end{center}

\newpage

\slidehead{Overview}
\begin{itemize}
\item Motivation
\item Sample programs
\item Methodology
\item Results 
   \begin{itemize}
   \item SOR and 2DFFT kernels (Dinda)
   \item AIRSHED model (Leung)
   \item DSD Utility (Garcia)
   \end{itemize}
\item Discussion
\end{itemize}

\newpage


\slidehead{Motivation}
\begin{itemize}
\item Fast workstations, high speed networks, QOS

\centerline{
\begin{tabular}{|l|l|l|}
\hline
Network & Raw Link BW & Topology\\
\hline
Ethernet & 1.25 MB/s & Bus\\
FDDI     & 12.5 MB/s & Bus or switch\\
IBM SP-2 & 45 MB/s & Switch\\
Cray T3D & 70 MB/s & 3D Torus\\
ATM (OC-12) & 70 MB/s & Switch\\
HIPPI & 100 MB/s & Switch \\
Intel Paragon & 200 MB/s & 2D Mesh\\
\hline
\end{tabular}
}

\item Communication standardization (PVM, MPI)
\item Languages (Fortran 90, HPF (DEC, PGI, etc.))
\item Support utilities (Condor, DSD, DQS, reliable-PVM, etc.)
\end{itemize}

\newpage

\slidehead{Sample programs}

\begin{itemize}
\item Kernels for SPMD communication patterns
   \begin{itemize}
      \item Chosen to represent some common patterns
      \item Written in Fx, an HPF variant
      \item Use PVM, a presentation layer, for communication
   \end{itemize}

\vspace{.3in}
\centerline{\psfig{figure=commpatterns.eps,width=3in}}

\centerline{\small
\begin{tabular}{|l|l|l|}
\hline
Pattern & Kernel & Description\\
\hline
Neighbor &  SOR & 2D Successive overrelaxation\\
All-to-all & 2DFFT & 2D Data parallel FFT\\
Partition & T2DFFT & 2D Task parallel FFT\\
Broadcast & SEQ & Sequential I/O\\
Tree & HIST & 2D Image histogram\\
\hline
\end{tabular}
}

\item AIRSHED air quality modeling application
  \begin{itemize}
     \item Written in Fx and run using PVM
     \item Program is a {\em model} of actual application
  \end{itemize}
\item DSD distributed fault diagnosis utility
  \begin{itemize}
     \item Representative of support utilities for parallel programs
  \end{itemize}
\end{itemize}

\newpage

\slidehead{Methodology}
\begin{itemize}
\item Environment
  \begin{itemize}
      \item Nine DEC 3K/400 Alpha workstations
      \item Multi-segment bridged Ethernet LAN
  \end{itemize}
\item{Compilation and communication methods}
  \begin{itemize}
      \item Kernels and AIRSHED: Fx + DEC F77 + PVM (TCP)
      \item DSD: g++ + UDP
  \end{itemize}
\item{Measurement}
  \begin{itemize}
      \item Dedicated trace gathering machine
      \item ``promiscuous mode'' + TCPDUMP
      \item Count whole packets, not just data portions
  \end{itemize}
\item View ``traffic of a connection'' as all traffic between
a source/destination pair on behalf of the parallel program
  \begin{itemize}
      \item $P$ processors $\rightarrow$ $P(P-1)$ connections
      \item Connection includes ACKs for symmetric connection
  \end{itemize}
\item Full results include packet size statistics, interarrival
time statistics, instantaneous average bandwidth, bandwidth power
spectra, and DBIND characterizations
\end{itemize}


\newpage

\slidehead{Packet size statistics}

\centerline{
\begin{tabular}{|l|c|c|c|c|}
\hline
 & \multicolumn{4}{c|}{\bf Packet Size (Bytes)} \\
\cline{2-5}
{\bf Program} & Min & Max & Avg & SD \\
\hline
SOR    & 58 & 1518 & 473 & 568 \\
2DFFT  & 58 & 1518 & 969 & 678 \\
T2DFFT & 58 & 1518 & 912 & 663 \\
SEQ    & 58 & 90   & 75  & 14  \\
HIST   & 58 & 1518 & 499 & 575 \\
\hline
\end{tabular}
}
\vspace{.1in}
\centerline{(aggregate)}
\vspace{.1in}
\centerline{
\begin{tabular}{|l|c|c|c|c|}
\hline
 & \multicolumn{4}{c|}{\bf Packet Size (Bytes)} \\
\cline{2-5}
{\bf Program} & Min & Max & Avg & SD \\
\hline
SOR    & 58 & 1518 & 577 & 591 \\
2DFFT  & 58 & 1518 & 977 & 667 \\
T2DFFT &134 & 1518 & 1442 & 158 \\
SEQ    & - & -   & -  & -  \\
HIST   & - & - & - & - \\
\hline
\end{tabular}
}
\vspace{.1in}
\centerline{(connection)}

\newpage

\slidehead{Interarrival time statistics}

\begin{center}
\begin{tabular}{|l|c|c|c|c|}
\hline
 & \multicolumn{4}{c|}{\bf Interarrival Time (ms)} \\
\cline{2-5}
{\bf Program} & Min & Max & Avg & SD \\
\hline
2DFFT  & 0.0 & 1395.8 & 1.3  & 10.8    \\
HIST   & 0.0 & 449.9  & 16.5 & 45.5 \\
SEQ    & 0.0 & 218.6  & 1.3  & 8.6  \\
SOR    & 0.0 & 1728.7 & 82.1 & 234.9 \\
T2DFFT & 0.0 & 1301.6 & 1.5  & 14.3   \\
\hline
\end{tabular} 
\vspace{.1in}

(aggregate)
\vspace{.1in}

\begin{tabular}{|l|c|c|c|c|}
\hline
 & \multicolumn{4}{c|}{\bf Interarrival Time (ms)} \\
\cline{2-5}
{\bf Program} & Min & Max & Avg & SD \\
\hline
2DFFT  & 0.0 & 2732.6 & 15.1 & 120.5    \\
HIST   & -   & -      & -    & - \\
SEQ    & -   & -      & -    & -  \\
SOR    & 0.0 & 1797.0 & 614.2& 590.8 \\
T2DFFT & 0.0 & 4216.7 & 9.5  & 127.3 \\
\hline
\end{tabular}
\vspace{.1in}

(connection)
\end{center}

\newpage


\slidehead{SOR instantaneous bandwidth}

\centerline{\psfig{figure=SOR.all.patch.time.winbw.chop.10.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=SOR.ba.patch.time.winbw.chop.10.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}

\newpage

\slidehead{SOR bandwidth power spectrum}

\centerline{\psfig{figure=SOR.all.patch.time.swb.chop.psd.all.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=SOR.ba.patch.time.swb.chop.psd.all.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}


\newpage

\slidehead{SOR DBIND characterization}

\centerline{\psfig{figure=SOR.all.patch.time.bwp.chop.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=SOR.ba.patch.time.bwp.chop.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}


\newpage

\slidehead{2DFFT instantaneous bandwidth}

\centerline{\psfig{figure=FFT.all.patch.time.winbw.chop.10.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=FFT.ba.patch.time.winbw.chop.10.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}

\newpage

\slidehead{2DFFT bandwidth power spectrum}

\centerline{\psfig{figure=FFT.all.patch.time.swb.chop.psd.5.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=FFT.ba.patch.time.swb.chop.psd.10.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}


\newpage

\slidehead{2DFFT DBIND characterization}

\centerline{\psfig{figure=FFT.all.patch.time.bwp.chop.ps,height=3.5in}}

\vspace{.1in}
\centerline{(aggregate)}

\vspace{.1in}
\centerline{\psfig{figure=FFT.ba.patch.time.bwp.chop.ps,height=3.5in}}

\vspace{.1in}
\centerline{(connection)}


\newpage

\slidehead{Discussion}

\begin{itemize}
\item Parallel programs are fundamentally different traffic sources
\item Bursty
\item {\em Global, collective} communication patterns
\item Traffic along different connections is {\em correlated}! 
  \begin{itemize}
     \item Can even be {\em in phase} for synchronizing patterns or
           external synchronization
  \end{itemize}
\item Compared to video traffic along a connection, we know the burst
      {\em size}, but the burst {\em interval} depends on connection bandwidth
\centerline{$t_{bi} = t_{local} + N/B$}
\item Three parameter source model suggestion: $[l(),b(),c]$ where
  \begin{itemize}
      \item $l: P \rightarrow t_{local}$
      \item $b: P \rightarrow N $
      \item $c$ is the communication pattern
  \end{itemize}
\item Application wants to minimize burst interval given the current
      network conditions, and is willing to change the number of processors $P$
\end{itemize}

\newpage

\end{document}






