| |
In ICCD '00
Srihari Cadambi and Seth Copen Goldstein
Austin, TX
September, 2000
download pdf
@inproceedings{cadambi-iccd00,
  author    = {Cadambi, Srihari and Goldstein, Seth Copen},
  title     = {Efficient Place and Route for Pipeline Reconfigurable
               Architectures},
  booktitle = {ICCD '00},
  address   = {Austin, TX},
  month     = {September},
  year      = {2000},
  url       = {http://www.cs.cmu.edu/~seth/papers/cadambi-iccd00.pdf},
  keywords  = {CAD,Place and Route}
}
Related Papers
| CAD |
|
Slack Analysis in the System Design Loop | bib | |
Girish Venkataramani and Seth Copen Goldstein.
In IEEE/ACM/IFIP International Conference on Hardware/Software Codesign and System Synthesis (CODES-ISSS),
pages 231–236, October, 2008.
|
| @inproceedings{venkataramani-codes08,
author = {Venkataramani, Girish and Goldstein, Seth Copen},
booktitle = {IEEE/ACM/IFIP International Conference on
Hardware/Software Codesign and System Synthesis {(CODES-ISSS)}},
year = {2008},
address = {Atlanta, GA},
month = {October},
keywords = {Asychronous Circuits, CAD, Global Critical Path},
title = {Slack Analysis in the System Design Loop},
pages = {231--236}
}
|
|
Area Optimizations for Dual-Rail Circuits Using Relative-Timing Analysis | pdf bib | |
Tiberiu Chelcea, Girish Venkataramani, and Seth Copen Goldstein.
In Proceedings of the 13th IEEE International Symposium on Asynchronous Circuits and Systems,
pages 117–128, March, 2007.
|
| @inproceedings{chelcea-async07,
  author    = {Chelcea, Tiberiu and Venkataramani, Girish and Goldstein,
               Seth Copen},
  title     = {Area Optimizations for Dual-Rail Circuits Using
               Relative-Timing Analysis},
  booktitle = {Proceedings of the 13th IEEE International Symposium on
               Asynchronous Circuits and Systems},
  address   = {Berkeley, CA},
  month     = {March},
  year      = {2007},
  pages     = {117--128},
  abstract  = {Future deep sub-micron technologies will be
               characterized by large parametric variations, which could make
               asynchronous design an attractive solution for use on large
               scale. However, the investment in asynchronous CAD tools does not
               approach that in synchronous ones. Even when asynchronous tools
               leverage existing synchronous toolflows, they introduce large
               area and speed overheads. This paper proposes several heuristic
               and optimal algorithms, based on timing interval analysis, for
               improving existing asynchronous CAD solutions by optimizing area.
               The optimized circuits are 2.4 times smaller for an optimal
               algorithm and 1.8 times smaller for a heuristic one than the
               existing solutions. The optimized circuits are also shown to be
               resilient to large parametric variations, yielding better
               average-case latencies than their synchronous counterparts.},
  url       = {http://www.cs.cmu.edu/~seth/papers/chelcea-async07.pdf},
  keywords  = {Asychronous Circuits, CAD}
}
|
|
Global Critical Path: A Tool for System-Level Timing Analysis | pdf bib | |
Girish Venkataramani, Mihai Budiu, Tiberiu Chelcea, and Seth Copen Goldstein.
In Proceedings of the 44th ACM/IEEE Design Automation Conference,
pages 783–786, June, 2007.
|
| @inproceedings{dac07-gcp,
author = {Venkataramani, Girish and Budiu, Mihai and Chelcea,
Tiberiu and Goldstein, Seth Copen},
title = {Global Critical Path: A Tool for System-Level Timing
Analysis},
booktitle = {Proceedings of the 44th ACM/IEEE Design Automation
Conference},
year = {2007},
month = {June},
address = {San Diego, CA},
pages = {783--786},
abstract = {An effective method for focusing optimization effort on
the most important parts of a design is to examine those elements
on the critical path. Traditionally, the critical path is defined
at the RTL level, as the longest path in the combinational logic
between clocked registers. In this paper, we present a
system-level timing analysis technique to define the concept of a
Global Critical Path (GCP), for predicting system-level
performance. We show how the GCP can be used as a theoretical and
practical tool for understanding, summarizing and optimizing the
behavior of highly concurrent self-timed circuits. We formally
define the GCP and show how it can be constructed using a
discrete event model and hardware profiling techniques. The GCP
provides valuable insight into the control-path behavior of
circuits and in finding system-level bottlenecks. We have
incorporated the GCP construction and analysis framework into a
high-level synthesis and simulation toolchain, thus enabling
complete automation in modeling, analysis and optimization.},
url = {http://www.cs.cmu.edu/~seth/papers/dac07-gcp.pdf},
keywords = {Asychronous Circuits, CAD, Global Critical Path, System
modeling, Hardware profiling}
}
|
|
Operation Chaining Asynchronous Pipelined Circuits | pdf bib | |
Girish Venkataramani and Seth Copen Goldstein.
In ICCAD,
November, 2007.
|
| @inproceedings{venkataramani-iccad07,
author = {Venkataramani, Girish and Goldstein, Seth Copen},
title = {Operation Chaining Asynchronous Pipelined Circuits},
booktitle = {IEEE/ACM International Conference on Computer-Aided
Design (ICCAD)},
abstract = {We define operation chaining (op-chaining) as an
optimization problem to determine the optimal pipeline depth for
balancing performance against energy demands in pipelined
asynchronous designs. Since there are no clock period
requirements, asynchronous pipeline stages can have non-uniform
latencies. We exploit this fact to coalesce several stages
together thereby saving power and area due to the elimination of
control-path resources from the pipeline. The trade-off is
potentially reduced pipeline parallelism. In this paper, we
formally define this optimization as a graph covering problem,
which finds sub-graphs that will be synthesized as an opchained
pipeline stage. We then define the solution space for provably
correct solutions and present an algorithm to efficiently search
this space. The search technique partitions the graph based on
post-dominator relationships to find sub-graphs that are
potential op-chain candidates. We use knowledge of the Global
Critical Path (GCP) [13] to evaluate the performance impact of
accepting a candidate sub-graph and formulate a heuristic cost
function to model this trade-off. The algorithm has a
quadratic-time complexity in the size of the dataflow graph. We
have implemented this algorithm within an automated asynchronous
synthesis toolchain [12]. Experimental evidence from applying the
algorithm on several media processing kernels reveals that the
average energy-delay and energy-delay-area products improve by
about 1.4x and 1.8x respectively, with a maximum improvement of
5x and 18x.},
month = {November},
year = {2007},
url = {http://www.cs.cmu.edu/~seth/papers/venkataramani-iccad07.pdf},
keywords = {Asychronous Circuits, CAD, Global Critical Path}
}
|
|
Leveraging Protocol Knowledge in Slack Matching | pdf bib | |
Girish Venkataramani and Seth Copen Goldstein.
In IEEE/ACM International Conference on Computer-Aided Design (ICCAD),
November, 2006.
|
| @inproceedings{venkataramani-iccad06,
  author    = {Venkataramani, Girish and Goldstein, Seth Copen},
  title     = {Leveraging Protocol Knowledge in Slack Matching},
  booktitle = {IEEE/ACM International Conference on Computer-Aided
               Design (ICCAD)},
  address   = {San Jose, CA},
  month     = {November},
  year      = {2006},
  abstract  = {{Stalls, due to mis-matches in communication rates, are
               a major performance obstacle in pipelined circuits. If the rate
               of data production is faster than the rate of consumption, the
               resulting design performs slower than when the communication rate
               is matched. This can be remedied by inserting pipeline buffers
               (to temporarily hold data), allowing the producer to proceed if
               the consumer is not ready to accept data. The problem of deciding
               which channels need these buffers (and how many) for an arbitrary
               communication profile is called the slack matching problem; the
               optimal solution to this problem has been shown to be
               NP-complete. \par In this paper, we present a heuristic that uses
               knowledge of the communication protocol to explicitly model these
               bottlenecks, and an iterative algorithm to progressively remove
               these bottlenecks by inserting buffers. We apply this algorithm
               to asynchronous circuits, and show that it naturally handles
               large designs with arbitrarily cyclic and acyclic topologies,
               which exhibit various types of control choice. The heuristic is
               efficient, achieving linear time complexity in practice, and
               produces solutions that (a) achieve up to 60\% performance
               speedup on large media processing kernels, and (b) can either be
               verified to be optimal, or the approximation margin can be
               bounded. }},
  keywords  = {Asychronous Circuits, Spatial Computing, CAD, Global
               Critical Path},
  url       = {http://www.cs.cmu.edu/~seth/papers/venkataramani-iccad06.pdf}
}
|
|
Modeling the Global Critical Path in Concurrent Systems | pdf bib | |
Girish Venkataramani, Tiberiu Chelcea, Mihai Budiu, and Seth Copen Goldstein.
Carnegie Mellon University Technical Report No. CMU-CS-06-144,
August, 2006.
|
| @techreport{venkataramani-tr06,
  author      = {Venkataramani, Girish and Chelcea, Tiberiu and Budiu,
                 Mihai and Goldstein, Seth Copen},
  title       = {Modeling the Global Critical Path in Concurrent Systems},
  institution = {Carnegie Mellon University},
  number      = {CMU-CS-06-144},
  month       = {August},
  year        = {2006},
  abstract    = {We show how the global critical path can be used as a
                 practical tool for understanding, optimizing and summarizing the
                 behavior of highly concurrent self-timed circuits. Traditionally,
                 critical path analysis has been applied to DAGs, and thus was
                 constrained to combinatorial sub-circuits. We formally define the
                 global critical path (GCP) and show how it can be constructed
                 using only local information that is automatically derived
                 directly from the circuit. We introduce a form of Production
                 Rules, which can accurately determine the GCP for a given input
                 vector, even for modules which exhibit choice and early
                 termination. \par The GCP provides valuable insight into the
                 control behavior of the application, which help in formulating
                 new optimizations and re-formulating existing ones to use the GCP
                 knowledge. We have constructed a fully automated framework for
                 GCP detection and analysis, and have incorporated this framework
                 into a high-level synthesis tool-chain. We demonstrate the
                 effectiveness of the GCP framework by re-formulating two
                 traditional CAD optimizations to use the GCP, yielding efficient
                 algorithms which improve circuit power (by up to 9\%) and
                 performance (by up to 60\%) in our experiments.},
  keywords    = {Asychronous Circuits, Spatial Computing,CAD, Global
                 Critical Path},
  url         = {http://www.cs.cmu.edu/~seth/papers/venkataramani-tr06.pdf}
}
|
|
SOMA: A Tool for Synthesizing and Optimizing Memory Accesses in ASICs | pdf bib | |
Girish Venkataramani, Tobias Bjerregaard, Tiberiu Chelcea, and Seth Copen Goldstein.
In IEEE/ACM/IFIP International Conference on Hardware/Software Codesign and System Synthesis (CODES-ISSS),
pages 231–236, September, 2005.
|
| @inproceedings{venkataramani-isss05,
title = {{SOMA}: A Tool for Synthesizing and Optimizing Memory
Accesses in {ASICs}},
author = {Venkataramani, Girish and Bjerregaard, Tobias and Chelcea,
Tiberiu and Goldstein, Seth Copen},
booktitle = {IEEE/ACM/IFIP International Conference on
Hardware/Software Codesign and System Synthesis (CODES-ISSS)},
year = {2005},
isbn = {1-59593-161-9},
pages = {231--236},
address = {Jersey City, NJ, USA},
month = {September},
abstract = {Arbitrary memory dependencies and variable latency
memory systems are major obstacles to the synthesis of
large-scale ASIC systems in high-level synthesis. This paper
presents SOMA, a synthesis framework for constructing Memory
Access Network (MAN) architectures that inherently enforce memory
consistency in the presence of dynamic memory access
dependencies. A fundamental bottleneck in any such network is
arbitrating between concurrent accesses to a shared memory
resource. To alleviate this bottleneck, SOMA uses an
application-specific concurrency analysis technique to predict
the dynamic memory parallelism profile of the application. This
is then used to customize the MAN architecture. Depending on the
parallelism profile, the MAN may be optimized for latency,
throughput or both. The optimized MAN is automatically
synthesized into gate-level structural Verilog using a flexible
library of network building blocks. SOMA has been successfully
integrated into an automated C-to-hardware synthesis flow, which
generates standard cell circuits from unrestricted ANSI-C
programs. Post-layout experiments demonstrate that application
specific MAN construction significantly improves power and
performance.},
keywords = {Asychronous Circuits, Spatial Computing,Phoenix,
CAD,Compilers:Memory Optimizations},
url = {http://www.cs.cmu.edu/~seth/papers/venkataramani-isss05.pdf}
}
|
|
Translating ANSI C to Asynchronous Circuits | pdf bib | |
Mihai Budiu, Girish Venkataramani, Tiberiu Chelcea, and Seth Copen Goldstein.
In 10th IEEE International Symposium on Asynchronous Circuits and Systems (ASYNC '04),
April, 2004.
|
| @inproceedings{budiu-async04,
title = {Translating {ANSI} {C} to Asynchronous Circuits},
url = {http://www.cs.cmu.edu/~seth/papers/budiu-async04.pdf},
booktitle = {10th IEEE International Symposium on Asynchronous
Circuits and Systems (ASYNC '04)},
author = {Budiu, Mihai and Venkataramani, Girish and Chelcea,
Tiberiu and Goldstein, Seth Copen},
address = {Crete, Greece},
year = {2004},
month = {April},
keywords = {Asychronous Circuits,CAD,Electronic Nanotechnology,Fault
and Defect Tolerance,Phoenix,Reconfigurable Computing,Spatial
Computing}
}
|
|
C to Asynchronous Dataflow Circuits: An End-to-End Toolflow | pdf bib | |
Girish Venkataramani, Mihai Budiu, Tiberiu Chelcea, and Seth Copen Goldstein.
In IEEE 13th International Workshop on Logic Synthesis (IWLS),
June, 2004.
|
| @inproceedings{venkataramani-iwls04,
  author    = {Venkataramani, Girish and Budiu, Mihai and Chelcea,
               Tiberiu and Goldstein, Seth Copen},
  title     = {{C} to Asynchronous Dataflow Circuits: An End-to-End
               Toolflow},
  booktitle = {IEEE 13th International Workshop on Logic Synthesis
               (IWLS)},
  address   = {Temecula, CA},
  month     = {June},
  year      = {2004},
  url       = {http://www.cs.cmu.edu/~seth/papers/venkataramani-iwls04.pdf},
  abstract  = {We present a complete toolflow that translates ANSI-C
               programs into asynchronous circuits. The toolflow is built around
               a compiler that converts C into a functional dataflow
               intermediate representation, exposing instruction-level, pipeline
               and memory parallelism. The compiler performs optimizations and
               converts the intermediate representation into pipelined
               asynchronous circuits, with no centralized controllers. In the
               resulting circuits, control is distributed, communication is
               achieved through local wires, and arbitration for datapath
               resources is unnecessary. Circuits automatically synthesized from
               Mediabench kernels exhibit substantially better energy-delay than
               either single-issue processors or aggressive superscalar cores.},
  keywords  = {Asychronous Circuits,Spatial Computing,Phoenix,CAD}
}
|
|
Molecules, Gates, Circuits, Computer | pdf bib | |
Seth Copen Goldstein and Mihai Budiu.
In Molecular Nanoelectronics,
January, 2003.
|
| @incollection{goldstein-mn03,
title = {Molecules, Gates, Circuits, Computer},
url = {http://www.cs.cmu.edu/~seth/papers/goldstein-mn03.pdf},
booktitle = {Molecular Nanoelectronics},
author = {Goldstein, Seth Copen and Budiu, Mihai},
year = {2003},
editor = {Mark A. Reed and Takhee Lee},
publisher = {American Scientific Publishers},
address = {Stevenson Ranch, CA},
month = {January},
isbn = {1-58883-006-3},
keywords = {Asychronous Circuits,CAD,Electronic Nanotechnology,Fault
and Defect Tolerance,Reconfigurable Computing,Spatial
Computing,electronic nanotechnology,molecular electronics}
}
|
|
MolSpice: Designing Molecular Logic Circuits | pdf bib | |
Seth Copen Goldstein, James Ellenbogen, David Almassiam, Matt Brown, Mark Cannarsa, Jesse Klein, Schuyler Schell, Geoff Washburn, and Matthew M Ziegler.
In Ninth Foresight Conference on Molecular Nanotechnology,
November, 2001.
|
| @inproceedings{goldstein-foresight01,
author = {Goldstein, Seth Copen and Ellenbogen, James and Almassiam,
David and Brown, Matt and Cannarsa, Mark and Klein, Jesse and
Schell, Schuyler and Washburn, Geoff and Ziegler, Matthew M},
title = {{MolSpice}: Designing Molecular Logic Circuits},
booktitle = {Ninth Foresight Conference on Molecular
Nanotechnology},
url = {http://www.cs.cmu.edu/~seth/papers/goldstein-foresight01.pdf},
year = {2001},
month = {November},
address = {Santa Clara, CA},
keywords = {Electronic Nanotechnology, Molecular Electronics, CAD}
}
|
|
Static Profile-driven Compilation for FPGAs | pdf bib | |
Srihari Cadambi and Seth Copen Goldstein.
In Proceedings of the 11th International Conference on Field-Programmable Logic and Applications,
August, 2001.
|
| @inproceedings{cadambi-fpl01,
title = {Static Profile-driven Compilation for {FPGAs}},
url = {http://www.cs.cmu.edu/~seth/papers/cadambi-fpl01.pdf},
booktitle = {Proceedings of the 11th International Conference on
Field-Programmable Logic and Applications},
author = {Cadambi, Srihari and Goldstein, Seth Copen},
address = {Belfast, Northern Ireland},
year = {2001},
month = {August},
keywords = {CAD,Reconfigurable Computing}
}
|
|
BitValue Inference: Detecting and Exploiting Narrow Bitwidth Computations | pdf bib | |
Mihai Budiu and Seth Copen Goldstein.
Carnegie Mellon University Technical Report No. CMU-CS-00-141,
June, 2000.
See budiu-europar00.
|
| @techreport{budiu-tr00,
title = {{BitValue} Inference: Detecting and Exploiting Narrow
Bitwidth Computations},
url = {http://www.cs.cmu.edu/~seth/papers/budiu-tr00.pdf},
author = {Budiu, Mihai and Goldstein, Seth Copen},
institution = {Carnegie Mellon University},
number = {CMU-CS-00-141},
year = {2000},
month = {June},
see = {budiu-europar00},
keywords = {CAD,Compilers:CASH,Reconfigurable Computing}
}
|
|
Efficient Place and Route for Pipeline Reconfigurable Architectures | pdf bib | |
Srihari Cadambi and Seth Copen Goldstein.
In ICCD '00,
September, 2000.
|
| @inproceedings{cadambi-iccd00,
  author    = {Cadambi, Srihari and Goldstein, Seth Copen},
  title     = {Efficient Place and Route for Pipeline Reconfigurable
               Architectures},
  booktitle = {ICCD '00},
  address   = {Austin, TX},
  month     = {September},
  year      = {2000},
  url       = {http://www.cs.cmu.edu/~seth/papers/cadambi-iccd00.pdf},
  keywords  = {CAD,Place and Route}
}
|
|
BitValue Inference: Detecting and Exploiting Narrow Bitwidth Computations | pdf bib | |
Mihai Budiu, Majd Sakr, Kevin Walker, and Seth Copen Goldstein.
In Proceedings of the 2000 Europar Conference,
volume 1900, pages 969–979, August, 2000.
Also appeared as CMU CS Technical Report, CMU-CS-00-141, October 2000.
|
| @inproceedings{budiu-europar00,
  author    = {Budiu, Mihai and Sakr, Majd and Walker, Kevin and
               Goldstein, Seth Copen},
  title     = {{BitValue} Inference: Detecting and Exploiting Narrow
               Bitwidth Computations},
  booktitle = {Proceedings of the 2000 Europar Conference},
  series    = {Lecture Notes in Computer Science},
  volume    = {1900},
  pages     = {969--979},
  publisher = {Springer Verlag},
  address   = {Munich, Germany},
  month     = {August},
  year      = {2000},
  issn      = {0302-9743},
  url       = {http://www.cs.cmu.edu/~seth/papers/budiu-europar00.pdf},
  also      = {CMU CS Technical Report, CMU-CS-00-141, October 2000.},
  abstract  = {We present a compiler algorithm called BitValue, which
               can discover both unused and constant bits in dusty-deck C
               programs. BitValue uses forward and backward dataflow analyses,
               generalizing constant-folding and dead-code detection at the
               bit-level. This algorithm enables compiler optimizations which
               target special processor architectures for computing on
               non-standard bitwidths. Using this algorithm we show that up to
               31\% of the computed bytes are thrown away (for programs from
               SpecINT95 and Mediabench). A compiler for reconfigurable hardware
               uses this algorithm to achieve substantial reductions (up to
               20-fold) in the size of the synthesized circuits.},
  keywords  = {Spatial Computing,Reconfigurable
               Computing,Phoenix,PipeRench,CAD}
}
|
|
CPR: A Configuration Profiling Tool | pdf bib | |
Srihari Cadambi and Seth Copen Goldstein.
In 7th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM '99),
pages 104, April, 1999.
|
| @inproceedings{cadambi-fccm99,
title = {{CPR}: A Configuration Profiling Tool},
url = {http://www.cs.cmu.edu/~seth/papers/cadambi-fccm99.pdf},
booktitle = {7th Annual IEEE Symposium on Field-Programmable Custom
Computing Machines (FCCM '99)},
author = {Cadambi, Srihari and Goldstein, Seth Copen},
year = {1999},
pages = {104},
address = {Napa Valley, CA},
month = {April},
keywords = {CAD,Reconfigurable Computing,Place And Route}
}
|
| Place And Route |
|
Efficient Place and Route for Pipeline Reconfigurable Architectures | pdf bib | |
Srihari Cadambi and Seth Copen Goldstein.
In ICCD '00,
September, 2000.
|
| @inproceedings{cadambi-iccd00,
  author    = {Cadambi, Srihari and Goldstein, Seth Copen},
  title     = {Efficient Place and Route for Pipeline Reconfigurable
               Architectures},
  booktitle = {ICCD '00},
  address   = {Austin, TX},
  month     = {September},
  year      = {2000},
  url       = {http://www.cs.cmu.edu/~seth/papers/cadambi-iccd00.pdf},
  keywords  = {CAD,Place and Route}
}
|
|
CPR: A Configuration Profiling Tool | pdf bib | |
Srihari Cadambi and Seth Copen Goldstein.
In 7th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM '99),
pages 104, April, 1999.
|
| @inproceedings{cadambi-fccm99,
title = {{CPR}: A Configuration Profiling Tool},
url = {http://www.cs.cmu.edu/~seth/papers/cadambi-fccm99.pdf},
booktitle = {7th Annual IEEE Symposium on Field-Programmable Custom
Computing Machines (FCCM '99)},
author = {Cadambi, Srihari and Goldstein, Seth Copen},
year = {1999},
pages = {104},
address = {Napa Valley, CA},
month = {April},
keywords = {CAD,Reconfigurable Computing,Place And Route}
}
|
|
Fast Compilation for Pipelined Reconfigurable Fabrics | pdf bib | |
Mihai Budiu and Seth Copen Goldstein.
In Proceedings of the 1999 ACM/SIGDA Seventh International Symposium on Field Programmable Gate Arrays (FPGA '99),
pages 195–205, February, 1999.
|
| @inproceedings{budiu-fpga99,
author = {Budiu, Mihai and Goldstein, Seth Copen},
title = {Fast Compilation for Pipelined Reconfigurable Fabrics},
booktitle = {Proceedings of the 1999 ACM/SIGDA Seventh International
Symposium on Field Programmable Gate Arrays (FPGA '99)},
month = {February},
year = {1999},
pages = {195--205},
url = {http://www.cs.cmu.edu/~seth/papers/budiu-fpga99.pdf},
abstract = {In this paper we describe a compiler which quickly
synthesizes high quality pipelined datapaths for pipelined
reconfigurable devices. The compiler uses the same internal
representation to perform synthesis, module generation,
optimization, and place and route. The core of the compiler is a
linear time place and route algorithm more than two orders of
magnitude faster than traditional CAD tools. The key behind our
approach is that we never backtrack, rip-up, or re-route.
Instead, the graph representing the computation is preprocessed
to guarantee routability by inserting lazy noops. The
preprocessing steps provides enough information to make a greedy
strategy feasible. The compilation speed is approximately 3000
bit-operations/second (on a PII/400Mhz) for a wide range of
applications. The hardware utilization averages 60\% on the
target device, PipeRench.},
keywords = {Reconfigurable Computing,PipeRench,Place and Route}
}
|
Back to publications list
|