From 424e3e31106f04b05d6636e08eba26fdd70bc7b7 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Wed, 8 Jan 2025 21:26:29 +0000 Subject: [PATCH] deploy: 56b7026183af4d39f4fd6025ca236a7a900640c6 --- 404.html | 2 +- categories/analyzing/index.html | 6 +++--- categories/conditioning/index.html | 6 +++--- categories/design/index.html | 6 +++--- categories/domain-flow/index.html | 6 +++--- categories/dsp/index.html | 6 +++--- categories/filtering/index.html | 6 +++--- categories/identification/index.html | 6 +++--- categories/index.html | 6 +++--- categories/introduction/index.html | 6 +++--- categories/matrix-math/index.html | 6 +++--- categories/schedule/index.html | 6 +++--- categories/spacetime/index.html | 6 +++--- categories/transforming/index.html | 6 +++--- ch1/computational-spacetime/index.html | 8 ++++---- ch1/derivation/index.html | 8 ++++---- ch1/domain-flow/index.html | 8 ++++---- ch1/example/index.html | 8 ++++---- ch1/freeschedule/index.html | 8 ++++---- ch1/index.html | 8 ++++---- ch1/linearschedule/index.html | 8 ++++---- ch1/nextsteps/index.html | 8 ++++---- ch1/parallel-programming/index.html | 8 ++++---- ch1/spacetime/index.html | 8 ++++---- ch1/wavefront/index.html | 8 ++++---- ch2-moc/dfa/index.html | 22 +++++++++++++--------- ch2-moc/dfm/index.html | 10 +++++----- ch2-moc/index.html | 8 ++++---- ch2-moc/index.xml | 4 ++-- ch2-moc/nextsteps/index.html | 13 +++++++------ ch2-moc/spm/index.html | 8 ++++---- ch2-moc/taxonomy/index.html | 8 ++++---- ch3-design/control/index.html | 8 ++++---- ch3-design/currentstate/index.html | 8 ++++---- ch3-design/elements/index.html | 8 ++++---- ch3-design/energy/index.html | 8 ++++---- ch3-design/index.html | 8 ++++---- ch3-design/nextsteps/index.html | 8 ++++---- ch3-design/space/index.html | 8 ++++---- ch3-design/switching-energy/index.html | 8 ++++---- ch3-design/time/index.html | 8 ++++---- ch4/index.html | 8 ++++---- ch4/level1/index.html | 8 ++++---- ch4/level2/index.html | 8 ++++---- ch4/level3/index.html | 8 ++++---- ch5/factorization/index.html | 6 +++--- ch5/index.html | 8 ++++---- ch6/index.html | 8 ++++---- ch6/matrixkernels/index.html | 8 ++++---- ch7/index.html | 6 +++--- ch7/lu/index.html | 8 ++++---- ch7/solvers/index.html | 6 +++--- ch8/conditioning/index.html | 6 +++--- ch8/filters/index.html | 6 +++--- ch8/identification/index.html | 8 ++++---- ch8/index.html | 8 ++++---- ch8/spectral/index.html | 6 +++--- ch8/transforms/index.html | 6 +++--- contentdev/index.html | 8 ++++---- contentdev/prototype/index.html | 8 ++++---- search/index.html | 8 ++++---- sitemap.xml | 2 +- tags/algorithm/index.html | 6 +++--- tags/computational-spacetime/index.html | 6 +++--- tags/conditioning/index.html | 6 +++--- tags/derivation/index.html | 6 +++--- tags/domain-flow/index.html | 6 +++--- tags/dsp/index.html | 6 +++--- tags/filtering/index.html | 6 +++--- tags/free-schedule/index.html | 6 +++--- tags/identification/index.html | 6 +++--- tags/index-space/index.html | 6 +++--- tags/index.html | 6 +++--- tags/lattice/index.html | 6 +++--- tags/linear-schedule/index.html | 6 +++--- tags/matrix-multiply/index.html | 6 +++--- tags/spectral-analysis/index.html | 6 +++--- tags/transform/index.html | 6 +++--- 78 files changed, 281 insertions(+), 276 deletions(-) diff --git a/404.html b/404.html index 0358cf9..523f686 100644 --- a/404.html +++ b/404.html @@ -1,2 +1,2 @@ 404 Page not found - Domain Flow Architecture -

44

Not found

Whoops. Looks like this page doesn't exist ¯\_(ツ)_/¯.

Go to homepage

\ No newline at end of file +

44

Not found

Whoops. Looks like this page doesn't exist ¯\_(ツ)_/¯.

Go to homepage

\ No newline at end of file diff --git a/categories/analyzing/index.html b/categories/analyzing/index.html index 79aa267..2c4d5f4 100644 --- a/categories/analyzing/index.html +++ b/categories/analyzing/index.html @@ -1,10 +1,10 @@ Analyzing - Category - Domain Flow Architecture -

Category - Analyzing

S

\ No newline at end of file diff --git a/categories/conditioning/index.html b/categories/conditioning/index.html index 9ab2eca..c8b25c1 100644 --- a/categories/conditioning/index.html +++ b/categories/conditioning/index.html @@ -1,10 +1,10 @@ Conditioning - Category - Domain Flow Architecture -

Category - Conditioning

S

\ No newline at end of file diff --git a/categories/design/index.html b/categories/design/index.html index 43ec71d..8468dfe 100644 --- a/categories/design/index.html +++ b/categories/design/index.html @@ -1,10 +1,10 @@ Design - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/domain-flow/index.html b/categories/domain-flow/index.html index 74876dd..56b26f2 100644 --- a/categories/domain-flow/index.html +++ b/categories/domain-flow/index.html @@ -1,10 +1,10 @@ Domain-Flow - Category - Domain Flow Architecture -

Category - Domain-Flow

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/categories/dsp/index.html b/categories/dsp/index.html index 3f0a99c..48c46c1 100644 --- a/categories/dsp/index.html +++ b/categories/dsp/index.html @@ -1,10 +1,10 @@ Dsp - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/filtering/index.html b/categories/filtering/index.html index 639472a..333a74d 100644 --- a/categories/filtering/index.html +++ b/categories/filtering/index.html @@ -1,10 +1,10 @@ Filtering - Category - Domain Flow Architecture -

Category - Filtering

D

\ No newline at end of file diff --git a/categories/identification/index.html b/categories/identification/index.html index 014e99a..227363c 100644 --- a/categories/identification/index.html +++ b/categories/identification/index.html @@ -1,10 +1,10 @@ Identification - Category - Domain Flow Architecture -

Category - Identification

I

\ No newline at end of file diff --git a/categories/index.html b/categories/index.html index 5fb5489..42d1993 100644 --- a/categories/index.html +++ b/categories/index.html @@ -1,10 +1,10 @@ Categories - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/introduction/index.html b/categories/introduction/index.html index c6c4ab6..4645317 100644 --- a/categories/introduction/index.html +++ b/categories/introduction/index.html @@ -1,10 +1,10 @@ Introduction - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/matrix-math/index.html b/categories/matrix-math/index.html index 9626799..c053019 100644 --- a/categories/matrix-math/index.html +++ b/categories/matrix-math/index.html @@ -1,10 +1,10 @@ Matrix-Math - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/schedule/index.html b/categories/schedule/index.html index bfdcc8a..50e7895 100644 --- a/categories/schedule/index.html +++ b/categories/schedule/index.html @@ -1,10 +1,10 @@ Schedule - Category - Domain Flow Architecture -

Category - Schedule

F

L

\ No newline at end of file diff --git a/categories/spacetime/index.html b/categories/spacetime/index.html index 1bca897..47113d7 100644 --- a/categories/spacetime/index.html +++ b/categories/spacetime/index.html @@ -1,10 +1,10 @@ Spacetime - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/transforming/index.html b/categories/transforming/index.html index 474a284..b2d91a8 100644 --- a/categories/transforming/index.html +++ b/categories/transforming/index.html @@ -1,10 +1,10 @@ Transforming - Category - Domain Flow Architecture -

Category - Transforming

T

  • Transforms
\ No newline at end of file diff --git a/ch1/computational-spacetime/index.html b/ch1/computational-spacetime/index.html index 279548e..cff5631 100644 --- a/ch1/computational-spacetime/index.html +++ b/ch1/computational-spacetime/index.html @@ -1,5 +1,5 @@ Computational Spacetime - Domain Flow Architecture -

Computational Spacetime

Computational Spacetime

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/derivation/index.html b/ch1/derivation/index.html index 2155bb4..8855b13 100644 --- a/ch1/derivation/index.html +++ b/ch1/derivation/index.html @@ -3,7 +3,7 @@ The Linear Algebra universe is particularly rich in partial orders, something that has been exploited for centuries 1. Matrix Computations2 by Golub, and van Loan provide a comprehensive review. What follows may be a bit technical, but keep in mind the visualizations of the previous pages as you try to visualize what the math implies.">Derivation of the matrix multiply domain flow program - Domain Flow Architecture -

Derivation of the matrix multiply domain flow program

The concepts of partial and total orders are essential for finding optimal domain flow algorithms. +

Derivation of the matrix multiply domain flow program

The concepts of partial and total orders are essential for finding optimal domain flow algorithms.
Partial orders, or posets, are the source of high-performance, low-power execution patterns.

The Linear Algebra universe is particularly rich in partial orders, something
that has been exploited for centuries 1. Matrix Computations 2 by Golub and Van Loan provides
@@ -83,12 +83,12 @@
 b: b[i-1,j,k]
 c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k]
 }
-
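A minimal sequential C++ sketch of this recurrence system may help ground the notation. This is a reference evaluation only, not the domain flow execution model; array sizing and zero-initialized boundary faces are assumptions:

#include <vector>

// Reference evaluation of the matmul recurrence system:
//   a: a[i,j-1,k]   b: b[i-1,j,k]   c: c[i,j,k-1] + a[i,j-1,k]*b[i-1,j,k]
// Any loop order that respects these three dependencies is a valid schedule.
using Cube = std::vector<std::vector<std::vector<double>>>;

void matmul_recurrence(int N, Cube& a, Cube& b, Cube& c) {
    // arrays are (N+1)^3; the index-0 faces hold the embedded A, B inputs and c = 0
    for (int i = 1; i <= N; ++i)
        for (int j = 1; j <= N; ++j)
            for (int k = 1; k <= N; ++k) {
                a[i][j][k] = a[i][j-1][k];                       // propagate A along j
                b[i][j][k] = b[i-1][j][k];                       // propagate B along i
                c[i][j][k] = c[i][j][k-1] + a[i][j-1][k] * b[i-1][j][k];
            }
}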

1: History of Matrices and Determinants

2: Matrix Computations, Gene Golub and Charles van Loan

\ No newline at end of file diff --git a/ch1/domain-flow/index.html b/ch1/domain-flow/index.html index bec0f7a..f71d122 100644 --- a/ch1/domain-flow/index.html +++ b/ch1/domain-flow/index.html @@ -7,7 +7,7 @@ Implementation technology will impact these phases differently, and we are seeking a programming model that is invariant to the difference. A thought experiment will shed light on the desired properties of such a model.">Domain Flow - Domain Flow Architecture -

Domain Flow

Domain Flow

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/example/index.html b/ch1/example/index.html index ac2c4b9..6e97d68 100644 --- a/ch1/example/index.html +++ b/ch1/example/index.html @@ -3,7 +3,7 @@ compute ( (i,j,k) | 1 <= i,j,k <= N ) { a: a[i,j-1,k] b: b[i-1,j,k] c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k] } The underlying algorithm requires a domain of computation governed by a set of constraints, and a set of computational dependencies that implicitly define a partial order across all the operations in the computation. The partial order is readily visible in the need to have computed the result for $c[i,j,k-1]$ before the computation of $c[i,j,k]$ can commence. In contrast, the $a$ and $b$ recurrences are independent of each other.">An Example - Domain Flow Architecture -

An Example

Let’s look at a simple, but frequently used operator in Deep Learning inference: +

An Example

Let’s look at a simple, but frequently used operator in Deep Learning inference: dense matrix multiplication. A Domain Flow program 1 for this operator is shown below:

compute ( (i,j,k) | 1 <= i,j,k <= N ) {
     a: a[i,j-1,k]
@@ -38,12 +38,12 @@
 where the variable $a$ is defined.

A thorough understanding of the partial and total orders inherent in the parallel computation is essential for finding optimal domain flow algorithms.

High-performance, low-power execution patterns frequently involve a partial order that enables timely reuse of computational results, or creates flexibility to organize just-in-time arrival -of input operands to avoid memory elements.
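To make the partial order tangible, the sketch below (illustrative C++, not part of the program above) groups the index points of the domain by the wavefront i+j+k: all points within one group are mutually independent, and every dependency of a point lands in the preceding group.

#include <cstdio>
#include <map>
#include <tuple>
#include <vector>

int main() {
    // Group the index points of ((i,j,k) | 1 <= i,j,k <= N) by wavefront i+j+k.
    // The three dependencies (i,j-1,k), (i-1,j,k), (i,j,k-1) each land in the
    // previous group, so a whole group can execute concurrently.
    const int N = 3;
    std::map<int, std::vector<std::tuple<int,int,int>>> wavefront;
    for (int i = 1; i <= N; ++i)
        for (int j = 1; j <= N; ++j)
            for (int k = 1; k <= N; ++k)
                wavefront[i + j + k].emplace_back(i, j, k);
    for (const auto& [t, points] : wavefront)
        std::printf("step %d: %zu concurrent operations\n", t - 2, points.size());
    return 0;
}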

In the next segment, let’s explore these execution patterns.

1: Derivation of Domain Flow Matmul

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/freeschedule/index.html b/ch1/freeschedule/index.html index 4949e30..3266512 100644 --- a/ch1/freeschedule/index.html +++ b/ch1/freeschedule/index.html @@ -1,5 +1,5 @@ Free Schedule - Domain Flow Architecture -

Free Schedule

Free Schedule

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/index.html b/ch1/index.html index 33ba5d2..da520d3 100644 --- a/ch1/index.html +++ b/ch1/index.html @@ -3,16 +3,16 @@ High-performance, low-latency, energy-efficient computation is particularly important for the emerging application class of autonomous intelligent systems.">Domain Flow Algorithms - Domain Flow Architecture -

Domain Flow Algorithms

Domain Flow algorithms are parallel algorithms that incorporate the constraints of space and time. +

Domain Flow Algorithms

Domain Flow algorithms are parallel algorithms that incorporate the constraints of space and time. By honoring the delay that is inherent to exchanging information between two spatially separate computation or storage sites, domain flow algorithms can improve performance and energy efficiency compared to sequential programming models that depend on (globally addressable) random access memory.

High-performance, low-latency, energy-efficient computation is particularly important for the -emerging application class of autonomous intelligent systems.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/linearschedule/index.html b/ch1/linearschedule/index.html index 9dddc6f..140cb10 100644 --- a/ch1/linearschedule/index.html +++ b/ch1/linearschedule/index.html @@ -7,7 +7,7 @@ Let’s go through the thought experiment what the free schedule demands from a physical system. In the free schedule animation, the propagation recurrences distributing the $A$ and $B$ matrix elements throughout the 3D lattice run ‘ahead’ of the actual computational recurrence calculating the $C$ matrix elements.">Linear Schedules - Domain Flow Architecture -

Linear Schedules

Linear Schedules

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/nextsteps/index.html b/ch1/nextsteps/index.html index 6b74ed0..33a17ed 100644 --- a/ch1/nextsteps/index.html +++ b/ch1/nextsteps/index.html @@ -1,12 +1,12 @@ Next Steps - Domain Flow Architecture -

Next Steps

We have gone through a quick introduction to the basic concepts of parallel algorithm design. +

Next Steps

We have gone through a quick introduction to the basic concepts of parallel algorithm design.
Before we dive into what makes good parallel algorithms, we first must take a quick detour
-and discuss the physical hardware organization of computing engines.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/parallel-programming/index.html b/ch1/parallel-programming/index.html index 5065a1e..6e82a8e 100644 --- a/ch1/parallel-programming/index.html +++ b/ch1/parallel-programming/index.html @@ -1,5 +1,5 @@ Parallel Programming - Domain Flow Architecture -

Parallel Programming

To appreciate the domain flow programming model and what it enables, you need to think about the physical +

Parallel Programming

To appreciate the domain flow programming model and what it enables, you need to think about the physical
form a ‘program evaluator’ could take. In the days when a processor occupied the volume of a small room,
any physical computational machine was limited to a single computational element. This implied that the execution
of any algorithm had to be specified as a complete order in time.
@@ -19,12 +19,12 @@
machines mentioned above. Furthermore, the optimal algorithm even changes when the same machine architecture
introduces a new, typically faster, implementation. And we are not just talking about simple algorithmic changes,
such as loop order or blocking; sometimes even the underlying mathematics needs to change.

Given the complexity of writing parallel algorithms, this one-off nature of parallel algorithm design raised
-the question: is there a parallel programming model that is invariant to the implementation technology of the machine?

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/spacetime/index.html b/ch1/spacetime/index.html index 38809fa..dff16b9 100644 --- a/ch1/spacetime/index.html +++ b/ch1/spacetime/index.html @@ -1,5 +1,5 @@ Constraints of Spacetime - Domain Flow Architecture -

Constraints of Spacetime

If you visualize the ‘world’ from the perspective of an operand flowing through a machine, +

Constraints of Spacetime

If you visualize the ‘world’ from the perspective of an operand flowing through a machine,
you realize that a physical machine creates a specific spatial constraint for the movement of data.
Processing nodes are fixed in space, and information is exchanged between nodes to accomplish some transformation.
Nodes consume and generate information, and communication links move information (program and data) between nodes.
@@ -22,12 +22,12 @@
the propagation of information. A computational event has to be able to ‘see’ its operands before it
can commence. Otherwise stated, a computational event must lie in the future light cone of each of its operands.

These temporal constraints are further complicated by the fact that man-made structures today do not communicate
through free space yet, and the physical communication structure adds additional constraints
-on the shape and extent of the future cone.

These man-made computational structures are dubbed computational spacetimes.
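As a toy illustration of these constraints (the names and the signal velocity below are assumptions), a causality check in a two-dimensional computational spacetime reduces to testing whether the consuming event lies inside the producing event’s future cone:

#include <cmath>

struct Event { double x, y, t; };   // a production or consumption site in spacetime

// True when 'consumer' lies inside the future cone of 'producer', that is,
// when a signal travelling at velocity v through the fabric can deliver the
// operand before the consuming event fires.
bool in_future_cone(const Event& producer, const Event& consumer, double v) {
    double dx = consumer.x - producer.x;
    double dy = consumer.y - producer.y;
    double dist = std::sqrt(dx*dx + dy*dy);
    return consumer.t >= producer.t + dist / v;
}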

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/wavefront/index.html b/ch1/wavefront/index.html index 02d3599..b6c9886 100644 --- a/ch1/wavefront/index.html +++ b/ch1/wavefront/index.html @@ -1,5 +1,5 @@ Wavefronts of Computation - Domain Flow Architecture -

Wavefronts of Computation

Wavefronts of Computation

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/dfa/index.html b/ch2-moc/dfa/index.html index db7ebfb..faccb8c 100644 --- a/ch2-moc/dfa/index.html +++ b/ch2-moc/dfa/index.html @@ -1,5 +1,5 @@ -Domain Flow Architecture -

Domain Flow Architecture

With the advent of Very Large Scale Integration (VLSI), it became apparent +Domain Flow Architecture +

Domain Flow Architecture

With the advent of Very Large Scale Integration (VLSI), it became apparent
that the control mechanism of the Stored Program Machine (SPM) to manage
resource contention was not well-suited to the characteristics of VLSI 1.
VLSI offers large amounts of hardware at very low cost, but interconnections
@@ -24,16 +24,20 @@
contention management problem for the special case of fully articulated arrays.
But when the size of the problem was bigger than the available VLSI resources,
the methodology broke down. Any approach that uses divide-and-conquer
-to aggregate activity will need a control mechanism to reuse the processing
-element, and that mechanism will not be able to follow the constraints of VLSI,
-as spatial relationships will have been destroyed. The solution to this problem
-was offered by Omtzigt 4 in the form of infinite, but
-bounded network architectures using spatial data flow tags to control resource contention.
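The referenced paper 4 defines the actual mechanism; purely as a loose illustration of the ‘infinite, but bounded’ idea (every name below is hypothetical), a data token can carry its index-space coordinate as a spatial tag, and a finite processing array can fold the unbounded index space onto itself:

// Hypothetical sketch: a data token tagged with its index-space coordinate.
// A finite P x P array serves an unbounded index space by folding the tag
// onto a physical PE; the tag, not a global controller, resolves contention.
struct Token {
    int i, j, k;      // spatial data flow tag: position in the index space
    double value;
};

struct PE { int row, col; };

PE route(const Token& t, int P) {
    // residue mapping folds the infinite index space onto the bounded array
    return PE{ ((t.i % P) + P) % P, ((t.j % P) + P) % P };
}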

footnotes

[1] Mead, C.A. and Conway, L.A., Introduction to VLSI Systems, 1978

[2] Kung, H.T. and Leiserson, C.E., Systolic Arrays for VLSI, CMU-CS-79-103, 1978

[3] Dan I. Moldovan, On the Design of Algorithms for VLSI Systolic Arrays, Proceedings of the IEEE, Volume 71, Number 1, January 1983

[4] Omtzigt, E.T.L., Domain Flow and streaming architectures, Proceedings of the International Conference on Application Specific Array Processors, 1990 conference paper

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/dfm/index.html b/ch2-moc/dfm/index.html index e91f56a..1be4cb1 100644 --- a/ch2-moc/dfm/index.html +++ b/ch2-moc/dfm/index.html @@ -1,5 +1,5 @@ -Data Flow Model - Domain Flow Architecture -

Data Flow Model

The Data Flow model emerged in the early 1970s through pioneering work by Jack Dennis at MIT1, +Data Flow Model - Domain Flow Architecture +

Data Flow Model

The Data Flow model emerged in the early 1970s through pioneering work by Jack Dennis at MIT1, paralleled by research by Robert Barton and Al Davis at the University of Utah2. This model arose as an alternative to the von Neumann architecture to create a framework for expressing parallelism. Unlike traditional von Neumann architectures, which execute instructions sequentially, the data flow model @@ -36,12 +36,12 @@ Modern tools and frameworks, like TensorFlow, Cloud Dataflow, and Apache Beam, draw on these foundational ideas to support parallel and streaming computations.

The data flow model and its extensions, including the Synchronous Data Flow model, represent a critical evolution in computational theory. By enabling parallelism and predictability, -these models have significantly influenced both theoretical research and practical applications.
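A minimal sketch of the firing rule (illustrative data structures, not any particular machine’s token format): an operation fires as soon as every one of its input edges holds a token, with no program counter involved.

#include <cstdio>
#include <vector>

// A node fires when every input edge carries a token; results become
// tokens on the output edges, possibly enabling further nodes.
struct Node {
    std::vector<int> inputs;    // edge ids this node consumes
    std::vector<int> outputs;   // edge ids this node produces
};

int main() {
    std::vector<bool> has_token = {true, true, false, false};  // edges 0..3
    std::vector<Node> graph = { {{0, 1}, {2}},                 // n0: e0,e1 -> e2
                                {{2}, {3}} };                  // n1: e2 -> e3
    bool fired = true;
    while (fired) {                          // data-driven, no program counter
        fired = false;
        for (size_t n = 0; n < graph.size(); ++n) {
            bool ready = true;
            for (int e : graph[n].inputs) ready = ready && has_token[e];
            if (!ready) continue;
            for (int e : graph[n].inputs)  has_token[e] = false;
            for (int e : graph[n].outputs) has_token[e] = true;
            std::printf("node %zu fired\n", n);
            fired = true;
        }
    }
    return 0;
}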

Footnotes

[1] Jack B. Dennis, Data Flow Supercomputing, IEEE Computer, Volume 13, Issue 11, November 1980

[2] Utah Computing History Project

[3] A. L. Davis, Architecture and System Method of DDM1: A recursively structured data driven machine, ISCA 1978: Proceedings of the 5th annual symposium on Computer architecture, Pages 210-215

[4] Edward A. Lee, David G. Messerschmitt, Synchronous data flow, Proceedings of the IEEE ( Volume: 75, Issue: 9, September 1987)

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/index.html b/ch2-moc/index.html index 57d8e47..7ba46b9 100644 --- a/ch2-moc/index.html +++ b/ch2-moc/index.html @@ -7,7 +7,7 @@ Finite State Machines (FSM) Pushdown automata Turing machines Decision Tree Models Random Access Machine And parallel models of computation:">Computer Architecture - Domain Flow Architecture -

Computer Architecture

A model of computation describes how an output of a +

Computer Architecture

A model of computation describes how an output of a mathematical function is computed given an input. These models specify how units of computation, memories, and information (data) exchanges are organized. @@ -17,12 +17,12 @@ provides an implementation of the Random Access Machine model of computation, and the Data Flow Machine, a machine to execute Synchronous Data Flow.

And we’ll introduce the Domain Flow Architecture (DFA), which solves the problem of diminishing returns of the Data Flow Machine when -trying to scale up its size and concurrency.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/index.xml b/ch2-moc/index.xml index b6102dc..d26a803 100644 --- a/ch2-moc/index.xml +++ b/ch2-moc/index.xml @@ -1,5 +1,5 @@ Computer Architecture - Domain Flow Architecturehttps://stillwater-sc.github.io/domain-flow/ch2-moc/index.htmlA model of computation describes how an output of a mathematical function is computed given an input. These models specify how units of computation, memories, and information (data) exchanges are organized. The benefits provided by a model of computation is the measure of the computational complexity of an algorithm independent of any specific physical implementation. There are sequential models of computation: -Finite State Machines (FSM) Pushdown automata Turing machines Decision Tree Models Random Access Machine And parallel models of computation:Hugoen-usTue, 07 Jan 2025 13:20:04 -0500Random Access Machinehttps://stillwater-sc.github.io/domain-flow/ch2-moc/spm/index.htmlMon, 06 Jan 2025 16:43:57 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/spm/index.htmlThe Random Access Machine (RAM) model of computation is a theoretical framework developed to analyze algorithms and computational efficiency. Introduced in the mid-20th century, the RAM model was devised to bridge the gap between high-level algorithmic analysis and the hardware implementation of computing systems. It simplifies the architecture of a physical computer into an idealized system that supports a sequential execution of instructions, allowing for the study of computational complexity independent of hardware idiosyncrasies.Data Flow Modelhttps://stillwater-sc.github.io/domain-flow/ch2-moc/dfm/index.htmlSat, 06 Jan 2024 16:44:37 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/dfm/index.htmlThe Data Flow model emerged in the early 1970s through pioneering work by Jack Dennis at MIT1, paralleled by research by Robert Barton and Al Davis at the University of Utah2. This model arose as an alternative to the von Neumann architecture to create a framework for expressing parallelism. Unlike traditional von Neumann architectures, which execute instructions sequentially, the data flow model represents computation as a directed graph of data dependencies. Nodes in this graph correspond to operations, and edges represent data flowing between them. Execution is driven by the availability of data, allowing operations to proceed independently and in parallel. The data flow model was promising better parallel execution by eliminating the program counter and global updating of state that are essential in the operation of the Stored Program Machine.Domain Flow Architecturehttps://stillwater-sc.github.io/domain-flow/ch2-moc/dfa/index.htmlSat, 06 Jan 2024 16:46:11 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/dfa/index.htmlWith the advent of Very Large Scale Integration (VLSI), it became apparent that the control mechanism of the Stored Program Machine (SPM) to manage resource contention was not well-suited to the characteristics of VLSI 1. VLSI offers large amount of hardware at very low cost, but interconnections between the logic devices are as expensive as the logic devices themselves for all three metrics: area, propagation delays, and energy. 
Effective use of VLSI technology is only achieved when the computational resource organization is constructed with local interconnections.Computer Organizationhttps://stillwater-sc.github.io/domain-flow/ch2-moc/taxonomy/index.htmlTue, 07 Jan 2025 13:20:04 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/taxonomy/index.htmlWhat would be the best way to build scalable, parallel execution engines? +Finite State Machines (FSM) Pushdown automata Turing machines Decision Tree Models Random Access Machine And parallel models of computation:Hugoen-usTue, 07 Jan 2025 13:20:04 -0500Random Access Machinehttps://stillwater-sc.github.io/domain-flow/ch2-moc/spm/index.htmlMon, 06 Jan 2025 16:43:57 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/spm/index.htmlThe Random Access Machine (RAM) model of computation is a theoretical framework developed to analyze algorithms and computational efficiency. Introduced in the mid-20th century, the RAM model was devised to bridge the gap between high-level algorithmic analysis and the hardware implementation of computing systems. It simplifies the architecture of a physical computer into an idealized system that supports a sequential execution of instructions, allowing for the study of computational complexity independent of hardware idiosyncrasies.Computer Organizationhttps://stillwater-sc.github.io/domain-flow/ch2-moc/taxonomy/index.htmlTue, 07 Jan 2025 13:20:04 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/taxonomy/index.htmlWhat would be the best way to build scalable, parallel execution engines? In 1966, Michael J. Flynn, proposed a taxonomy based on two dimensions, the parallelism of data and instructions 1. -A purely sequential machine has a single instruction stream and a single data stream and the acronym SISD. A machine that applies the same instruction on multiple data elements is a SIMD machine, short for Single Instruction Multiple Data. Machines that have multiple instruction streams operating on a single data element as used in fault-tolerant and redundant system designs, and carry the designation MISD, Multiple Instruction Single Data. The Multiple Instruction Multiple Data machine, or MIMD, consists of many processing elements simultaneously operating on different data.Next Stepshttps://stillwater-sc.github.io/domain-flow/ch2-moc/nextsteps/index.htmlMon, 06 Jan 2025 16:49:53 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/nextsteps/index.htmlWe have quickly introduced computer hardware organization to deliver resource contention management. Our next step is to look at specific algorithms, and explore their optimal parallel execution dynamics. \ No newline at end of file +A purely sequential machine has a single instruction stream and a single data stream and the acronym SISD. A machine that applies the same instruction on multiple data elements is a SIMD machine, short for Single Instruction Multiple Data. Machines that have multiple instruction streams operating on a single data element as used in fault-tolerant and redundant system designs, and carry the designation MISD, Multiple Instruction Single Data. 
The Multiple Instruction Multiple Data machine, or MIMD, consists of many processing elements simultaneously operating on different data.Data Flow Modelhttps://stillwater-sc.github.io/domain-flow/ch2-moc/dfm/index.htmlMon, 06 Jan 2025 16:44:37 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/dfm/index.htmlThe Data Flow model emerged in the early 1970s through pioneering work by Jack Dennis at MIT1, paralleled by research by Robert Barton and Al Davis at the University of Utah2. This model arose as an alternative to the von Neumann architecture to create a framework for expressing parallelism. Unlike traditional von Neumann architectures, which execute instructions sequentially, the data flow model represents computation as a directed graph of data dependencies. Nodes in this graph correspond to operations, and edges represent data flowing between them. Execution is driven by the availability of data, allowing operations to proceed independently and in parallel. The data flow model was promising better parallel execution by eliminating the program counter and global updating of state that are essential in the operation of the Stored Program Machine.Domain Flow Architecturehttps://stillwater-sc.github.io/domain-flow/ch2-moc/dfa/index.htmlMon, 06 Jan 2025 16:46:11 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/dfa/index.htmlWith the advent of Very Large Scale Integration (VLSI), it became apparent that the control mechanism of the Stored Program Machine (SPM) to manage resource contention was not well-suited to the characteristics of VLSI 1. VLSI offers large amount of hardware at very low cost, but interconnections between the logic devices are as expensive as the logic devices themselves for all three metrics: area, propagation delays, and energy. Effective use of VLSI technology is only achieved when the computational resource organization is constructed with local interconnections.Next Stepshttps://stillwater-sc.github.io/domain-flow/ch2-moc/nextsteps/index.htmlMon, 06 Jan 2025 16:49:53 -0500https://stillwater-sc.github.io/domain-flow/ch2-moc/nextsteps/index.htmlWe have quickly introduced different computer hardware organizations to deliver resource contention management required for the execution of algorithms that exhibit problem sizes larger than the available hardware resources. Our next step is to look at specific algorithms, and explore their optimal parallel execution dynamics. \ No newline at end of file diff --git a/ch2-moc/nextsteps/index.html b/ch2-moc/nextsteps/index.html index b5283d3..a5dc485 100644 --- a/ch2-moc/nextsteps/index.html +++ b/ch2-moc/nextsteps/index.html @@ -1,13 +1,14 @@ -Next Steps - Domain Flow Architecture -

Next Steps

We have quickly introduced computer hardware organization to deliver -resource contention management. +Next Steps - Domain Flow Architecture +

Next Steps

We have quickly introduced different computer hardware organizations to deliver +resource contention management required for the execution of algorithms that +exhibit problem sizes larger than the available hardware resources. Our next step is to look at specific algorithms, and explore -their optimal parallel execution dynamics.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/spm/index.html b/ch2-moc/spm/index.html index b276027..3409e09 100644 --- a/ch2-moc/spm/index.html +++ b/ch2-moc/spm/index.html @@ -1,5 +1,5 @@ Random Access Machine - Domain Flow Architecture -

Random Access Machine

The Random Access Machine (RAM) model of computation is a theoretical framework developed to analyze algorithms and computational efficiency. Introduced in the mid-20th century, the RAM model was devised to bridge the gap between high-level algorithmic analysis and the hardware implementation of computing systems. It simplifies the architecture of a physical computer into an idealized system that supports a sequential execution of instructions, allowing for the study of computational complexity independent of hardware idiosyncrasies.

The RAM model emulates the basic operation of the Turing machine using the following key components:

  1. An infinite sequence of memory cells, each capable of storing arbitrary-length integers
  2. A finite set of instructions that operate on these registers
  3. A program counter that keeps track of the current instruction
  4. An accumulator register for arithmetic operations

The infinite sequence of cells is equivalent to the infinite Turing Machine tape, and the program counter is a more capable representation of tape movement. +

Random Access Machine

The Random Access Machine (RAM) model of computation is a theoretical framework developed to analyze algorithms and computational efficiency. Introduced in the mid-20th century, the RAM model was devised to bridge the gap between high-level algorithmic analysis and the hardware implementation of computing systems. It simplifies the architecture of a physical computer into an idealized system that supports a sequential execution of instructions, allowing for the study of computational complexity independent of hardware idiosyncrasies.

The RAM model emulates the basic operation of the Turing machine using the following key components:

  1. An infinite sequence of memory cells, each capable of storing arbitrary-length integers
  2. A finite set of instructions that operate on these cells
  3. A program counter that keeps track of the current instruction
  4. An accumulator register for arithmetic operations

The infinite sequence of cells is equivalent to the infinite Turing Machine tape, and the program counter is a more capable representation of tape movement.
A key inspiration for the RAM model of computation came from the von Neumann architecture, which was conceptualized by John von Neumann in the late 1940s. The von Neumann architecture’s hallmark feature is the stored-program concept, where instructions and data are stored in the same memory. The von Neumann machine, later renamed the Stored Program Machine, had a profound impact on the design and development of modern computers, enabling programmability and flexibility.
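A few lines of C++ capture the skeleton of the model (a sketch; the three-opcode instruction set is invented purely for illustration): memory cells, a stored program, a program counter driving sequential execution, and an accumulator.

#include <cstdio>
#include <vector>

// Skeleton RAM machine: stored program, program counter, accumulator.
enum Op { LOAD, ADD, HALT };
struct Insn { Op op; int addr; };

int main() {
    std::vector<long> mem = {3, 4, 0};                  // idealized memory cells
    std::vector<Insn> program = { {LOAD, 0}, {ADD, 1}, {HALT, 0} };
    long acc = 0;
    int pc = 0;                                         // enforces the total order
    for (;;) {
        Insn insn = program[pc++];
        if (insn.op == LOAD)      acc = mem[insn.addr];
        else if (insn.op == ADD)  acc += mem[insn.addr];
        else break;                                     // HALT
    }
    std::printf("acc = %ld\n", acc);                    // prints: acc = 7
    return 0;
}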

Basic Operating Principle

After loading a program into the main memory of the Stored Program Machine, the Operating System writes the address of the entry point of the program
@@ -10,12 +10,12 @@
provide a practical solution. The constraints created by the memory hierarchy required significant alteration of the core algorithms
to yield reasonable performance. And as chip process technology entered the 90nm regime, the energy consumed by the control infrastructure
and memory hierarchy became dominant. This favors micro-architectures that amortize their control overhead over more arithmetic operations per instruction.
However, the energy inefficiency of the Stored Program Machine is limiting, and new machine organizations are required to continue
to improve computational density. The following sections will introduce two specific
-improvements to support concurrency and improve energy efficiency beyond what the Stored Program Machine can offer.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/taxonomy/index.html b/ch2-moc/taxonomy/index.html index c93e325..e7d0829 100644 --- a/ch2-moc/taxonomy/index.html +++ b/ch2-moc/taxonomy/index.html @@ -7,7 +7,7 @@ A purely sequential machine has a single instruction stream and a single data stream and the acronym SISD. A machine that applies the same instruction on multiple data elements is a SIMD machine, short for Single Instruction Multiple Data. Machines that have multiple instruction streams operating on a single data element as used in fault-tolerant and redundant system designs, and carry the designation MISD, Multiple Instruction Single Data. The Multiple Instruction Multiple Data machine, or MIMD, consists of many processing elements simultaneously operating on different data.">Computer Organization - Domain Flow Architecture -

Computer Organization

What would be the best way to build scalable, parallel execution engines?

In 1966, Michael J. Flynn, proposed a taxonomy based on two dimensions, the parallelism of +

Computer Organization

What would be the best way to build scalable, parallel execution engines?

In 1966, Michael J. Flynn proposed a taxonomy based on two dimensions, the parallelism of
data and instructions 1.

A purely sequential machine has a single instruction stream and a single data stream, and carries the acronym SISD.
A machine that applies the same instruction on multiple data elements is a SIMD machine,
short for Single Instruction Multiple Data. Machines that have multiple instruction streams operating
@@ -51,12 +51,12 @@
real-time systems tend to favor fine-grain parallelism. Fine-grain parallel systems
offer lower latencies, and an increasingly important benefit, energy efficiency.
In the next chapter, we’ll discuss the techniques used to design spatial mappings
-for fine-grained parallel machines.
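The data-parallelism axis of the taxonomy is easy to see in code. The contrast below is illustrative and not tied to any particular machine: the SISD form issues one operation per data element, while the SIMD form asks the compiler to apply one instruction across a whole vector of elements.

#include <array>

// SISD: one instruction stream, one data element per operation.
void add_sisd(const std::array<float,8>& x, const std::array<float,8>& y,
              std::array<float,8>& z) {
    for (size_t i = 0; i < 8; ++i) z[i] = x[i] + y[i];
}

// SIMD (conceptually): one instruction applied to multiple data elements.
// A vectorizing compiler, guided here by the OpenMP simd pragma, turns the
// element loop into wide vector adds.
void add_simd(const std::array<float,8>& x, const std::array<float,8>& y,
              std::array<float,8>& z) {
    #pragma omp simd
    for (size_t i = 0; i < 8; ++i) z[i] = x[i] + y[i];
}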

Footnotes

[1] Flynn, Michael J. (December 1966), Very high-speed computing systems

[2] Flynn’s taxonomy Wikipedia

[3] The Landscape of Parallel Computing Research: A View from Berkeley The Seven Dwarfs

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/control/index.html b/ch3-design/control/index.html index 9827d20..f8e0a53 100644 --- a/ch3-design/control/index.html +++ b/ch3-design/control/index.html @@ -3,7 +3,7 @@ We have seen the Data Flow Machine (DFM) use a different mechanism. Here instructions fire when all their input data is available. When they fire, an instruction token is injected into a network of processing units to be executed. The result of that execution is encapsulated into a data token, which is send back to the central Content Addressable Memory where the token is matched with all the instructions it is part of.">Control: the how - Domain Flow Architecture -

Control: the how

Fundamentally, the Stored Program Machine (SPM) relies on a request/reply protocol +

Control: the how

Fundamentally, the Stored Program Machine (SPM) relies on a request/reply protocol
to get information to and from memory. Otherwise stated, the resource contention mechanism
deployed by an SPM uses a random access memory to store input, intermediate, and output values.

We have seen the Data Flow Machine (DFM) use a different mechanism. Here instructions
fire when all their input data is available. When they fire, an instruction token is injected
into a network of processing units to be executed. The result of that execution is
encapsulated into a data token, which is sent back to the central Content Addressable
@@ -17,12 +17,12 @@
an unnecessary penalty. But the DFM does provide a hint of how to maintain fine-grain
parallelism: its pipeline is a ring, which is an infinite, but bounded structure.
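As a rough sketch of that matching step (the data structures are invented for illustration), the token store can be keyed by the destination instruction; when the operand set for a key is complete, the instruction token is injected into the execution pipeline:

#include <cstdio>
#include <map>
#include <vector>

// Toy model of the DFM matching store: data tokens accumulate per
// destination instruction; a complete operand set fires the instruction.
struct DataToken { int dest_insn; int port; double value; };

std::map<int, std::vector<DataToken>> store;   // stands in for the CAM
std::map<int, int> arity = { {7, 2} };         // instruction 7 needs 2 operands

void arrive(const DataToken& t) {
    auto& slot = store[t.dest_insn];
    slot.push_back(t);
    if ((int)slot.size() == arity[t.dest_insn]) {
        std::printf("inject instruction %d into the ring\n", t.dest_insn);
        slot.clear();
    }
}

int main() {
    arrive({7, 0, 1.5});
    arrive({7, 1, 2.5});   // second operand completes the match: fire
    return 0;
}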

The Domain Flow Architecture (DFA) builds upon this observation: it maintains
a local fine-grain spatial structure while offering an infinite computational
-fabric with finite resources. DFA is to DFM what PIM is to SPM.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/currentstate/index.html b/ch3-design/currentstate/index.html index af0e8ea..3b1c5c8 100644 --- a/ch3-design/currentstate/index.html +++ b/ch3-design/currentstate/index.html @@ -1,5 +1,5 @@ Computational Dynamics - Domain Flow Architecture -

Computational Dynamics

A memory access in a physical machine can be very complex. For example, +

Computational Dynamics

A memory access in a physical machine can be very complex. For example,
when a program accesses an operand located at an address that is not in
physical memory, the processor registers a page fault. The performance
difference between an access from the local L1 cache versus a page fault
@@ -33,12 +33,12 @@
modulation due to power constraints, causes the collective to wait
for the slowest process. As the number of processors grows, so does
variability. And unfortunately, when variability rises, processor
-utilization drops and algorithmic performance suffers.
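A back-of-the-envelope simulation illustrates the effect (a sketch with an assumed delay distribution): when each of N processors draws its step time from the same distribution and a barrier waits for the slowest, the ratio of mean to maximum delay, a proxy for utilization, falls steadily as N grows.

#include <algorithm>
#include <cstdio>
#include <random>

int main() {
    // Each step, every processor draws a service time; the barrier waits for
    // the slowest. mean/max approximates sustained utilization.
    std::mt19937 rng(42);
    std::lognormal_distribution<double> delay(0.0, 0.5);  // assumed variability
    for (int N : {1, 16, 256, 4096}) {
        const int steps = 1000;
        double sum_max = 0.0, sum_mean = 0.0;
        for (int s = 0; s < steps; ++s) {
            double mx = 0.0, total = 0.0;
            for (int p = 0; p < N; ++p) {
                double d = delay(rng);
                mx = std::max(mx, d);
                total += d;
            }
            sum_max += mx;
            sum_mean += total / N;
        }
        std::printf("N=%5d  utilization ~ %.2f\n", N, sum_mean / sum_max);
    }
    return 0;
}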

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/elements/index.html b/ch3-design/elements/index.html index b510fab..fcd5df9 100644 --- a/ch3-design/elements/index.html +++ b/ch3-design/elements/index.html @@ -15,7 +15,7 @@ Item #2 is well-known among high-performance algorithm designers. Item #3 is well-known among hardware designers and computer engineers. When designing domain flow algorithms, we are looking for an energy efficient embedding of a computational graph in space, and it is thus to be expected that we need to combine all three attributes of minimizing operator count, operand movement, and resource contention. The complexity of minimizing resource contention is what makes hardware design so much more complex. But the complexity of operator contention can be mitigated by clever resource contention management.">Elements of Design - Domain Flow Architecture -

Elements of Design

We can summarize the attributes of good parallel algorithm design as

  1. low operation count, where operation count is defined as the sum of operators and operand accesses
  2. minimal operand movement
  3. minimal resource contention

Item #1 is well-known by theoretical computer scientists.

Item #2 is well-known among high-performance algorithm designers.

Item #3 is well-known among hardware designers and computer engineers.

When designing domain flow algorithms, we are looking for an energy +

Elements of Design

We can summarize the attributes of good parallel algorithm design as

  1. low operation count, where operation count is defined as the sum of operators and operand accesses
  2. minimal operand movement
  3. minimal resource contention

Item #1 is well-known by theoretical computer scientists.

Item #2 is well-known among high-performance algorithm designers.

Item #3 is well-known among hardware designers and computer engineers.

When designing domain flow algorithms, we are looking for an energy
efficient embedding of a computational graph in space, and it is thus
to be expected that we need to combine all three attributes of
minimizing operator count, operand movement, and resource contention.
@@ -31,12 +31,12 @@
it forces a total order on the computation graph. This task of
creating the total order falls on the algorithm designer.

For parallel execution we need a resource contention management mechanism that is more efficient. And this is where our -computational spacetime will come in handy.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/energy/index.html b/ch3-design/energy/index.html index e7e69bb..4ee1092 100644 --- a/ch3-design/energy/index.html +++ b/ch3-design/energy/index.html @@ -3,7 +3,7 @@ Fundamentally, the SPM relies on a request/reply protocol to get information from a memory. Otherwise stated, the resource contention mechanism deployed by a SPM uses a random access memory to store inputs, intermediate, and output values. And all this memory management uses this request/reply cycle. Which we now know is becoming less and less energy efficient compared to the actual computational event the algorithm requires. The sequential processing model is becoming less and less energy efficient.">Energy: the how efficient - Domain Flow Architecture -

Energy: the how efficient

Table 1 shows switching energy estimates of key computational events by process node. +

Energy: the how efficient

Table 1 shows switching energy estimates of key computational events by process node. Data movement operations (reads and writes) have started to dominate energy consumption in modern processors. This makes a Stored Program Machine (SPM) less and less efficient. To counter this, all CPUs, GPUs, and DSPs have started to add instructions that amortize @@ -29,12 +29,12 @@ an unnecessary penalty. But the DFM does provide a hint of how to maintain fine-grain parallelism: its pipeline is a ring, which is an infinite, but bounded structure.

The Domain Flow Architecture (DFA) builds upon this observation: it maintains
a local fine-grain spatial structure while offering an infinite computational
-fabric with finite resources. DFA is to DFM as PIM is to SPM.

Values in picojoules (pJ) per operation

Operation Type              | 28/22nm | 16/14/12nm | 7/6/5nm | 3nm    | 2nm
32-bit Register Read        | 0.040   | 0.025      | 0.012   | 0.008  | 0.006
32-bit Register Write       | 0.045   | 0.028      | 0.014   | 0.009  | 0.007
32-bit ALU Operation        | 0.100   | 0.060      | 0.030   | 0.020  | 0.015
32-bit FPU Add              | 0.400   | 0.250      | 0.120   | 0.080  | 0.060
32-bit FPU Multiply         | 0.800   | 0.500      | 0.250   | 0.170  | 0.130
32-bit FPU FMA              | 1.000   | 0.600      | 0.300   | 0.200  | 0.150
32-bit Word Read L1         | 0.625   | 0.375      | 0.1875  | 0.125  | 0.09375
32-bit Word Read L2         | 1.875   | 1.125      | 0.5625  | 0.375  | 0.28125
32-bit Word Read DDR5       | 6.250   | 5.000      | 3.750   | 3.125  | 2.8125
64-byte L1 Cache Read       | 10.000  | 6.000      | 3.000   | 2.000  | 1.500
64-byte L2 Cache Read       | 30.000  | 18.000     | 9.000   | 6.000  | 4.500
64-byte DDR5 Memory Read    | 100.000 | 80.000     | 60.000  | 50.000 | 45.000

Table 1: Switching Energy Estimate by Process Node

note

  1. 32-bit cache and memory operations are derived from the 64-byte read energy
  2. Smaller process nodes generally reduce switching energy by roughly 40-50% per major node transition
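Reading Table 1 as a design guide: at the 7/6/5nm node a 32-bit FMA costs 0.300 pJ, while fetching its two input operands from L1 already costs 2 × 0.1875 pJ, more than the arithmetic itself, and fetching them from DDR5 costs 25 times the arithmetic. The little computation below just replays those ratios from the table:

#include <cstdio>

int main() {
    // Energy ratios at the 7/6/5nm column of Table 1 (values in pJ).
    const double fma      = 0.300;    // 32-bit FPU FMA
    const double reg_read = 0.012;    // 32-bit register read
    const double l1_read  = 0.1875;   // 32-bit word read from L1
    const double ddr_read = 3.750;    // 32-bit word read from DDR5

    std::printf("2 register operands: %.0f%% of the FMA\n", 100 * 2*reg_read / fma);
    std::printf("2 L1 operands:       %.0f%% of the FMA\n", 100 * 2*l1_read  / fma);
    std::printf("2 DDR5 operands:     %.0fx the FMA\n",           2*ddr_read / fma);
    return 0;
}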
\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/index.html b/ch3-design/index.html index 212dcba..d6f62c9 100644 --- a/ch3-design/index.html +++ b/ch3-design/index.html @@ -1,16 +1,16 @@ Elements of Good Design - Domain Flow Architecture -

Elements of Good Design

The best algorithms for sequential execution are those that minimize the number +

Elements of Good Design

The best algorithms for sequential execution are those that minimize the number of operations to yield results. Computational complexity theory has aided this quest, but any performance-minded algorithm designer knows that the best theoretical algorithms are not necessarily the fastest when executed on real hardware. The difference is typically caused by the trade-off sequential algorithms have to make between computation and accessing memory. The constraints of data movement are even more pronounced -in parallel algorithms as demonstrated in the previous section.

This chapter explores the elements of good design for parallel algorithms and their execution on real hardware.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/nextsteps/index.html b/ch3-design/nextsteps/index.html index cc614c1..940f7f0 100644 --- a/ch3-design/nextsteps/index.html +++ b/ch3-design/nextsteps/index.html @@ -3,19 +3,19 @@ Once we get a good collection of fast, and energy efficient algorithms together, we can start to explore how best to engineer combinations of these operators. We will discover that sometimes, the cost of an information exchange makes a whole class of algorithms unattractive for parallel executions. With that insight comes the need to create new algorithms and sometimes completely new mathematical approaches to properly leverage the available resources.">Next Steps - Domain Flow Architecture -

Next Steps

In this short introduction to parallel algorithms in general and domain flow +

Next Steps

In this short introduction to parallel algorithms in general and domain flow in particular, our next step is to look at specific algorithms, and explore their optimal parallel execution dynamics.

Once we get a good collection of fast, energy-efficient algorithms together,
we can start to explore how best to engineer combinations of these operators.
We will discover that sometimes, the cost of an information exchange makes
a whole class of algorithms unattractive for parallel execution. With that
insight comes the need to create new algorithms and sometimes completely new
-mathematical approaches to properly leverage the available resources.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/space/index.html b/ch3-design/space/index.html index 5728f0c..e8ff6ee 100644 --- a/ch3-design/space/index.html +++ b/ch3-design/space/index.html @@ -1,16 +1,16 @@ Space: the where - Domain Flow Architecture -

Space: the where

Space is a scarce resource, with a direct cost associated to it. A computational engine, +

Space: the where

Space is a scarce resource, with a direct cost associated with it. A computational engine,
such as a Stored Program Machine, needs to allocate area for ALUs and register files,
and to make these work well, even more space is required to surround these resources
with cache hierarchies and memory controllers. But even if space were freely available,
it still presents a cost from a parallel computational perspective, since it takes energy to get information across
-space, just as it takes time to do so.

Footnotes

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/switching-energy/index.html b/ch3-design/switching-energy/index.html index 1e38f41..214e10c 100644 --- a/ch3-design/switching-energy/index.html +++ b/ch3-design/switching-energy/index.html @@ -7,13 +7,13 @@ Register 28/22nm (fJ) 16/14/12nm (fJ) 7/6/5nm (fJ) 3nm (fJ) 2nm (fJ) Read bit 2.5 - 3.5 1.8 - 2.3 0.9 - 1.2 0.6 - 0.8 0.4 - 0.6 Write bit 3.0 - 4.0 2.0 - 2.8 1.1 - 1.5 0.7 - 1.0 0.5 - 0.8 Notes:">Switching Energy Estimates - Domain Flow Architecture -

Switching Energy Estimates

This page contains background information regarding the switching energy estimates so -important to designing energy-efficient data paths.

Register Read/Write Energy Estimates by Process Node

Note: Values are approximate and may vary by foundry and implementation

Register  | 28/22nm (fJ) | 16/14/12nm (fJ) | 7/6/5nm (fJ) | 3nm (fJ)  | 2nm (fJ)
Read bit  | 2.5 - 3.5    | 1.8 - 2.3       | 0.9 - 1.2    | 0.6 - 0.8 | 0.4 - 0.6
Write bit | 3.0 - 4.0    | 2.0 - 2.8       | 1.1 - 1.5    | 0.7 - 1.0 | 0.5 - 0.8

Notes:

  • Values assume typical operating conditions (TT corner, nominal voltage, 25°C)
  • Energy includes both dynamic and short-circuit power
  • Leakage power not included
  • Values are for basic register operations without additional clock tree or routing overhead
  • Advanced nodes (3nm, 2nm) are based on early estimates and projections

Register file energy estimates

All values in femtojoules per bit (fJ/bit)

Operation | Size      | 28/22nm     | 16/14/12nm    | 7/6/5nm     | 3nm         | 2nm
Read      | 32-entry  | 8.5 - 10.5  | 6.00 - 7.50   | 3.20 - 4.00 | 2.25 - 2.80 | 1.57 - 1.95
Read      | 64-entry  | 12.0 - 14.0 | 8.50 - 10.00  | 4.50 - 5.50 | 3.15 - 3.85 | 2.21 - 2.70
Read      | 128-entry | 16.0 - 18.0 | 11.00 - 13.00 | 6.00 - 7.00 | 4.20 - 4.90 | 2.95 - 3.45
Write     | 32-entry  | 10.0 - 12.0 | 7.00 - 8.50   | 3.80 - 4.60 | 2.65 - 3.25 | 1.85 - 2.28
Write     | 64-entry  | 14.0 - 16.0 | 10.00 - 11.50 | 5.20 - 6.20 | 3.65 - 4.35 | 2.55 - 3.05
Write     | 128-entry | 18.0 - 20.0 | 13.00 - 15.00 | 7.00 - 8.00 | 4.90 - 5.60 | 3.45 - 3.95

Notes:

  • All values in femtojoules per bit (fJ/bit)
  • Assumes typical operating conditions (TT corner, nominal voltage, 25°C)
  • Includes decoder, wordline, and bitline energy
  • Includes local clock distribution
  • Includes both dynamic and short-circuit power
  • Values represent single read port, single write port configuration

Integer Arithmetic and Logic Unit Switching Energy Estimates

Unit Type | Bit Size | 28/22nm (pJ) | 16/14/12nm (pJ) | 7/6/5nm (pJ) | 3nm (pJ)    | 2nm (pJ)
CPU ALU   | 8-bit    | 0.45 - 0.65  | 0.30 - 0.43     | 0.20 - 0.29  | 0.13 - 0.19 | 0.09 - 0.13
CPU ALU   | 16-bit   | 0.90 - 1.30  | 0.60 - 0.86     | 0.40 - 0.58  | 0.26 - 0.38 | 0.18 - 0.26
CPU ALU   | 24-bit   | 1.35 - 1.95  | 0.90 - 1.30     | 0.60 - 0.87  | 0.39 - 0.57 | 0.27 - 0.40
CPU ALU   | 32-bit   | 1.80 - 2.60  | 1.20 - 1.73     | 0.80 - 1.16  | 0.52 - 0.76 | 0.36 - 0.53
CPU ALU   | 40-bit   | 2.25 - 3.25  | 1.50 - 2.16     | 1.00 - 1.45  | 0.65 - 0.95 | 0.45 - 0.66
CPU ALU   | 48-bit   | 2.70 - 3.90  | 1.80 - 2.60     | 1.20 - 1.74  | 0.78 - 1.14 | 0.54 - 0.79
CPU ALU   | 56-bit   | 3.15 - 4.55  | 2.10 - 3.03     | 1.40 - 2.03  | 0.91 - 1.33 | 0.63 - 0.92
CPU ALU   | 64-bit   | 3.60 - 5.20  | 2.40 - 3.47     | 1.60 - 2.32  | 1.04 - 1.52 | 0.72 - 1.05
GPU ALU   | 8-bit    | 0.60 - 0.85  | 0.40 - 0.57     | 0.27 - 0.38  | 0.17 - 0.25 | 0.12 - 0.17
GPU ALU   | 16-bit   | 1.20 - 1.70  | 0.80 - 1.14     | 0.53 - 0.76  | 0.35 - 0.50 | 0.24 - 0.35
GPU ALU   | 24-bit   | 1.80 - 2.55  | 1.20 - 1.71     | 0.80 - 1.14  | 0.52 - 0.75 | 0.36 - 0.52
GPU ALU   | 32-bit   | 2.40 - 3.40  | 1.60 - 2.28     | 1.07 - 1.52  | 0.69 - 1.00 | 0.48 - 0.70
GPU ALU   | 40-bit   | 3.00 - 4.25  | 2.00 - 2.85     | 1.33 - 1.90  | 0.86 - 1.25 | 0.60 - 0.87
GPU ALU   | 48-bit   | 3.60 - 5.10  | 2.40 - 3.42     | 1.60 - 2.28  | 1.04 - 1.50 | 0.72 - 1.04
GPU ALU   | 56-bit   | 4.20 - 5.95  | 2.80 - 3.99     | 1.87 - 2.66  | 1.21 - 1.75 | 0.84 - 1.21
GPU ALU   | 64-bit   | 4.80 - 6.80  | 3.20 - 4.56     | 2.13 - 3.04  | 1.38 - 2.00 | 0.96 - 1.38
DSP ALU   | 8-bit    | 0.55 - 0.75  | 0.37 - 0.53     | 0.25 - 0.35  | 0.16 - 0.23 | 0.11 - 0.16
DSP ALU   | 16-bit   | 1.10 - 1.50  | 0.73 - 1.00     | 0.49 - 0.70  | 0.32 - 0.46 | 0.22 - 0.32
DSP ALU   | 24-bit   | 1.65 - 2.25  | 1.10 - 1.50     | 0.73 - 1.05  | 0.48 - 0.69 | 0.33 - 0.48
DSP ALU   | 32-bit   | 2.20 - 3.00  | 1.47 - 2.00     | 0.98 - 1.40  | 0.63 - 0.92 | 0.44 - 0.64
DSP ALU   | 40-bit   | 2.75 - 3.75  | 1.83 - 2.50     | 1.22 - 1.75  | 0.79 - 1.15 | 0.55 - 0.80
DSP ALU   | 48-bit   | 3.30 - 4.50  | 2.20 - 3.00     | 1.47 - 2.10  | 0.95 - 1.38 | 0.66 - 0.96
DSP ALU   | 56-bit   | 3.85 - 5.25  | 2.57 - 3.50     | 1.71 - 2.45  | 1.11 - 1.61 | 0.77 - 1.12
DSP ALU   | 64-bit   | 4.40 - 6.00  | 2.93 - 4.00     | 1.96 - 2.80  | 1.27 - 1.84 | 0.88 - 1.28

Notes:

  • Values are approximate switching energy in picojoules (pJ)
  • Represents typical dynamic switching energy per operation
  • Accounts for:
    • Arithmetic data path logic operations
    • Typical instruction mix for each design point

Floating-Point Unit Switching Energy Estimates

Unit Type | Bit Size | 28/22nm (pJ) | 16/14/12nm (pJ) | 7/6/5nm (pJ) | 3nm (pJ)    | 2nm (pJ)
CPU FPU   | 8-bit    | 1.20 - 1.70  | 0.80 - 1.14     | 0.53 - 0.76  | 0.35 - 0.50 | 0.24 - 0.35
CPU FPU   | 16-bit   | 1.80 - 2.60  | 1.20 - 1.73     | 0.80 - 1.16  | 0.52 - 0.76 | 0.36 - 0.53
CPU FPU   | 32-bit   | 3.60 - 5.20  | 2.40 - 3.47     | 1.60 - 2.32  | 1.04 - 1.52 | 0.72 - 1.05
CPU FPU   | 64-bit   | 7.20 - 10.40 | 4.80 - 6.93     | 3.20 - 4.64  | 2.08 - 3.04 | 1.44 - 2.10
GPU FPU   | 8-bit    | 1.60 - 2.30  | 1.07 - 1.53     | 0.71 - 1.02  | 0.46 - 0.66 | 0.32 - 0.46
GPU FPU   | 16-bit   | 2.40 - 3.40  | 1.60 - 2.28     | 1.07 - 1.52  | 0.69 - 1.00 | 0.48 - 0.70
GPU FPU   | 32-bit   | 4.80 - 6.80  | 3.20 - 4.56     | 2.13 - 3.04  | 1.38 - 2.00 | 0.96 - 1.38
GPU FPU   | 64-bit   | 9.60 - 13.60 | 6.40 - 9.13     | 4.27 - 6.08  | 2.76 - 4.00 | 1.92 - 2.76
DSP FPU   | 8-bit    | 1.40 - 2.00  | 0.93 - 1.33     | 0.62 - 0.89  | 0.40 - 0.58 | 0.28 - 0.40
DSP FPU   | 16-bit   | 2.20 - 3.00  | 1.47 - 2.00     | 0.98 - 1.40  | 0.63 - 0.92 | 0.44 - 0.64
DSP FPU   | 32-bit   | 4.40 - 6.00  | 2.93 - 4.00     | 1.96 - 2.80  | 1.27 - 1.84 | 0.88 - 1.28
DSP FPU   | 64-bit   | 8.80 - 12.00 | 5.87 - 8.00     | 3.91 - 5.60  | 2.54 - 3.68 | 1.76 - 2.56

Notes:

  • Values are approximate switching energy in picojoules (pJ)
  • 8-bit FPU estimates based on IEEE fp8 standard
  • Represents typical dynamic switching energy per operation
  • Accounts for:
    • Arithmetic logic operations
    • Floating-point operations (for FPU)
    • Typical instruction mix for each design point
\ No newline at end of file diff --git a/ch3-design/time/index.html b/ch3-design/time/index.html index 59d816a..0b7c5cd 100644 --- a/ch3-design/time/index.html +++ b/ch3-design/time/index.html @@ -3,7 +3,7 @@ Let x be a computation that uses y as input, then the free schedule is defined as: \begin{equation} T(x) =\begin{cases} 1, & \text{if y is an external input}\\ 1 + max(T(y)) & \text{otherwise} \end{cases} \end{equation} The free schedule is defined at the level of the individual operations. It does not provide any information about the global data movement or the global structure of the interactions between data and operation. Moreover, the above equation describes a logical sequencing of operations, it does not specify a physical evolution.">Time: the when - Domain Flow Architecture -

Time: the when

The free schedule represents the inherent concurrency of the parallel algorithm, and, under the assumption +

Time: the when

The free schedule represents the inherent concurrency of the parallel algorithm, and, under the assumption of infinite resources, it is the fastest schedule possible.

Let x be a computation that uses y as an input, then the free schedule is defined as:
\begin{equation}
T(x) =\begin{cases}
1, & \text{if } y \text{ is an external input}\\
1 + \max(T(y)), & \text{otherwise}
\end{cases}
\end{equation}
The free schedule is defined at the level of the individual operations. It does not
provide any information about the global data movement or the global structure of
the interactions between data and operation. Moreover, the above equation describes
a logical sequencing of operations; it does not specify a physical evolution.
@@ -66,12 +66,12 @@
recurrence equations has practical application for the design
of optimal computational data paths. The Domain Flow model
uses the Karp, Miller, and Winograd piecewise linear scheduling
-construction to sequence activity wavefronts.
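The definition translates directly into a memoized recursion. The sketch below applies it to the matrix multiply recurrence of the earlier chapters (external inputs on the index-0 boundary faces are assigned time 0, so interior points come out at T(i,j,k) = i + j + k - 2):

#include <algorithm>
#include <cstdio>
#include <map>
#include <tuple>

using Pt = std::tuple<int,int,int>;
std::map<Pt, int> memo;

// Free schedule of the matmul recurrence: every point (i,j,k) with
// 1 <= i,j,k depends on (i,j-1,k), (i-1,j,k), (i,j,k-1); points on the
// index-0 faces are external inputs.
int T(int i, int j, int k) {
    if (i < 1 || j < 1 || k < 1) return 0;          // external input
    auto it = memo.find({i, j, k});
    if (it != memo.end()) return it->second;
    int t = 1 + std::max({T(i, j-1, k), T(i-1, j, k), T(i, j, k-1)});
    memo[{i, j, k}] = t;
    return t;
}

int main() {
    std::printf("T(1,1,1) = %d\n", T(1, 1, 1));     // 1
    std::printf("T(4,4,4) = %d\n", T(4, 4, 4));     // i+j+k-2 = 10
    return 0;
}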

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch4/index.html b/ch4/index.html index 15e2fce..89e5452 100644 --- a/ch4/index.html +++ b/ch4/index.html @@ -7,16 +7,16 @@ components in computational methods, the investment can pay high dividends.">Basic Linear Algebra - Domain Flow Architecture -

Basic Linear Algebra

Basic Linear Algebra Subroutines are an historically significant set of +

Basic Linear Algebra

Basic Linear Algebra Subroutines are an historically significant set of
functions that encapsulate the basic building blocks of a large collection
of linear algebra algorithms and implementations.

The BLAS library has proven to be a very productive mechanism to create and disseminate highly optimized numerical libraries to a plethora of computer architectures and machines. Writing high-performance linear -algebra algorithms turns out to be a tenacious problem, but since linear algebra operations are essential
components in computational methods, the investment can pay high dividends.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch4/level1/index.html b/ch4/level1/index.html index 00237b5..9add593 100644 --- a/ch4/level1/index.html +++ b/ch4/level1/index.html @@ -7,7 +7,7 @@ vector scale: scalar-vector multiplication: $z = \alpha x \implies (z_i = \alpha x_i)$ vector element addition: $z = x + y \implies (z_i = x_i + y_i)$ vector element multiply: $z = x * y \implies (z_i = x_i * y_i)$ vector dot product: $c = x^Ty \implies ( c = \sum_{i = 1}^n x_i y_i ) $, aka inner-product saxpy, or scalar alpha x plus y, $z = \alpha x + y \implies z_i = \alpha x_i + y_i $ The fifth operator, while technically redundant, makes the expressions of linear algebra algorithms more concise.">BLAS Level 1 - Domain Flow Architecture -

BLAS Level 1

BLAS Level 1 are $\mathcal{O}(N)$ class operators. This makes these operators operand access limited +

BLAS Level 1

BLAS Level 1 are $\mathcal{O}(N)$ class operators. This makes them operand-access limited, and they thus require careful operand distribution in a parallel environment.

There are four basic vector operations, and a fifth convenience operator. Let $\alpha \in \Bbb{R}, x \in \Bbb{R^n}, y \in \Bbb{R^n}, z \in \Bbb{R^n}$, then:

  1. vector scale: scalar-vector multiplication: $z = \alpha x \implies (z_i = \alpha x_i)$
  2. vector element addition: $z = x + y \implies (z_i = x_i + y_i)$
  3. vector element multiply: $z = x * y \implies (z_i = x_i * y_i)$
  4. vector dot product: $c = x^Ty \implies ( c = \sum_{i = 1}^n x_i y_i ) $, aka inner-product
  5. saxpy, or scalar alpha x plus y, $z = \alpha x + y \implies z_i = \alpha x_i + y_i $

The fifth operator, while technically redundant, makes the expressions of linear algebra algorithms more concise.
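For reference, a minimal sketch of the five operators in plain Python (illustrative only; production BLAS implementations stream operands through tuned vector kernels):

```python
# The five Level-1 operators on plain Python lists (illustrative sketch).
def vscale(alpha, x):    return [alpha * xi for xi in x]              # z = alpha*x
def vadd(x, y):          return [xi + yi for xi, yi in zip(x, y)]     # z = x + y
def vmul(x, y):          return [xi * yi for xi, yi in zip(x, y)]     # z = x * y
def vdot(x, y):          return sum(xi * yi for xi, yi in zip(x, y))  # c = x^T y
def saxpy(alpha, x, y):  return [alpha * xi + yi for xi, yi in zip(x, y)]
```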

One class of domain flow programs for these operators assumes a linear distribution of the vectors, @@ -51,12 +51,12 @@ z: alpha[i-1,j,k] * x[i,j-1,k] + y[i,j,k-1] }

The final operator, scalar alpha x plus y (saxpy), combines the vector scale and vector addition operators, and shows the same constraint as the vector scale operator due to the required propagation broadcast of the scaling factor.

\ No newline at end of file diff --git a/ch4/level2/index.html index 70b174d..301c11f 100644 --- a/ch4/level2/index.html +++ b/ch4/level2/index.html @@ -3,7 +3,7 @@ Let $A \in \Bbb{R^{m \times n}}$; the matrix-vector product is then defined as: $$z = Ax, \quad \text{where } x \in \Bbb{R^n}$$">BLAS Level 2 - Domain Flow Architecture -

BLAS Level 2

BLAS Level 2 are $\mathcal{O}(N^2)$ class operators, still operand access +

BLAS Level 2

BLAS Level 2 are $\mathcal{O}(N^2)$ class operators, still operand-access limited, as we need to fetch multiple operands per operation without any reuse. The core operator is the matrix-vector multiplication in all its different forms, specialized for matrix shape — triangular, banded, symmetric — and matrix type — integer, real, complex, @@ -15,12 +15,12 @@ x: x[i,j-1,k] z: a[i,j,k-1] * x[i,j-1,k] }

Banded, symmetric, and triangular versions simply alter the constraint set of the domains of computation: the fundamental dependencies do not change.
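A minimal sketch of the core operator $z = Ax$ in plain Python (illustrative only, not the domain flow notation above); note that each element of $A$ is fetched exactly once and never reused, which is what keeps the operator access limited:

```python
# z = A x: every a[i][j] participates in exactly one multiply-accumulate.
def matvec(A, x):
    return [sum(a_ij * x_j for a_ij, x_j in zip(row, x)) for row in A]

assert matvec([[1, 2], [3, 4]], [5, 6]) == [17, 39]
```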

\ No newline at end of file diff --git a/ch4/level3/index.html index 3db2f98..5cdf555 100644 --- a/ch4/level3/index.html +++ b/ch4/level3/index.html @@ -11,7 +11,7 @@ In addition to matrix-matrix multiply there are the Rank-k update operators, which are outer-products and matrix additions. Here is a Hermitian Rank-k update: $$ C = \alpha A A^H + \beta C, \quad \text{where } C \text{ is Hermitian}. $$ A Hermitian matrix is defined as a matrix that is equal to its Hermitian conjugate. In other words, the matrix $C$ is Hermitian if and only if $C = C^H$. Obviously a Hermitian matrix must be square. Hermitian matrices can be understood as the complex extension of real symmetric matrices.">BLAS Level 3 - Domain Flow Architecture -

BLAS Level 3

BLAS Level 3 are $\mathcal{O}(N^3)$ operators, and finally compute bound +

BLAS Level 3

BLAS Level 3 are $\mathcal{O}(N^3)$ class operators over $\mathcal{O}(N^2)$ operands, and thus finally compute bound, creating many opportunities to optimize operand reuse.

In addition to matrix-matrix multiply there are the Rank-k update operators, which are outer-products and matrix additions.

Here is a Hermitian Rank-k update:

$$ C = \alpha A A^H + \beta C, \quad \text{where } C \text{ is Hermitian}. $$

A Hermitian matrix is defined as a matrix that is equal to its Hermitian conjugate. In other words, the matrix $C$ is Hermitian if and only if $C = C^H$. Obviously a Hermitian @@ -32,12 +32,12 @@ c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k] } } -

Here we introduce a conditional constraint that impacts the domain of computation for a set of equations.
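A minimal sketch of such an update, assuming a dense row-major representation and real $\alpha, \beta$ (names are illustrative, not from the text): the conditional constraint appears as the restriction $j \ge i$, so only the upper triangle lies in the computed domain, and the lower triangle is recovered by conjugate mirroring.

```python
# Rank-k update C <- alpha*A*A^H + beta*C, exploiting the Hermitian structure.
def herk_upper(alpha, A, beta, C):
    n, k = len(A), len(A[0])
    for i in range(n):
        for j in range(i, n):                  # conditional domain constraint
            s = sum(A[i][p] * A[j][p].conjugate() for p in range(k))
            C[i][j] = alpha * s + beta * C[i][j]
            if j != i:
                C[j][i] = C[i][j].conjugate()  # mirror keeps C Hermitian
    return C
```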

\ No newline at end of file diff --git a/ch5/factorization/index.html b/ch5/factorization/index.html index 2b7481f..877a877 100644 --- a/ch5/factorization/index.html +++ b/ch5/factorization/index.html @@ -3,12 +3,12 @@ $$ x = {-b \pm \sqrt{b^2-4ac} \over 2a} $$">Matrix Factorizations - Domain Flow Architecture -

Matrix Factorizations

This is the quadratic formula:

$$ x = {-b \pm \sqrt{b^2-4ac} \over 2a} $$
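As a small worked companion (an illustrative sketch, not part of the text): evaluating this formula naively loses precision when $b^2 \gg 4ac$; the standard remedy computes one root without cancellation and recovers the other from the product of the roots, $c/a$.

```python
import math

# Numerically stable quadratic roots (assumes real roots and a != 0).
def quadratic_roots(a, b, c):
    d = math.sqrt(b * b - 4 * a * c)
    q = -0.5 * (b + math.copysign(d, b))   # avoids cancellation in -b +/- d
    return q / a, c / q                    # second root via the product c/a

assert quadratic_roots(1.0, -3.0, 2.0) == (2.0, 1.0)
```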
\ No newline at end of file diff --git a/ch5/index.html b/ch5/index.html index 2286d20..3e8140c 100644 --- a/ch5/index.html +++ b/ch5/index.html @@ -1,12 +1,12 @@ Matrix Factorization - Domain Flow Architecture -

Matrix Factorization

Matrix factorizations are the work horse of linear algebra applications. +

Matrix Factorization

Matrix factorizations are the workhorse of linear algebra applications. Factorizations create equivalences that improve the usability or robustness of an algorithm.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch6/index.html b/ch6/index.html index 5f701c0..ffc84fa 100644 --- a/ch6/index.html +++ b/ch6/index.html @@ -1,12 +1,12 @@ Matrix Kernels - Domain Flow Architecture -

Matrix Kernels

Matrix Kernels are important to characterize and classify the underlying system of equations. +

Matrix Kernels

Matrix Kernels are important to characterize and classify the underlying system of equations. Identifying singularity and quantifying the null space of a matrix are key operations before we can try to solve systems of equations.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch6/matrixkernels/index.html index 8432af5..9ca901a 100644 --- a/ch6/matrixkernels/index.html +++ b/ch6/matrixkernels/index.html @@ -7,16 +7,16 @@ $L$ is the vector space of all elements $v$ of $V$ such that $L(v) = 0$, where $0$ denotes the zero vector in $W$, or more symbolically:">Matrix Kernels - Domain Flow Architecture -

Matrix Kernels

In mathematics, the kernel of a linear map, also known as the null space or nullspace, is the linear subspace +

Matrix Kernels

In mathematics, the kernel of a linear map, also known as the null space or nullspace, is the linear subspace of the domain of the map which is mapped to the zero vector. That is, given a linear map

$$L : V \rightarrow W$$ between two vector spaces $V$ and $W$, the kernel of

$L$ is the vector space of all elements $v$ of $V$ such that $L(v) = 0$, where $0$ denotes the zero vector in $W$, or more symbolically:

$$\ker(L) = \{\, v \in V \mid L(v) = 0 \,\} = L^{-1}(0).$$
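Numerically, a kernel basis is typically computed with the SVD: the right-singular vectors whose singular values are (near) zero span the null space. A minimal sketch with NumPy follows; the tolerance choice is an assumption, not a prescription.

```python
import numpy as np

# Approximate kernel basis: right-singular vectors with ~zero singular values.
def kernel_basis(A, tol=1e-12):
    _, s, Vt = np.linalg.svd(A)
    rank = int(np.sum(s > tol * s[0])) if s.size else 0
    return Vt[rank:].T                      # columns span ker(A)

A = np.array([[1.0, 2.0], [2.0, 4.0]])      # rank 1, so a 1-D kernel
print(kernel_basis(A))                      # ~ multiples of [2, -1]/sqrt(5)
```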

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch7/index.html b/ch7/index.html index fd02e11..88cba3c 100644 --- a/ch7/index.html +++ b/ch7/index.html @@ -1,10 +1,10 @@ Linear Solvers - Domain Flow Architecture -

Linear Solvers

Solving systems of equations is the impetus for the class of algorithms called linear solvers.

\ No newline at end of file diff --git a/ch7/lu/index.html index e7de7ca..4ca8039 100644 --- a/ch7/lu/index.html +++ b/ch7/lu/index.html @@ -3,15 +3,15 @@ $$A = LU$$.">Gaussian Elimination - Domain Flow Architecture -

Gaussian Elimination

Gaussian Elimination, also known as $LU$ decomposition, decomposes a linear transformation +

Gaussian Elimination

Gaussian Elimination, also known as $LU$ decomposition, decomposes a linear transformation defined by the matrix $A$ into a lower-triangular matrix $L$ and an upper-triangular matrix $U$ such that

$$A = LU$$.
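To make the factorization concrete, a minimal Doolittle-style sketch follows (an illustration under the assumption of nonzero pivots; production codes add partial pivoting, $PA = LU$):

```python
# Doolittle LU: L unit lower triangular, U upper triangular, A = L*U.
def lu(A):
    n = len(A)
    L = [[float(i == j) for j in range(n)] for i in range(n)]
    U = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i, n):       # row i of U
            U[i][j] = A[i][j] - sum(L[i][k] * U[k][j] for k in range(i))
        for j in range(i + 1, n):   # column i of L
            L[j][i] = (A[j][i] - sum(L[j][k] * U[k][i] for k in range(i))) / U[i][i]
    return L, U

L, U = lu([[4.0, 3.0], [6.0, 3.0]])  # L=[[1,0],[1.5,1]], U=[[4,3],[0,-1.5]]
```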

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch7/solvers/index.html b/ch7/solvers/index.html index a4bed15..87d146b 100644 --- a/ch7/solvers/index.html +++ b/ch7/solvers/index.html @@ -1,10 +1,10 @@ Linear Solvers - Domain Flow Architecture -

Linear Solvers

Linear solvers are algorithms designed to solve a linear system of equations.
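As a minimal illustration of the workflow (a sketch, assuming the `lu` factorization from the previous page): once $A = LU$ is in hand, solving $Ax = b$ reduces to two triangular sweeps.

```python
# Solve A x = b given A = L U: forward sweep L y = b, backward sweep U x = y.
def solve_lu(L, U, b):
    n = len(b)
    y = [0.0] * n
    for i in range(n):              # forward substitution (L has a unit diagonal)
        y[i] = b[i] - sum(L[i][k] * y[k] for k in range(i))
    x = [0.0] * n
    for i in reversed(range(n)):    # backward substitution
        x[i] = (y[i] - sum(U[i][k] * x[k] for k in range(i + 1, n))) / U[i][i]
    return x
```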

\ No newline at end of file diff --git a/ch8/conditioning/index.html b/ch8/conditioning/index.html index 5a158f2..c9f8754 100644 --- a/ch8/conditioning/index.html +++ b/ch8/conditioning/index.html @@ -1,10 +1,10 @@ Signal Conditioning - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/filters/index.html b/ch8/filters/index.html index b56cdf9..960809a 100644 --- a/ch8/filters/index.html +++ b/ch8/filters/index.html @@ -1,10 +1,10 @@ Digital Filtering - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/identification/index.html b/ch8/identification/index.html index 7951548..0bf23ed 100644 --- a/ch8/identification/index.html +++ b/ch8/identification/index.html @@ -3,7 +3,7 @@ When there are signals and noises, physicists try to identify signals by modeling them, whereas statisticians oppositely try to model noise to identify signals. In this study, we applied the statisticians’ concept of signal detection of physics data with small-size samples and high dimensions without modeling the signals. Most of the data in nature, whether noises or signals, are assumed to be generated by dynamical systems; thus, there is essentially no distinction between these generating processes. We propose that the correlation length of a dynamical system and the number of samples are crucial for the practical definition of noise variables among the signal variables generated by such a system. Since variables with short-term correlations reach normal distributions faster as the number of samples decreases, they are regarded to be noise-like variables, whereas variables with opposite properties are signal-like variables. Normality tests are not effective for data of small-size samples with high dimensions. Therefore, we modeled noises on the basis of the property of a noise variable, that is, the uniformity of the histogram of the probability that a variable is a noise. We devised a method of detecting signal variables from the structural change of the histogram according to the decrease in the number of samples. We applied our method to the data generated by globally coupled map, which can produce time series data with different correlation lengths, and also applied to gene expression data, which are typical static data of small-size samples with high dimensions, and we successfully detected signal variables from them.">Identification - Domain Flow Architecture -

Identification

Identification is the act of recognizing the signal in the presence of noise.

When there are signals and noises, physicists try to identify signals by modeling them, +

Identification

Identification is the act of recognizing the signal in the presence of noise.

When there are signals and noises, physicists try to identify signals by modeling them, whereas statisticians conversely try to model the noise in order to identify signals. In this study, we applied the statisticians’ concept of signal detection to physics data with small-size samples and high dimensions, without modeling the signals. Most of the data in nature, @@ -22,12 +22,12 @@ to the data generated by a globally coupled map, which can produce time series data with different correlation lengths, and also to gene expression data, which are typical static data of small-size samples with high dimensions, and we successfully detected signal variables from them.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch8/index.html b/ch8/index.html index b3ec87b..9443fe3 100644 --- a/ch8/index.html +++ b/ch8/index.html @@ -1,13 +1,13 @@ Digital Signal Processing - Domain Flow Architecture -

Digital Signal Processing

Digital Signal Processing is the discrete realization of Analog Signal Processing +

Digital Signal Processing

Digital Signal Processing is the discrete realization of Analog Signal Processing operations used to condition, amplify, characterize, and transform signals. Digital Signal Processing is essential when interfacing a digital computer to a physical process, enabling reproducible and high-fidelity applications.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch8/spectral/index.html b/ch8/spectral/index.html index cf80191..ed71d40 100644 --- a/ch8/spectral/index.html +++ b/ch8/spectral/index.html @@ -1,10 +1,10 @@ Spectral Analysis - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/transforms/index.html b/ch8/transforms/index.html index 3951f7e..a380d81 100644 --- a/ch8/transforms/index.html +++ b/ch8/transforms/index.html @@ -1,10 +1,10 @@ Transforms - Domain Flow Architecture -
\ No newline at end of file diff --git a/contentdev/index.html b/contentdev/index.html index 1dfce1c..d702704 100644 --- a/contentdev/index.html +++ b/contentdev/index.html @@ -1,11 +1,11 @@ Content Development - Domain Flow Architecture -

Content Development

The following pages are examples for content developers to quickly add interactive content that aids in understanding parallel algorithm design and optimization.

\ No newline at end of file diff --git a/contentdev/prototype/index.html b/contentdev/prototype/index.html index abd706b..4468f0c 100644 --- a/contentdev/prototype/index.html +++ b/contentdev/prototype/index.html @@ -3,7 +3,7 @@ All you need is a tag with an id and some CSS styling and a call into an animation program that fills that canvas.">Prototype - Domain Flow Architecture -

Prototype

Prototype

This is a basic skeleton of a Hugo Markdown page that includes a three.js animation.

All you need is a <canvas> tag with an id, some CSS styling, and a call into an animation program that fills that canvas.

\ No newline at end of file + 
\ No newline at end of file diff --git a/search/index.html b/search/index.html index 04dc7f0..d6ef4a3 100644 --- a/search/index.html +++ b/search/index.html @@ -1,11 +1,11 @@ Search - Domain Flow Architecture -

Search

-

\ No newline at end of file diff --git a/sitemap.xml index 309b131..f73b3d6 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -1 +1 @@ [single-line sitemap diff elided: per-page lastmod entries; this deploy corrects the lastmod year of ch2-moc/dfm and ch2-moc/dfa from 2024-01-06 to 2025-01-06 and reorders the entries accordingly] \ No newline at end of file diff --git a/tags/algorithm/index.html index b0791f4..f461037 100644 --- a/tags/algorithm/index.html +++ b/tags/algorithm/index.html @@ -1,10 +1,10 @@ Algorithm - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/computational-spacetime/index.html b/tags/computational-spacetime/index.html index 941f077..842365b 100644 --- a/tags/computational-spacetime/index.html +++ b/tags/computational-spacetime/index.html @@ -1,10 +1,10 @@ Computational-Spacetime - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/conditioning/index.html b/tags/conditioning/index.html index cbb5a42..7a373c2 100644 --- a/tags/conditioning/index.html +++ b/tags/conditioning/index.html @@ -1,10 +1,10 @@ Conditioning - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/derivation/index.html b/tags/derivation/index.html index d94a728..f40fa2a 100644 --- a/tags/derivation/index.html +++ b/tags/derivation/index.html @@ -1,10 +1,10 @@ Derivation - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/domain-flow/index.html b/tags/domain-flow/index.html index 424fa16..b22b0c5 100644 --- a/tags/domain-flow/index.html +++ b/tags/domain-flow/index.html @@ -1,10 +1,10 @@ Domain-Flow - Tag - Domain Flow Architecture -

Tag - Domain-Flow

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/tags/dsp/index.html b/tags/dsp/index.html index 9859832..ee0b7d4 100644 --- a/tags/dsp/index.html +++ b/tags/dsp/index.html @@ -1,10 +1,10 @@ Dsp - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/filtering/index.html b/tags/filtering/index.html index 12ffe2f..214cfc2 100644 --- a/tags/filtering/index.html +++ b/tags/filtering/index.html @@ -1,10 +1,10 @@ Filtering - Tag - Domain Flow Architecture -

Tag - Filtering

D

\ No newline at end of file diff --git a/tags/free-schedule/index.html b/tags/free-schedule/index.html index 8aa95f9..e6d543e 100644 --- a/tags/free-schedule/index.html +++ b/tags/free-schedule/index.html @@ -1,10 +1,10 @@ Free-Schedule - Tag - Domain Flow Architecture -

Tag - Free-Schedule

F

\ No newline at end of file diff --git a/tags/identification/index.html b/tags/identification/index.html index 041c8a1..55b86ef 100644 --- a/tags/identification/index.html +++ b/tags/identification/index.html @@ -1,10 +1,10 @@ Identification - Tag - Domain Flow Architecture -

Tag - Identification

I

\ No newline at end of file diff --git a/tags/index-space/index.html b/tags/index-space/index.html index 271c094..cd5dd27 100644 --- a/tags/index-space/index.html +++ b/tags/index-space/index.html @@ -1,10 +1,10 @@ Index-Space - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/index.html b/tags/index.html index 7a9a253..798cced 100644 --- a/tags/index.html +++ b/tags/index.html @@ -1,10 +1,10 @@ Tags - Domain Flow Architecture - \ No newline at end of file diff --git a/tags/lattice/index.html b/tags/lattice/index.html index 3702d1e..9351848 100644 --- a/tags/lattice/index.html +++ b/tags/lattice/index.html @@ -1,10 +1,10 @@ Lattice - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/linear-schedule/index.html b/tags/linear-schedule/index.html index 5e99374..fec9a02 100644 --- a/tags/linear-schedule/index.html +++ b/tags/linear-schedule/index.html @@ -1,10 +1,10 @@ Linear-Schedule - Tag - Domain Flow Architecture -

Tag - Linear-Schedule

L

\ No newline at end of file diff --git a/tags/matrix-multiply/index.html b/tags/matrix-multiply/index.html index 67fc8f5..16607d4 100644 --- a/tags/matrix-multiply/index.html +++ b/tags/matrix-multiply/index.html @@ -1,10 +1,10 @@ Matrix-Multiply - Tag - Domain Flow Architecture -

Tag - Matrix-Multiply

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/tags/spectral-analysis/index.html b/tags/spectral-analysis/index.html index c47bea7..55df303 100644 --- a/tags/spectral-analysis/index.html +++ b/tags/spectral-analysis/index.html @@ -1,10 +1,10 @@ Spectral-Analysis - Tag - Domain Flow Architecture -

Tag - Spectral-Analysis

S

\ No newline at end of file diff --git a/tags/transform/index.html b/tags/transform/index.html index 6c104f9..e87e3e4 100644 --- a/tags/transform/index.html +++ b/tags/transform/index.html @@ -1,10 +1,10 @@ Transform - Tag - Domain Flow Architecture -

Tag - Transform

T

  • Transforms
\ No newline at end of file