Index: docs/manual/collateindex.pl =================================================================== RCS file: collateindex.pl diff -N collateindex.pl *** /dev/null Fri Mar 23 21:37:44 2001 --- collateindex.pl Mon Dec 3 14:01:51 2001 *************** *** 0 **** --- 1,596 ---- + # -*- Perl -*- + # + # $Id: collateindex.pl,v 1.12 2000/01/27 15:07:15 nwalsh Exp $ + + use Getopt::Std; + + $usage = "Usage: $0 file + Where are: + -p Link to points in the document. The default is to link + to the closest containing section. + -g Group terms with IndexDiv based on the first letter + of the term (or its sortas attribute). + (This probably doesn't handle i10n particularly well) + -s name Name the IndexDiv that contains symbols. The default + is 'Symbols'. Meaningless if -g is not used. + -t name Title for the index. + -P file Read a preamble from file. The content of file will + be inserted before the tag. + -i id The ID for the tag. + -o file Output to file. Defaults to stdout. + -S scope Scope of the index, must be 'all', 'local', or 'global'. + If unspecified, 'all' is assumed. + -I scope The implied scope, must be 'all', 'local', or 'global'. + IndexTerms which do not specify a scope will have the + implied scope. If unspecified, 'all' is assumed. + -x Make a SetIndex. + -f Force the output file to be written, even if it appears + to have been edited by hand. + -N New index (generates an empty index file). + file The file containing index data generated by Jade + with the DocBook HTML Stylesheet.\n"; + + die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x'); + + $linkpoints = $opt_p; + $lettergroups = $opt_g; + $symbolsname = $opt_s || "Symbols"; + $title = $opt_t; + $preamble = $opt_P; + $outfile = $opt_o || '-'; + $indexid = $opt_i; + $scope = uc($opt_S) || 'ALL'; + $impliedscope = uc($opt_I) || 'ALL'; + $setindex = $opt_x; + $forceoutput = $opt_f; + $newindex = $opt_N; + $debug = $opt_D; + + $indextag = $setindex ? 
'setindex' : 'index'; + + if ($newindex) { + safe_open(*OUT, $outfile); + if ($indexid) { + print OUT "<$indextag id='$indexid'>\n\n"; + } else { + print OUT "<$indextag>\n\n"; + } + + print OUT "\n"; + print OUT "\n"; + + print OUT "\n"; + exit 0; + } + + $dat = shift @ARGV || die $usage; + die "$0: cannot find $dat.\n" if ! -f $dat; + + %legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1); + if ($scope && !$legal_scopes{$scope}) { + die "Invalid scope.\n$usage\n"; + } + if ($impliedscope && !$legal_scopes{$impliedscope}) { + die "Invalid implied scope.\n$usage\n"; + } + + @term = (); + %id = (); + + $termcount = 0; + + print STDERR "Processing $dat...\n"; + + # Read the index file, creating an array of objects. Each object + # represents and indexterm and has fields for the content of the + # indexterm + + open (F, $dat); + while () { + chop; + + if (/^\/indexterm/i) { + push (@term, $idx); + next; + } + + if (/^indexterm (.*)$/i) { + $termcount++; + $idx = {}; + $idx->{'zone'} = {}; + $idx->{'href'} = $1; + $idx->{'count'} = $termcount; + $idx->{'scope'} = $impliedscope; + next; + } + + if (/^indexpoint (.*)$/i) { + $idx->{'hrefpoint'} = $1; + next; + } + + if (/^title (.*)$/i) { + $idx->{'title'} = $1; + next; + } + + if (/^primary[\[ ](.*)$/i) { + if (/^primary\[(.*?)\] (.*)$/i) { + $idx->{'psortas'} = $1; + $idx->{'primary'} = $2; + } else { + $idx->{'psortas'} = $1; + $idx->{'primary'} = $1; + } + next; + } + + if (/^secondary[\[ ](.*)$/i) { + if (/^secondary\[(.*?)\] (.*)$/i) { + $idx->{'ssortas'} = $1; + $idx->{'secondary'} = $2; + } else { + $idx->{'ssortas'} = $1; + $idx->{'secondary'} = $1; + } + next; + } + + if (/^tertiary[\[ ](.*)$/i) { + if (/^tertiary\[(.*?)\] (.*)$/i) { + $idx->{'tsortas'} = $1; + $idx->{'tertiary'} = $2; + } else { + $idx->{'tsortas'} = $1; + $idx->{'tertiary'} = $1; + } + next; + } + + if (/^see (.*)$/i) { + $idx->{'see'} = $1; + next; + } + + if (/^seealso (.*)$/i) { + $idx->{'seealso'} = $1; + next; + } + + if 
(/^significance (.*)$/i) { + $idx->{'significance'} = $1; + next; + } + + if (/^class (.*)$/i) { + $idx->{'class'} = $1; + next; + } + + if (/^scope (.*)$/i) { + $idx->{'scope'} = uc($1); + next; + } + + if (/^startref (.*)$/i) { + $idx->{'startref'} = $1; + next; + } + + if (/^id (.*)$/i) { + $idx->{'id'} = $1; + $id{$1} = $idx; + next; + } + + if (/^zone (.*)$/i) { + my($href) = $1; + $_ = scalar(); + chop; + die "Bad zone: $_\n" if !/^title (.*)$/i; + $idx->{'zone'}->{$href} = $1; + next; + } + + die "Unrecognized: $_\n"; + } + close (F); + + print STDERR "$termcount entries loaded...\n"; + + # Fixup the startrefs... + # In DocBook, STARTREF is a #CONREF attribute; support this by copying + # all of the fields from the indexterm with the id specified by STARTREF + # to the indexterm that has the STARTREF. + foreach $idx (@term) { + my($ididx, $field); + if ($idx->{'startref'}) { + $ididx = $id{$idx->{'startref'}}; + foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso', + 'psortas', 'ssortas', 'tsortas', 'significance', + 'class', 'scope') { + $idx->{$field} = $ididx->{$field}; + } + } + } + + # Sort the index terms + @term = sort termsort @term; + + # Move all of the non-alphabetic entries to the front of the index. + @term = sortsymbols(@term); + + safe_open(*OUT, $outfile); + + # Write the index... + if ($indexid) { + print OUT "<$indextag id='$indexid'>\n\n"; + } else { + print OUT "<$indextag>\n\n"; + } + + print OUT "\n"; + print OUT "\n"; + + print OUT "\n\n"; + + print OUT "$title\n\n" if $title; + + $last = {}; # the last indexterm we processed + $first = 1; # this is the first one + $group = ""; # we're not in a group yet + $lastout = ""; # we've not put anything out yet + + foreach $idx (@term) { + next if $idx->{'startref'}; # no way to represent spans... 
+ next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL'); + next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL'); + next if &same($idx, $last); # suppress duplicates + + $termcount--; + + # If primary changes, output a whole new index term, otherwise just + # output another secondary or tertiary, as appropriate. We know from + # sorting that the terms will always be in the right order. + if (!&tsame($last, $idx, 'primary')) { + print "DIFF PRIM\n" if $debug; + &end_entry() if not $first; + + if ($lettergroups) { + # If we're grouping, make the right indexdivs + $letter = $idx->{'psortas'}; + $letter = $idx->{'primary'} if !$letter; + $letter = uc(substr($letter, 0, 1)); + + # symbols are a special case + if (($letter lt 'A') || ($letter gt 'Z')) { + if (($group eq '') + || (($group ge 'A') && ($group le 'Z'))) { + print OUT "\n" if !$first; + print OUT "$symbolsname\n\n"; + $group = $letter; + } + } elsif (($group eq '') || ($group ne $letter)) { + print OUT "\n" if !$first; + print OUT "$letter\n\n"; + $group = $letter; + } + } + + $first = 0; # there can only be on first ;-) + + print OUT "\n"; + print OUT " ", $idx->{'primary'}; + $lastout = "primaryie"; + + if ($idx->{'secondary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'secondary'}; + $lastout = "secondaryie"; + }; + + if ($idx->{'tertiary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } elsif (!&tsame($last, $idx, 'secondary')) { + print "DIFF SEC\n" if $debug; + + print OUT "\n \n" if $lastout; + + print OUT " ", $idx->{'secondary'}; + $lastout = "secondaryie"; + if ($idx->{'tertiary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } elsif (!&tsame($last, $idx, 'tertiary')) { + print "DIFF TERT\n" if $debug; + + print OUT "\n \n" if $lastout; + + if ($idx->{'tertiary'}) { + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } + + &print_term($idx); + + $last = $idx; + } + + # 
Termcount is > 0 iff some entries were skipped. + print STDERR "$termcount entries ignored...\n"; + + &end_entry(); + + print OUT "\n" if $lettergroups; + print OUT "\n"; + + close (OUT); + + print STDERR "Done.\n"; + + sub same { + my($a) = shift; + my($b) = shift; + + my($aP) = $a->{'psortas'} || $a->{'primary'}; + my($aS) = $a->{'ssortas'} || $a->{'secondary'}; + my($aT) = $a->{'tsortas'} || $a->{'tertiary'}; + + my($bP) = $b->{'psortas'} || $b->{'primary'}; + my($bS) = $b->{'ssortas'} || $b->{'secondary'}; + my($bT) = $b->{'tsortas'} || $b->{'tertiary'}; + + my($same); + + $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP); + $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS); + $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT); + $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP); + $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS); + $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT); + + # print "[$aP]=[$bP]\n"; + # print "[$aS]=[$bS]\n"; + # print "[$aT]=[$bT]\n"; + + # Two index terms are the same if: + # 1. the primary, secondary, and tertiary entries are the same + # (or have the same SORTAS) + # AND + # 2. They occur in the same titled section + # AND + # 3. They point to the same place + # + # Notes: Scope is used to suppress some entries, but can't be used + # for comparing duplicates. + # Interpretation of "the same place" depends on whether or + # not $linkpoints is true. + + $same = (($aP eq $bP) + && ($aS eq $bS) + && ($aT eq $bT) + && ($a->{'title'} eq $b->{'title'}) + && ($a->{'href'} eq $b->{'href'})); + + # If we're linking to points, they're only the same if they link + # to exactly the same spot. (surely this is redundant?) + $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'}) + if $linkpoints; + + $same; + } + + sub tsame { + # Unlike same(), tsame only compares a single term + my($a) = shift; + my($b) = shift; + my($term) = shift; + my($sterm) = substr($term, 0, 1) . 
"sortas"; + my($A, $B); + + $A = $a->{$sterm} || $a->{$term}; + $B = $b->{$sterm} || $b->{$term}; + + $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A); + $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B); + + return $A eq $B; + } + + sub end_entry { + # End any open elements... + print OUT "\n \n" if $lastout; + print OUT "\n\n"; + $lastout = ""; + } + + sub print_term { + # Print out the links for an indexterm. There can be more than + # one if the term has a ZONE that points to more than one place. + # (do we do the right thing in that case?) + my($idx) = shift; + my($key, $indent, @hrefs); + my(%href) = (); + my(%phref) = (); + + $indent = " "; + + if ($idx->{'see'}) { + # it'd be nice to make this a link... + if ($lastout) { + print OUT "\n \n"; + $lastout = ""; + } + print OUT $indent, "", $idx->{'see'}, "\n"; + return; + } + + if ($idx->{'seealso'}) { + # it'd be nice to make this a link... + if ($lastout) { + print OUT "\n \n"; + $lastout = ""; + } + print OUT $indent, "", $idx->{'seealso'}, "\n"; + return; + } + + if (keys %{$idx->{'zone'}}) { + foreach $key (keys %{$idx->{'zone'}}) { + $href{$key} = $idx->{'zone'}->{$key}; + $phref{$key} = $idx->{'zone'}->{$key}; + } + } else { + $href{$idx->{'href'}} = $idx->{'title'}; + $phref{$idx->{'href'}} = $idx->{'hrefpoint'}; + } + + # We can't use because we don't know the ID of the term in the + # original source (and, in fact, it might not have one). 
+ print OUT ",\n"; + @hrefs = keys %href; + while (@hrefs) { + my($linkend) = ""; + my($role) = ""; + $key = shift @hrefs; + if ($linkpoints) { + $linkend = $phref{$key}; + } else { + $linkend = $key; + } + + $role = $linkend; + $role = $1 if $role =~ /\#(.*)$/; + + print OUT $indent; + print OUT ""; + print OUT "" if ($idx->{'significance'} eq 'PREFERRED'); + print OUT $href{$key}; + print OUT "" if ($idx->{'significance'} eq 'PREFERRED'); + print OUT ""; + } + } + + sub termsort { + my($aP) = $a->{'psortas'} || $a->{'primary'}; + my($aS) = $a->{'ssortas'} || $a->{'secondary'}; + my($aT) = $a->{'tsortas'} || $a->{'tertiary'}; + my($ap) = $a->{'count'}; + + my($bP) = $b->{'psortas'} || $b->{'primary'}; + my($bS) = $b->{'ssortas'} || $b->{'secondary'}; + my($bT) = $b->{'tsortas'} || $b->{'tertiary'}; + my($bp) = $b->{'count'}; + + $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP); + $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS); + $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT); + $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP); + $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS); + $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT); + + if ($aP eq $bP) { + if ($aS eq $bS) { + if ($aT eq $bT) { + # make sure seealso's always sort to the bottom + return 1 if ($a->{'seealso'}); + return -1 if ($b->{'seealso'}); + # if everything else is the same, keep these elements + # in document order (so the index links are in the right + # order) + return $ap <=> $bp; + } else { + return $aT cmp $bT; + } + } else { + return $aS cmp $bS; + } + } else { + return $aP cmp $bP; + } + } + + sub sortsymbols { + my(@term) = @_; + my(@new) = (); + my(@sym) = (); + my($letter); + my($idx); + + # Move the non-letter things to the front. Should digits be thier + # own group? Maybe... 
+ foreach $idx (@term) { + $letter = $idx->{'psortas'}; + $letter = $idx->{'primary'} if !$letter; + $letter = uc(substr($letter, 0, 1)); + + if (($letter lt 'A') || ($letter gt 'Z')) { + push (@sym, $idx); + } else { + push (@new, $idx); + } + } + + return (@sym, @new); + } + + sub safe_open { + local(*OUT) = shift; + local(*F, $_); + + if (($outfile ne '-') && (!$forceoutput)) { + my($handedit) = 1; + if (open (OUT, $outfile)) { + while () { + if (//){ + $handedit = 0; + last; + } + } + close (OUT); + } else { + $handedit = 0; + } + + if ($handedit) { + print "\n$outfile appears to have been edited by hand; use -f or\n"; + print " change the output file.\n"; + exit 1; + } + } + + open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n"; + + if ($preamble) { + # Copy the preamble + if (open(F, $preamble)) { + while () { + print OUT $_; + } + close(F); + } else { + warn "$0: cannot open preamble $preamble.\n"; + } + } + } Index: docs/manual/makefile =================================================================== RCS file: makefile diff -N makefile *** /dev/null Fri Mar 23 21:37:44 2001 --- makefile Mon Dec 3 14:01:51 2001 *************** *** 0 **** --- 1,69 ---- + ### Oldham, Jeffrey D. + ### 1997 Dec 26 + ### misc + ### + ### LaTeX -> PostScript/PDF/WWW + ### XML -> TeX/DVI/PS/PDF + + # Definitions for PostScript and WWW Creation + TEX= latex + WWWHOMEDIR= /u/oldham/www + LATEX2HTML= latex2html + BASICLATEX2HTMLOPTIONS= -info "" -no_footnode -no_math -html_version 3.2,math + #LATEX2HTMLOPTIONS= -local_icons -split +1 $(BASICLATEX2HTMLOPTIONS) + LATEX2HTMLOPTIONS= -no_navigation -split 0 $(BASICLATEX2HTMLOPTIONS) + MPOST= mpost + + # Definitions for Jade. 
+ JADEDIR= /usr/lib/sgml/stylesheets/docbook + PRINTDOCBOOKDSL= print/docbook.dsl + HTMLDOCBOOKDSL= html/docbook.dsl + XML= dtds/decls/xml.dcl + INDEXOPTIONS= -t 'Index' -i 'index' -g -p + + CXXFLAGS= -g -Wall -pedantic -W -Wstrict-prototypes -Wpointer-arith -Wbad-function-cast -Wcast-align -Wconversion -Wnested-externs -Wundef -Winline -static + + all: outline.ps + + %.all: %.ps %.pdf %.html + chmod 644 $*.ps $*.pdf + mv $*.ps $*.pdf $* + + %.dvi: %.ltx + $(TEX) $< + # bibtex $* + # $(TEX) $< + $(TEX) $< + + %.ps: %.dvi + dvips -t letter $< -o + + %.pdf.ltx: %.ltx + sed -e 's/^%\\usepackage{times}/\\usepackage{times}/' $< > $@ + + %.pdf: %.pdf.ps + ps2pdf $< $@ + + # This rule assumes index creation. + %.dvi: %.xml genindex.sgm + jade -D$(JADEDIR) -t sgml -d $(HTMLDOCBOOKDSL) -V html-index $(XML) $< + perl collateindex.pl $(INDEXOPTIONS) -o genindex.sgm HTML.index + jade -D$(JADEDIR) -t tex -d $(PRINTDOCBOOKDSL) $(XML) $< && jadetex $*.tex && jadetex $*.tex && jadetex $*.tex + + genindex.sgm: + perl collateindex.pl $(INDEXOPTIONS) -N -o $@ + + %.html: %.xml + jade -D$(JADEDIR) -t sgml -d $(HTMLDOCBOOKDSL) $(XML) $< + + %.pdf: %.xml + jade -D$(JADEDIR) -t tex -d $(PRINTDOCBOOKDSL) $(XML) $< && pdfjadetex $*.tex && pdfjadetex $*.tex + + mproof-%.ps: %.mp + declare -x TEX=latex && $(MPOST) $< && tex mproof.tex $*.[0-9]* && dvips mproof.dvi -o $@ + + %.txt: %.ltx + detex $< > $@ + + clean: + rm -f *.dvi *.aux *.log *.toc *.bak *.blg *.bbl *.glo *.idx *.lof *.lot *.htm *.mpx mpxerr.tex HTML.index outline.tex Index: docs/manual/outline.xml =================================================================== RCS file: outline.xml diff -N outline.xml *** /dev/null Fri Mar 23 21:37:44 2001 --- outline.xml Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,4287 ---- + + + + + + + + + + + + + + + + + + + C"> + + + C++"> + + + Cheetah" > + + Doof2d" > + + Make"> + + MM"> + + MPI"> + + PDToolkit"> + + PETE"> + + POOMA"> + + POOMA Toolkit"> + + Purify"> + + Smarts"> + + + 
STL"> + + Tau"> + + + + + Array"> + + Benchmark"> + + Brick"> + + CompressibleBrick"> + + DistributedTag"> + + Domain"> + + double"> + + DynamicArray"> + + Engine"> + + Field"> + + Interval"> + + Layout"> + + LeafFunctor"> + + MultiPatch"> + + ReplicatedTag"> + + Stencil"> + + Vector"> + + + + + + + + + g++"> + + KCC"> + + Linux"> + + + + + http://pooma.codesourcery.com/pooma/download'> + + + http://www.pooma.com/'> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]> + + + + &pooma; + A &cc; Toolkit for High-Performance Parallel Scientific Computing + JeffreyD.Oldham + + CodeSourcery, LLC + + + 2001CodeSourcery, LLC () + Los Alamos National Laboratory + + + All rights reserved. This document may not be redistributed in any form without the express permission of the author. + + + + 0.01 + 2001 Nov 26 + jdo + first draft + + + + + + + + + Preface + + FINISH: Describe the target audience for &pooma; programs and + for this manual: &cc; programmers writing scientific code, possibly + parallel execution. + + Assume familiarity with &cc; template programming and the + standard template library. FIXME: Remove this index + entry.Oldham, + Jeffrey D. + +
+ Notation + + UNFINISHED +
+ + +
+ How to Read This &Book; + + FINISH: Write this section in a style similar to Lamport's + LaTeX section 1.2. FINISH: Fix the book title and the section + number. +
+ + +
+ Obtaining &pooma; and Sample Programs + + Available for free from what WWW site? Include what portions + of LICENSE? Be sure to + include CVS instructions as well. + + Which additional packages are necessary and when? + +
+ + +
+ Using and Modifying &pooma; + + &pooma; is available under an open source license. It can be + used and modified by anyone, anywhere. Can it be sold? Include + LICENSE. + + QUESTION: How do developers contribute code? + + 
+ +
+ + + + Programming with &pooma; + + + Introduction + + QUESTION: Add a partintro to the part above? + + &pooma; abbreviates Parallel Object-Oriented Methods + and Applications. + + This document is an introduction to &pooma; v2.1, a &cc; + toolkit for high-performance scientific computation. &pooma; + runs efficiently on single-processor desktop machines, + shared-memory multiprocessors, and parallel supercomputers + containing dozens or hundreds of processors. What's more, by making + extensive use of the advanced features of the ANSI/ISO &cc; + standard—particularly templates—&pooma; presents a + compact, easy-to-read interface to its users. + + From Section  of + papers/iscope98.pdf: + + Scientific software developers have struggled with the need + to express mathematical abstractions in an elegant and maintainable + way without sacrificing performance. The &pooma; (Parallel + Object-Oriented Methods and Applications) framework, written in + ANSI/ISO &cc;, has + demonstrated both high expressiveness and high performance for + large-scale scientific applications on platforms ranging from + workstations to massively parallel supercomputers. &pooma; provides + high-level abstractions for multidimensional arrays, physical + meshes, mathematical fields, and sets of particles. &pooma; also + exploits techniques such as expression templates to optimize serial + performance while encapsulating the details of parallel + communication and supporting block-based data compression. + Consequently, scientists can quickly assemble parallel simulation + codes by focusing directly on the physical abstractions relevant to + the system under study and not the technical difficulties of + parallel communication and machine-specific optimization. + + ADD: diagram of science and &pooma;. See the diagram that + Mark and I wrote. + + 
+ Evolution of &pooma; + + QUESTION: Is this interesting? Even if it is, it should be + short. + + The file papers/SCPaper-95.html + describes ?&pooma;1? and its abstraction layers. + + The "Introduction" of + papers/Siam0098.ps describes the DoE's + funding motivation for &pooma;: Accelerated Strategic Computing + Initiative (ASCI) and Science-based Stockpile Stewardship (SBSS), + pp. 1–2. + + See list of developers on p. 1 of + papers/pooma.ps. + + See list of developers on p. 1 of + papers/pooma.ps. See history and motivation + on p. 3 of papers/pooma.ps. + + Use README for + information. + +
+ introduction.html + + &pooma; was designed and implemented by scientists working + at the Los Alamos National Laboratory's Advanced Computing + Laboratory. Between them, these scientists have written and tuned + large applications on almost every commercial and experimental + supercomputer built in the last two decades. As the technology + used in those machines migrates down into departmental computing + servers and desktop multiprocessors, &pooma; is a vehicle for its + designers' experience to migrate as well. In particular, + &pooma;'s authors understand how to get good performance out of + modern architectures, with their many processors and multi-level + memory hierarchies, and how to handle the subtly complex problems + that arise in real-world applications. +
+ +
+ +
+ + + + A Tutorial Introduction + + UPDATE: In the following paragraph, fix the cross-reference + to the actual section. + + &pooma; provides different containers and processor + configurations and supports different implementation styles, as + described in . In this + chapter, we present several different implementations of the + &doof2d; two-dimensional diffusion simulation program: + + + a C-style implementation omitting any use of &pooma; + computing each array element individually, + + + a &pooma; &array; implementation computing each array + element individually, + + + a &pooma; &array; implementation using data-parallel + statements, + + + a &pooma; &array; implementation using stencils, which + support local computations, + + + a stencil-based &pooma; &array; implementation supporting + computation on multiple processors, + + + a &pooma; &field; implementation using data-parallel + statements, and + + + a data-parallel &pooma; &field; implementation for + multi-processor execution. + + + + These illustrate the &array;, &field;, &engine;, layout, + mesh, and domain data types. They also illustrate various + immediate computation styles (element-wise accesses, data-parallel + expressions, and stencil computation) and various processor + configurations (one sequential processor and multiple + processors). + + 
+ &doof2d; Averagings + + + + + + The Initial Configuration + + + + + + + + After the First Averaging + + + + + + + + After the Second Averaging + + +
+ + The &doof2d; diffusion program starts with a two-dimensional + grid of values. To model an initial density, all grid values are + zero except for one nonzero value in the center. In each averaging, + each grid element, except the outermost ones, updates its value by + averaging its value and its eight neighbors. To avoid overwriting + grid values before all their uses occur, we use two arrays, reading + the first and writing the second and then reversing their roles + within each iteration. + + Figure + illustrates the averagings. Initially, only the center element has + a nonzero value. To form the first averaging, each element's new + value equals the average of its and its neighbors' previous values. + Thus, the initial nonzero value spreads to a three-by-three grid. + The averaging continues, spreading to a five-by-five grid of + nonzero values. Values in outermost grid cells are always + zero. + + Before presenting various implementations of &doof2d;, we + explain how to install the &poomaToolkit;. + + REMOVE: &doof2d; algorithm and code is illustrated in + Section 4.1 of + pooma-publications/pooma.ps. It includes a + figure illustrating parallel communication of data. + + 
+ Installing &pooma; + + ADD: How does one install &pooma; using Windows or Mac? + + UPDATE: Make a more recent &pooma; source code file + available on &poomaDownloadPage;. For example, + LINUXgcc.conf is not available. + + In this section, we describe how to obtain, build, and + install the &poomaToolkit;. We focus on installing under the + Unix operating system. Instructions for installing on computers + running Microsoft Windows or MacOS, as well as more extensive + instructions for Unix, appear in . + + Obtain the &pooma; source code &poomaSourceFile; + from the &pooma; download page (&poomaDownloadPage;) available off + the &pooma; home page (&poomaHomePage;). The tgz + indicates this is a compressed tar archive file. To extract the + source files, use tar xzvf &poomaSourceFile;. + Move into the source code directory &poomaSource;; e.g., + cd &poomaSource;. + + Configuring the source code prepares the necessary paths for + compilation. First, determine a configuration file + corresponding to your operating system and compiler in the + config/arch/ directory. + For example, LINUXgcc.conf supports compiling + under a &linux; operating system with &gcc; and SGI64KCC.conf supports compiling + under a 64-bit SGI Unix operating + system with &kcc;. Then, configure the source code: + ./configure --arch LINUXgcc --opt --suite + LINUXgcc-opt. The architecture argument to the + --arch option is the name of the corresponding + configuration file, omitting its .conf suffix. The + --opt indicates the &poomaToolkit; will + contain optimized source code, which makes the code run more + quickly but may impede debugging. Alternatively, the + --debug option supports debugging. The + suite name + can be any arbitrary string. We chose + LINUXgcc-opt to remind us of the architecture + and optimization choice. configure creates subdirectories + named by the suite name LINUXgcc-opt for use when + compiling the source files. 
Comments at the beginning of + lib/suiteName/PoomaConfiguration.h + record the configuration arguments. + + To compile the source code, set the + POOMASUITE environment variable to the suite name + and then type make. To set the environment + variable for the bash shell use + export + POOMASUITE=suiteName, + substituting the suite name's + suiteName. For the + csh shell, use setenv + POOMASUITE LINUXgcc-opt. Issuing the + make command compiles the &pooma; source code + files to create the &pooma; library. The &pooma; makefiles assume + the GNU &make; so substitute the proper + command if necessary. The &pooma; library can be found in, e.g., + lib/LINUXgcc-opt/libpooma-gcc.a. +
+ +
+ Hand-Coded Implementation + + Before implementing &doof2d; using the &poomaToolkit;, we + present a hand-coded implementation of &doof2d;. See . After querying the + user for the number of averagings, the arrays' memory is + allocated. Since the arrays' size is not known at compile time, + the arrays are accessed via pointers to allocated dynamic memory. + This memory is deallocated at the program's end to avoid memory + leaks. The arrays are initialized with initial conditions. For + the b array, all values except the central one + are zero. Only the outermost values of the + a array need be initialized to zero, but we + instead initialize them all using the loop used by + b. + + The simulation's kernel consists of triply nested loops. + The outermost loop controls the number of iterations. The inner + nested loops iterate through the arrays' elements, excepting the + outermost elements; note the loop indices range from 1 to n-2 + while the array indices range from 0 to n-1. Each + a value is assigned the average of its + corresponding value in b and the latter's + neighbors. Values in the two-dimensional grids are accessed using + two sets of brackets, e.g., a[i][j]. After + assigning values to a, a second averaging reads + values in a, writing values in + b. + + After the kernel finishes, the final central value is + printed. If the desired number of averagings is even, the value + in b is printed; otherwise, the value in + a is used. Finally, the dynamically-allocated + memory must be freed to avoid memory leaks. + + + Hand-Coded Implementation of &doof2d; + &doof2d-c-element; + + + The user specifies the desired number of averagings. + + + These variables point to the two-dimensional, + dynamically-allocated grids so we use a pointer to a pointer to + a &double;. + + + The user enters the desired grid size. The grid will be + a square with n by n grid cells. + + + Memory for the arrays is allocated. By default, the + array indices are zero-based. 
+ + + Initially, all grid values are zero except for the one + nonzero value at the center of the second array. Array + positions are indicated using two brackets, e.g., + a[i][j]. A better implementation might + initialize only the outermost values of the + a array. + + + These constants indicate the number of iterations, and + the average weighting. + + + Each a value, except an outermost one, + is assigned the average of its analogous b + value and that value's neighbors. Note the loop indices ensure + the outermost values are not changed. The + weight's value ensures the computation is an + average. + + + The second averaging computes b's + values using values stored in a. + + + After the averagings finish, the central value is printed. + + + The dynamically-allocated memory must be deallocated to + avoid memory leaks. + + + + + To compile the executable, change directories to the &pooma; + &poomaExampleDirectory;/Doof2d + directory. Ensure the POOMASUITE environment + variable specifies the desired suite name + suiteName, as we did when compiling + &pooma; in the previous section . Issuing the + make Doof2d-C-element command creates the + executable + suiteName/Doof2d-C-element. + + When running the executable, specify the desired + nonnegative number of averagings and the nonnegative number of + grid cells along any dimension. The resulting grid has the same + number of cells along each dimension. After the executable + finishes, the resulting value of the central element is + printed. 
+ + +
+ Element-wise &array; Implementation + + The simplest way to use the &poomaToolkit; is to + use the &pooma; &array; class instead of &c; arrays. &array;s + automatically handle memory allocation and deallocation, support a + wider variety of assignments, and can be used in expressions. + + implements &doof2d; using &array;s and element-wise accesses. + Since the same algorithm is used as , we will concentrate + on the differences. + + + Element-wise &array; Implementation of &doof2d; + &doof2d-array-element; + + + To use &pooma; &array;s, the Pooma/Arrays.h must be included. + + + The &poomaToolkit; structures must be constructed before + their use. + + + Before creating an &array;, its domain must be specified. + The N interval represents the + one-dimensional integral set {0, 1, 2, …, n-1}. An + Interval<2> object represents the entire + two-dimensional index domain. + + + An &array;'s template parameters indicate its dimension, + its value type, and how the values will be stored or computed. + The &brick; &engine; type indicates values will be directly + stored. It is responsible for allocating and deallocating + storage so new and + delete statements are not necessary. + The vertDomain specifies the array index + domain. + + + The first statement initializes all &array; values to the + same scalar value. This is possible because each &array; + knows its domain. The second statement + illustrates &array; element access. Indices, separated by + commas, are surrounded by parentheses rather than surrounded by + square brackets ([]). + + + &array; element access uses parentheses, rather than + square brackets + + + &pooma; may reorder computation of statements. Calling + Pooma::blockAndEvaluate ensures all + computation finishes before accessing a particular array + element. + + + Since &array;s are first-class objects, they + automatically deallocate any memory they require, eliminating + memory leaks. 
+ + + The &poomaToolkit; structures must be destructed after + their use. + + + + + We describe the use of &array; and the &poomaToolkit; in + . + &array;s, declared in the Pooma/Arrays.h, are first-class + objects. They know their index domain, can be used + in expressions, can be assigned scalar and array values, and + handle their own memory allocation and deallocation. + + The creation of the a and + b &array;s requires an object specifying their + index domains. Since these are two-dimensional arrays, their + index domains are also two dimensional. The two-dimensional + Interval<2> object is the Cartesian product of + two one-dimensional Interval<1> objects, each + specifying the integral set {0, 1, 2, …, n-1}. + + An &array;'s template parameters indicate its dimension, the + type of its values, and how the values are stored. Both + a and b are two-dimensional + arrays storing &double;s so their dimension + is 2 and their element type is &double;. An &engine; stores an + &array;'s values. For example, a &brick; &engine; explicitly + stores all values. A &compressiblebrick; &engine; also explicitly + stores values if more than one value is present, but, if all values + are the same, storage for just that value is required. Since an + engine can store its values any way it desires, it might instead + compute its values using a function or compute the values stored + in separate engines. In practice, most explicitly specified + &engine;s are either &brick; or &compressiblebrick;. + + &array;s support both element-wise access and scalar + assignment. Element-wise access uses parentheses, not square + brackets. For example, b(n/2,n/2) + specifies the central element. The scalar assignment b + = 0.0 assigns the same 0.0 value to all array + elements. This is possible because the array knows the extent of + its domain. + + After the kernel finishes, the central value is printed out. 
+ Just prior to this &array; access, a call to + Pooma::blockAndEvaluate() ensures all + computation has finished. &pooma; may reorder computations or + distribute them among various processors. Before reading an + individual &array; value, blockAndEvaluate + ensures the value has the correct value. Calling this function is + necessary only when accessing individual array elements because + &pooma; cannot determine when to call the function itself. For + example, before printing an array, &pooma; will call + blockAndEvaluate itself. + + Any program using the &poomaToolkit; must initialize the + toolkit's data structures using + Pooma::initialize(argc,argv). This + extracts &pooma;-specific command-line options from the + command-line arguments in argv and initializes + the inter-processor communication and other data structures. When + finished, Pooma::finalize() ensures all + computation has finished and the communication and other data + structures are destructed. +
+ + +
+ Data-Parallel &array; Implementation + + &pooma; supports data-parallel &array; accesses. Many + algorithms are more easily expressed using data-parallel + expressions. Also, the &poomaToolkit; might be able to reorder + the data-parallel computations to be more efficient or distribute + them among various processors. In this section, we concentrate + on the differences between the data-parallel implementation of + &doof2d; listed in and the + element-wise implementation listed in the previous section . + + + Data-Parallel &array; Implementation of &doof2d; + &doof2d-array-parallel; + + + These variables specify one-dimensional domains {1, 2, + …, n-2}. Their Cartesian product specifies the domain + of the array values that are modified. + + + Data-parallel expressions replace nested loops and array + element accesses. For example, a(I,J) + represents the subset of the a array having + a domain equal to the Cartesian product of I + and J. Intervals can be shifted by an additive + or multiplicative constant. + + + + + Data-parallel expressions apply domain objects to containers + to indicate a set of parallel expressions. For example, in the + program listed above, a(I,J) specifies all + of a array excepting the outermost elements. + The array's vertDomain domain consists of the + Cartesian product of {0, 1, 2, …, n-1} and itself, while + I and J each specify {1, 2, + …, n-2}. Thus, a(I,J) is the subset + with a domain of the Cartesian product of {1, 2, …, n-2} + and itself. It is called a view of an + array. It is itself an array, with a domain and supporting + element access, but its storage is the same as + a's. Changing a value in + a(I,J) also changes the same value in + a. Changing a value in the latter also changes + the former if the value is not one of a's + outermost elements. 
The expression + b(I+1,J+1) indicates the subset of + b with a domain consisting of the Cartesian + product of {2, 3, …, n-1}, i.e., the same domain as + a(I,J) but shifted up one unit and to the + right one unit. Only an &interval;'s value, not its name, is + important. Thus, all uses of J in this program + could be replaced by I without changing the + semantics. + +
+ Adding &array;s + + + + + + Adding two arrays with different domains. + + + When adding arrays, values in corresponding positions are + added even if they have different indices, indicated by the + small numbers adjacent to the arrays. + + +
+ + The statement assigning to a(I,J) + illustrates that &array;s may participate in expressions. Each + addend is a view of an array, which is itself an array. Each view + has the same domain size so their sum can be formed by + corresponding elements of each array. For example, the lower, + left element of the result equals the sum of the lower, left + elements of the addend arrays. For the computation, indices are + ignored; only the relative positions within each domain are used. + + illustrates adding two arrays with different domain indices. The + indices are indicated by the small numbers to the left and the + bottom of the arrays. Even though 9 and 3 have different indices + (1,1) and (2,0), they are added to each other because they have + the same relative positions within the addends. +
+ + +
+ Stencil &array; Implementation + + Many computations are local, computing an &array;'s value by + using close-by &array; values. Encapsulating this computation in + a stencil can yield faster code because the compiler can determine + all accesses come from the same array. Each stencil consists of a + function object and an indication of the stencil's extent. + + + Stencil &array; Implementation of &doof2d; + &doof2d-array-stencil; + + + A stencil is a function object implementing a local + operation on an &array;. + + + &pooma; applies this function call + operator() to the interior domain of an + &array;. Although not strictly necessary, the function's + template parameter C permits using this + stencil with &array;s and other containers. The + read &array; member function supports only + reading values, not writing values, thus possibly permitting + faster access. + + + These two functions indicate the stencil's size. For + each dimension, the stencil extends one cell to the left of (or + below) its center and also one cell to the right (or above) its + center. + + + Create the stencil. + + + Applying stencil to the + b array and a subset + interiorDomain of its domain yields an + array, which is assigned to a subset of a. + The stencil's function object is applied to each position in + the specified subset of b. + + + + + Before we describe how to create a stencil, we describe how + to apply a stencil to an array, yielding values. To compute the + value associated with index position (1,3), the stencil's center + is placed at (1,3). The stencil's + upperExtent and + lowerExtent functions indicate which &array; + values the stencil's function will use. See . + Applying the stencil's function call + operator() yields the computed value. To + compute multiple &array; values, apply a stencil to the array and + a domain object: stencil(b, + interiorDomain). This applies the stencil to each + position in the domain. 
The user must ensure that applying the + stencil does not access nonexistent &array; values. + +
+ Applying a Stencil to an &array; + + + + + + Apply a stencil to position (1,3) of an array. + + + To compute the value associated with index position (1,3) + of an array, place the stencil's center, indicated with dashed + lines, at the position. The computation involves the array + values covered by the stencil and delineated by + upperExtent and + lowerExtent. + + +
+ + To create a stencil object, apply the &stencil; type to a + function object class. For example, + Stencil<DoofNinePt> stencil declares + the stencil object. The function object class + must define a function call operator() with a + container parameter and index parameters. The number of index + parameters, indicating the stencil's center, must equal the + container's dimension. For example, DoofNinePt + defines operator()(const C& c, int i, int + j). We templated the container type + C although this is not strictly necessary. The + two index parameters i and j + ensure the stencil works with two-dimensional containers. The + lowerExtent indicates how far to the left + (or below) the stencil extends beyond its center. Its parameter + indicates a particular dimension. Index parameters + i and j are in dimension 0 + and 1. upperExtent serves an + analogous purpose. The &poomaToolkit; uses these functions when + distributing computation among various processors, but it does not + use these functions to ensure nonexistent &array; values are not + accessed. Caveat stencil user! +
+ + +
+ Distributed &array; Implementation + + A &pooma; program can execute on one or multiple processors. + To convert a program designed for uniprocessor execution to a + program designed for multiprocessor execution, the programmer need + only specify how each container's domain should be split into + patches. The &poomaToolkit; automatically + distributes the data among the available processors and handles + any required communication between processors. + + + Distributed Stencil &array; Implementation of &doof2d; + &doof2d-array-distributed; + + + The number of processors executing a &pooma; program can + be specified at run-time. + + + The UniformGridPartition declaration + specifies how an array's domain will be partitioned, or split, + into patches. Guard layers are an optimization that can reduce + data communication between patches. The + UniformGridLayout declaration applies the + partition to the given domain, distributing the resulting + patches among various processors. + + + The MultiPatch &engine; distributes requests + for &array; values to the associated patch. Since a patch may + be associated with a different processor, its + remote engine has type + Remote<Brick>. &pooma; automatically + distributes the patches among available memories and + processors. + + + The stencil computation, whether for one processor or + multiple processors, is the same. + + + + + Supporting distributed computation requires only minor code + changes. These changes specify how each container's domain is + distributed among the available processors. The rest of the + program, including all the computations, remains the same. When + running, the &pooma; executable interacts with the run-time + library to determine which processors are available, distributes + the containers' domains, and automatically handles all necessary + interprocessor communication. The same executable runs on one or + many processors. 
Thus, the programmer can write one program, + debugging it on a uniprocessor computer and running it on a + supercomputer. + +
+ The &pooma; Distributed Computation Model + + + + + + The &pooma; distributed computation model. + + + The &pooma; distributed computation model combines + partitioning containers' domains and the computer configuration + to create a layout. + + +
+ + &pooma;'s distributed computing model separates container + domain concepts from computer configuration concepts. See . + The program indicates how each container's domain will be + partitioned. This process is represented in the upper left corner + of the figure. A user-specified partition specifies how to split + the domain into pieces. For example, the illustrated partition + splits the domain into three equal-sized pieces along the + x-dimension and two equal-sized pieces along the y-dimension. + Thus, the domain is split into patches. + The partition also specifies external and internal guard layers. + A guard layer is a domain surrounding a + patch. A patch's computation only reads but does not write these + values. An external guard layer + conceptually surrounds the entire container domain with boundary + values whose presence permits all domain computations to be + performed the same way even for values along the domain's edge. + An internal guard layer duplicates values + from adjacent patches so communication need not occur during a + patch's computation. The use of guard layers is an optimization; + using external guard layers eases programming and using internal + guard layers reduces communication between processors. Their use + is not required. + + The computer configuration of shared memory and processors + is determined by the run-time system. See the upper right portion + of . + A context is a collection of shared memory + and processors that can execute a program or a portion of a + program. For example, a two-processor desktop computer might have + memory accessible to both processors so it is a context. A + supercomputer consisting of desktop computers networked together + might have as many contexts as computers. The run-time system, + e.g., the Message Passing Interface (&mpi;) Communications Library + (FIXME: xref linkend="mpi99", ) or the &mm; + Shared Memory Library (), communicates + the available contexts to the executable. 
&pooma; must be + configured for the particular run-time system. See . + + A layout combines patches with + contexts so the program can be executed. If &distributedtag; is + specified, the patches are distributed among the available + contexts. If &replicatedtag; is specified, each set of patches is + replicated among each context. Regardless, the containers' + domains are now distributed among the contexts so the program can + run. When a patch needs data from another patch, the &pooma; + toolkit sends messages to the desired patch uses a message-passing + library. All such communication is automatically performed by the + toolkit with no need for programmer or user input. + + FIXME: The two previous paragraphs demonstrate confusion + between run-time system and message-passing + library. + + Incorporating &pooma;'s distributed computation model into a + program requires writing very few lines of code. illustrates + this. The partition declaration creates a + UniformGridPartition splitting each dimension of a + container's domain into equally-sized + nuProcessors pieces. The first + GuardLayers argument specifies each patch will have + copy of adjacent patches' outermost values. This may speed + computation because a patch need not synchronize its computation + with other patches' processors. Since each value's computation + requires knowing its surrounding neighbors, the internal guard + layer is one layer deep. The second GuardLayers + argument specifies no external guard layer. External guard layers + simplify computing values along the edges of domains. Since the + program already uses only the interior domain for computation, we + do not use this feature. + + The layout declaration creates a + UniformGridLayout layout. As illustrates, + it needs to know a container's domain, a partition, the computer's + contexts, and a &distributedtag; or &replicatedtag;. 
These + comprise layout's three parameters; the + contexts are implicitly supplied by the run-time system. + + To create a distributed &array;, it should be created using + a &layout; object and have a &multipatch; engine. Prior + implementations designed for uniprocessors constructed the + container using a &domain; object. A distributed implementation + uses a &layout; object, which conceptually specifies a &domain; + object and its distribution throughout the computer. A + &multipatch; engine supports computations using multiple patches. + The UniformTag indicates the patches all have the + same size. Since patches may reside on different contexts, the + second template parameter is Remote. Its + Brick template parameter specifies the engine for a + particular patch on a particular context. Most distributed + programs use MultiPatch<UniformTag, Remote<Brick> + > or MultiPatch<UniformTag, + Remote<CompressibleBrick> > engines. + + The computations for a distributed implementation are + exactly the same as for a sequential implementation. The &pooma; + Toolkit and a message-passing library automatically perform all + computation. + + The command to run the programs is dependent on the run-time + system. To use &mpi; with the Irix 6.5 operating system, one + can use the mpirun command. For example, + mpirun -np 9 Doof2d-Array-distributed -mpi + --num-patches 3 invokes the &mpi; run-time system with + nine processors. The -mpi argument tells + the &pooma; executable Doof2d-Array-distributed + to use the &mpi; Library. + + HERE + + The command Doof2d-Array-distributed -shmem -np 2 + --num-patches 2 + + To run Doof2d-Array-distributed with the &mm; + Shared Memory Library, use + + HERE + + + + COMMENT: See background.html for a partial + explanation. A context is a distinct + region of memory in some computer. Execution thread is associated + with each context. One or more different processors can be + associated with the same context. 
+ + QUESTION: How do &pooma; parallel concepts compare with + Fortran D or high-performance Fortran FINISH CITE: + {koelbel94:_high_perfor_fortr_handb}? + + QUESTION: What does Cheetah do for us? Must configure with + --messaging and Cheetah library must be available. When running + Doof2d benchmark, use --num-patches N. On LinuxKCC, use + '--num-patches p --run-impls 14 --sim-params N 0 1'. Runtime + system must also provide some support. How do I write about this? + What is an example? How does one install Cheetah? + + +
+ + +
+ Relations + + UNFINISHED + +
+ +
+ + + + Overview of &pooma; Concepts + + Describe the software application layers similar to + papers/SCPaper-95.html and "Short Tour of + &pooma;" in papers/SiamOO98_paper.ps. + Section 2.2, "Why a Framework?," of + papers/pooma.ps argues why a layered approach + eases use. Section 3.1, "Framework Layer Description," + describes the five layers. + + FINISH: Write short glossary entries for each of these. + + FINISH: Look through the source code to ensure all main + concepts are listed. + + Here are (preliminary) &pooma; equations: + + + &pooma; <quote>Equations</quote> + + + + + field = data + materials + centering + layout + mesh + + + map from space to values + + + array = data + layout + + + map from indices to values + + + mesh = layout + origin + spacings + + + distribute domain through physical space + + + layout = domain + partition + layout_tag (distributed/replicated) + + + distribute domain's blocks among processors/contexts + + + partition = blocks + guard layers + + + split domain into blocks + + + domain = newDomain + + + space of permissible indices + + + +
+ + + FINISH: Following is a first try at describing the &pooma; + abstraction layers. See also paper illustration. + + + &pooma; Abstraction Layers + + + + + application program + + + &array; &field; (should have + FieldEngine under it) + + + &engine; + + + evaluators + + + +
+ + FINISH: How does parallel execution fit in? + + FINISH: Should we also name and describe each layer? + +
+ Domains + +
+ Section 4 "Future Improvements in + &pooma; II" of + papers/SiamOO98_paper.ps + + A &domain; is a set of discrete points in some space.… + &domain;s provide all of the expected domain calculus + capabilities such as subsetting and intersection. + +
+ + Section 3, "Domains and Views," of + papers/iscope98.pdf describes five types of + domains +
+ + +
+ Layouts + + UNFINISHED + + Also describe partitions and guard cells within here. + +
+ + +
+ Meshes + + UNFINISHED +
+ + +
+ Data-Parallel Statements + + Can we use "An Overview of &pete;" from + papers/PETE_DDJ/ddj_article.html or is this + too low-level? + + Section 3.2.1 of papers/pooma.ps + gives a simple example of data-parallel expression. It also has a + paragraph introducing data-parallel operations and selecting + subsets of domains. Section 3.4 describes the Chained + Expression Object (CEO), apparently a precursor + of &pete;. Regardless, it provides some motivation and + introductory material. + + From Section 4 of + papers/SiamOO98_paper.ps: + + This version of &pete; reduces compile time of user codes + and utilizes compile-time knowledge of expression &domain;s for + better optimization. For example, more efficient loops for + evaluating an expression can be generated if &pete; knows that the + &domain; has unit stride in memory. + + Section 4, "Expressions and Evaluators", of + papers/iscope98.pdf has a good explanation of + &pooma; II's expression trees and expression engines. + + COMMENT: background.html has some related + &pete; material. +
+ +
+ Containers + +
+ &array; + +
+ Section 4 "Future Improvements in + &pooma; II" of + papers/SiamOO98_paper.ps + + An &array; can be thought of as a map from one &domain; to + another.… &array;s depend only on the interface of + &domain;s. Thus, a subset or view of an &array; can be + manipulated in all the same ways as the original &array;. + &array;s can perform indirect addressing because the output + &domain; of one &array; can be used as the input &domain; of + another &array;. &array;s also provide individual element + access. +
+ + + + (unformatted) From + papers/GenericProgramming_CSE/dubois.html: + The &pooma; &array; concept provides an example of how these + generic-programming features can lead to flexible and efficient + code. An Array maps a fairly arbitrary input domain to an + arbitrary range of outputs. When used by itself, an &array; + object A refers to all of the values in its + domain. Element-wise mathematical operations or functions can be + applied to an array using straightforward notation, like A + B + or sin(A). Expressions involving Array objects are themselves + Arrays. The operation A(d), where d is a domain object that + describes a subset of A's domain, creates a view of A that + refers to that subset of points. Like an array expression, a + view is also an Array. If d represents a single point in the + domain, this indexing operation returns a single value from the + range. Equivalently, one can index an N-dimensional Array by + specifying N indices, which collectively specify a single point + in the input domain: A(i1, i2, ..., iN). + + The &pooma; multi-dimensional Array concept is similar to + the Fortran 90 array facility, but extends it in several + ways. Both &pooma; and Fortran arrays can have up to seven + dimensions, and can serve as containers for arbitrary + types. Both support the notion of views of a portion of the + array, known as array sections in F90. The &pooma; Array concept + supports more complex domains, including bounded, continuous + (floating-point) domains. Furthermore, Array indexing in &pooma; + is polymorphic; that is, the indexing operation X(i1,i2) can + perform the mapping from domain to range in a variety of ways, + depending on the particular type of the Array being + indexed. + + Fortran arrays are dense and the elements are arranged + according to column-major conventions. Therefore, X(i1,i2) + refers to element number i1-1+(i2-1)*numberRowsInA. However, as + Fig. 
+ 1 shows, Fortran-style "Brick" storage is not the only + storage format of interest to scientific programmers. For + compatibility with C conventions, one might want to use an array + featuring dense, row-major storage (a C-style Brick). To save + memory, it might be advantageous to use an array that only + stores a single value if all its element values are the + same. Other sparse storage schemes that only store certain + values may also be desirable. To exploit parallelism, it is + convenient for an array's storage to be broken up into patches, + which can be processed independently by different CPUs. Finally, + one can imagine an array with no data at all. For example, the + values can be computed from an expression involving other + arrays, or analytically from the indices. + + + The &pooma; &array; Class Template + + Next we describe &pooma;'s model of the Array concept, the + Array class template. The three most important requirements from + the point of view of overall design are: (1) arbitrary domain, + (2) arbitrary range, and (3) polymorphic indexing. These express + themselves in the template parameters for the &pooma; Array + class. The template + + template <int Dim, class T = double, class EngineTag = Brick> + class Array; + + is a specification for creating a set of classes all named + Array. The template parameters Dim, T, and EngineTag determine + the precise type of the Array. Dim represents the dimension of + the array's domain. T gives the type of array elements, thereby + defining the output range of the array. EngineTag specifies the + manner of indexing and types of the indices. + + End From + papers/GenericProgramming_CSE/dubois.html: + + Section 2, "Arrays and Engines," of + papers/iscope98.pdf describes both &array;s + and &engine;s. This may or may not duplicate the material in + papers/GenericProgramming_CSE/dubois.html. + +
+ Views of &array;s + + Section 3, "Domains and Views," of + papers/iscope98.pdf motivates the need for + views: +
+ One of the primary uses of domains is to specify + subsections of &array; objects. Subarrays are a common + feature of array classes; however, it is often difficult to + make such subarrays behave like first-class objects. The + &pooma; II engine concept provides a clean solution to + this problem: subsetting an &array; with a domain object + creates a new &array; that has a view engine. +
+
+
+
+ +
+ &field; + + QUESTION: Do we include boundary conditions here? + + FINISH: Do we have an example that shows something not possible + with &array;? + + Describe and illustrate multi-material and + multivalue? + + ADD: description of meshes and guard layers. + +
+ + +
+ <type>TinyMatrix</type> + + Section 3.2.2 of + papers/pooma.ps describes &vector;s and + matrix classes. +
+
+ +
+ Engines + + (unformatted) From + papers/GenericProgramming_CSE/dubois.html: + + The Engine Concept + + To implement polymorphic indexing, the Array class defers + data storage and data lookup to an engine object. The requirements + that the Array template places on its engine provide the + definition for the Engine concept. We'll describe these by + examining a simplified version of the Array template, shown in + Fig. 2. + + First, the Array class determines and exports (makes + Engine_t part of Array's public interface) the type of the engine + class that it will use: + + typedef Engine<Dim, T, EngineTag> Engine_t; + + This statement declares Engine_t to be an alias for the type + Engine<Dim,T,EngineTag>. This is the first requirement + placed on engine classes: they must be specializations of a + general Engine template whose template parameters are identical to + those of Array. Next, the Array template determines the type of + scalar arguments (indices) to be used in operator(), the function + that implements &pooma;'s Fortran-style indexing syntax X(i1,i2): + + typedef typename Engine_t::Index_t Index_t; + + This statement defines another type alias: + Array<Dim,T,EngineTag>::Index_t is simply an alias for + Engine_t::Index_t. Engine_t::Index_t is a qualified name, which + means that the type Index_t is found in the class Engine_t. This + is the second requirement for the Engine concept: the class + Engine_t must define a public type called Index_t. This line will + not compile if that definition is not supplied. This indirection + is one of the ways that &pooma; supports polymorphic indexing. If + the Engine works with a discrete integer domain, it defines its + Index_t to be an integral type. If the Engine works in a + continuous domain, it defines its Index_t to be a floating-point + type. + + The data lookup is performed in the operator() function. We + see that Array simply passes the indices on to its engine + object. 
Thus, we have the third requirement for the Engine + concept: it must provide a version of operator() that takes Dim + values of type Index_t. + + Simply passing the indices on to the engine object may seem + odd. After all, engine(i,j) looks like we're just indexing another + array. There are several advantages to this extra level of + indirection. The Array class is as faithful a model of the Array + concept as possible, while the Engine class is a low-level + interface to a user-defined data source. As a result, Array has a + wide variety of constructors for user convenience, while engines + have but a few. Array supports a wide variety of overloaded + operator() functions for view creation and indexing. Engines + support indexing only. Array does not have direct access to the + data, which is managed by the engine object. Finally, Array has a + wide variety of overloaded mathematical operators and functions, + and works with the Portable Expression Template Engine (PETE) [4] + to provide efficient evaluation of Array expressions. Engines have + no such support. In general, Array is much more complex and + feature-laden than Engine. This is the prime advantage of the + separation of interface and implementation: Array only has to be + implemented once by the &pooma; developers. Engines are simple + enough to be written by users and plugged directly into the Array + framework. + + Figure 3 illustrates the "Brick" specialization of the + Engine template, which implements Fortran-style lookup into a + block of memory. First, there is the general Engine template, + which is empty as there is no default behavior for an unknown + EngineTag. The general template is therefore not a model for the + Engine concept and Array classes attempting to use it will not + compile. Next, there is the definition of the Brick class, a + policy tag whose sole purpose is to select a particular + specialization of the Engine template. 
Finally, there is the + partial specialization of the Engine template. Examining its body, + we see the required Index_t typedef and the required operator(), + which follows the Fortran prescription for generating an offset + into the data block based on the row, column, and the number of + rows. All of the requirements are met, so the Brick-Engine class + is a model of the Engine concept. + + End From + papers/GenericProgramming_CSE/dubois.html: + + (unformatted) From + papers/GenericProgramming_CSE/dubois.html: + + Compile-time Versus Run-Time Polymorphism + + Encapsulating the indexing in an Engine class has important + advantages, both in terms of flexibility and efficiency. To + illustrate this point, we introduce the PolarGaussian-Engine + specialization in Fig. 4. This is an analytic engine that + calculates its values directly from its inputs. Unlike the + Brick-Engine, this engine is "indexed" with data of the same type + as its output: it maps a set of T's to a single T. Therefore, the + Index_t typedef selects T as the index type, as opposed to the int + in the Brick-Engine specialization. The operator() function also + differs in that it computes the return value according to an + analytic formula. + + Both Engine<Dim,T,Brick> and + Engine<Dim,T,PolarGaussian> can be plugged in to an Array by + simply varying the Array's EngineTag. This is possible despite the + fact that the two classes exhibit dramatically different behavior + because they are both models of the Engine concept. + + Notice that we have achieved polymorphic indexing without + the use of inheritance or virtual functions. For instance, + consider the following code snippet: + + Array<2, double, Brick> a; + Array<2, double, PolarGaussian> b; + + double x = a(2, 3); // x = a.engine.data[2 + 3 * a.engine.numRows]; + double y = b(2.0, 3.0); // y = exp(-(2.0*2.0+3.0*3.0) / b.engine.delta); + + The data lookup functions for the two Arrays perform completely + different operations. 
Since this is accomplished using static + types, it is known as compile-time polymorphism. Moreover, + everything is known at compile time, so the functions are fully + inlined and optimized, thereby yielding code equivalent to that + shown in the comments above. + + The flexibility and efficiency of compile-time polymorphism + cannot be duplicated with a run-time implementation. To illustrate + this point, in Fig. 5, we re-implement our Array concept using the + classic Envelope-Letter pattern [5], with the array class, + RTArray, being the envelope and the run-time-engine, RTEngine, + being the letter. RTArray defers data lookup to the engine object + by invoking the engine's functions through a pointer to the + RTEngine base class. Figure 6 illustrates the RTEngine base class + and Fig. 7 illustrates two descendants: RTBrick and + RTPolarGaussian. + + The run-time implementation provides the same basic + functionality as the compile-time implementation, but it is not as + flexible or as efficient. It lacks flexibility in that the return + type of the indexing operation must be specified in the RTEngine + base class and in the RTArray class. Thus, in Figs. 5 and 6, we see + versions of RTArray::operator() and RTEngine::index functions that + take both int's and T's. If the programmer wants to add another + index-type option, these classes must be modified. This is a + violation of the open-closed principle proposed by Meyer + [6]. Also, since RTEngine descendants will usually only implement + one version of index, we cannot make RTEngine an abstract base + class. Instead, we have the default versions of index throw an + exception. Thus, compile-time error checking is + weakened. Furthermore, since indexing is done via a virtual + function call, it will almost never be inlined, which is not + acceptable in most scientific applications. + + There are advantages to the Envelope-Letter approach.
First, + all RTArray objects have the same type, allowing them to be stored + in homogeneous collections. This can simplify the design of some + applications. Second, RTArray objects can change their engines at + runtime, and thus effectively change their types on the fly—this + is the primary reason for using the Envelope-Letter idiom, and can + be very important in some applications. + + For most scientific applications, however, these issues are + minor, and maximum performance for array indexing is of paramount + importance. Our compile-time approach achieves this performance + while providing the desired polymorphic indexing. + + From Section 4 of + papers/SiamOO98_paper.ps: + + The &array; class is templated on an &engine; type that + handles the actual implementation of the mapping from input to + output. Thus, the &array; interface features are completely + separate from the implementation, which could be a single &c; + array, a function of some kind or some other mechanism. This + flexibility allows an expression itself to be viewed through the + &array; interface. Thus, one can write something like + + foo(A*B+C); + where A, B and + C are &array;s and foo is + a function taking an &array; as an argument. The expression + A*B+C + will only be evaluated by the expression engine as needed by + foo. + + In fact, one can even write &engine;s which are wrappers + around external data structures created in non-&pooma; codes and + know how to manipulate these structures. Once this is done, the + external entities have access to the entire &array; interface and + can utilize all of the powerful features of + &pooma; II. + + Section 2, "Arrays and Engines," of + papers/iscope98.pdf describes both &array;s + and &engine;s. This may or may not duplicate the material in + papers/GenericProgramming_CSE/dubois.html. + + Section 4, "Expressions and Evaluators", of + papers/iscope98.pdf has a good explanation of + &pooma; II's expression trees and expression engines.
+ + + MultiPatch Engine + From README: To actually use multiple + contexts effectively, you need to use the MultiPatch engine with + patch engines that are Remote engines. Then the data will be + distributed across multiple contexts instead of being copied on + every context. See the files in example/Doof2d for a simple + example that creates a MultiPatch array that can be distributed + across multiple contexts and performs a stencil computation on + that array. + + +
+ + +
+ Relations + + UNFINISHED +
+ + +
+ Stencils + + Section 3.5.4, "Stencil Objects," of + papers/pooma.ps provides a few uses of + stencils. + + Section 5, "Performance," of + papers/iscope98.pdf motivates and explains + stencils. +
+ + +
+ Contexts + +
+ background.html + In order to be able to cope with the variations in machine + architecture noted above, &pooma;'s distributed execution model + is defined in terms of one or more contexts, each of which may + host one or more threads. A context is a distinct region of + memory in some computer. The threads associated with the context + can access data in that memory region and can run on the + processors associated with that context. Threads running in + different contexts cannot access memory in other contexts. + + A single context may include several physical processors, + or just one. Conversely, different contexts do not have to be on + separate computers—for example, a 32-node SMP computer could + have up to 32 separate contexts. This release of &pooma; only + supports a single context for each application, but can use + multiple threads in the context on supported platforms. Support + for multiple contexts will be added in an upcoming + release. +
+
+ + +
+ Utility Types: ???TITLE?? + +
+ &vector; + + Section 3.2.2 of + papers/pooma.ps describes &vector;s and + matrix classes. +
+ +
+
+ + + + Writing Sequential Programs + + UNFINISHED + +
+ &benchmark; Programs + + Define a &benchmark; program vs. an example or an + executable. Provide a short overview of how to run these + programs. Provide an overview of how to write these programs. + See src/Utilities/Benchmark.h. +
+ + +
+ Using <type>Inform</type>s for Output + + UNFINISHED +
+ +
+ + + + Writing Distributed Programs + + Discuss the distributed model and guard cells. See docs/parallelism.html. + + Does any of the parallel implementation described in + papers/SCPaper-95.html still apply? + + ?Tuning programs to maximize parallel performance? + + external references to &mpi; and threads + + QUESTION: Are there interesting, short parallel programs in + any &mpi; book that we can convert to &pooma;? + +
+ Layouts + + An out-of-date description can be found in Section 3.3, + especially 3.3.2, of papers/pooma.ps, + which describes the global/local interactions and parallel abstraction + layers. +
+ +
+ Parallel Communication + + An out-of-date description can be found in + Section 3.3.3 of papers/pooma.ps +
+ +
+ Using Threads + + QUESTION: Where do threads fit into the manual? Do threads + even work? + + From Section 4 of + papers/SiamOO98_paper.ps: + + &pooma; II will make use of a new parallel run-time + system called &smarts; that is under development at the ACL. + &smarts; supports lightweight threads, so the evaluator will be + able to farm out data communication tasks and the evaluation of + subsets of an expression to multiple threads, thus increasing the + overlap of communication and computation. Threads will also be + available at the user level for situations in which a + task-parallel approach is deemed appropriate. +
+ +
+ + + + Under the Hood: How &pooma; Works + + from point of view of &cc; interpreter + +
+ &pete; + + Use the material in + papers/PETE_DDJ/ddj_article.html, which gives + example code and descriptions of how the code works. + + See material in background.html's Expression + Templates. +
+ +
+ + + + Debugging and Profiling &pooma; Programs + + UNFINISHED + + + + + + Example Program: Jacobi Solver + + QUESTION: Is this chapter necessary? Do we have enough + existing source code to write this chapter? + + +
+ + + &pooma; Reference Manual + + + TMP: This Chapter Holds These Comments But Will Be Removed + + For each template parameter need to describe the constraints + on it. + + Remove this section when the following concerns have been + addressed. + + Add a partintro explaining file suffixes such as .h, .cpp, .cmpl.cpp, .mk, .conf. Should we also explain use + of inline even when necessary and the template + model, e.g., including .cpp files. + + QUESTION: What are the key concepts around which to organize + the manual? + + QUESTION: What format should the manual use? + +
+ Musser, Derge, and Saini, §20.0. + It is important to state the requirements on the components + as generally as possible. For example, instead of saying + class X must define a member function + operator++(), we say for any + object x of type X, + ++x is defined. +
+
+ + + + A Typical &pooma; Class + + + Class Member Notation + + + *_t + + + + type within a class. QUESTION: What is the &cc; name for + this? + + + + + *_m + + + + data member + + + + + + &pooma; Class Vocabulary + + component + + one of several values packaged together. For example, a + three-dimensional vector has three components, i.e., three + values. + + + + element-wise + + applied to each element in the group, e.g., an array + + + + reduction + + repeated application of a binary operator to all elements, + yielding one value + + + + tag + + an enumerated value indicating inclusion in a particular + semantic class. The set of values need not be explicitly + declared. + + + + + + + + + Installing and Configuring &pooma; + + + + Installing &pooma;. + + + Requirements for configuration files. + + + + Include descriptions of using &smarts;, &cheetah;, τ, + &pdt;. + + QUESTION: Does it install on windows and on mac? If so, what + are the instructions? See also INSTALL.{mac,unix,windows}. + + README has some + information on &cheetah; and threads in the Message-Based + Parallelism section. + + Which additional packages are necessary and when? + + What configure options should we list? See configure. Be sure to list + debugging option and how its output relates to config/LINUXgcc.suite.mk. + + config/arch has files + for (OS, compiler) pairs. Explain how to modify a configuration + file. List requirements when making a new configuration file (low + priority). + + config/LINUXgcc.suite.mk has output + from configure. Useful to + relate to configuration files and configure's debugging output. + + + + + + Compilation and &make; Files + + We assume Gnu make. Do we know what assumptions are made? + + How do all these files interact with each other? Ala a make + interpreter, give an example of which files are read and + when. + + + config/Shared/README.make + This has short descriptions of many files, + especially in config/Shared. 
+ + makefile + These appear throughout all directories. What are + the equivalence classes and what are their + parts? + + include.mk + What does this do? Occurs in many directories: + when? Template seems to be config/Shared/include2.mk. + + subdir.mk + list of subdirectories; occurs in several + directories: when? src/subdir.mk is a good + example. + + + objfile.mk + + list of object files to construct, presumably from + *.cmpl.cpp files. + src/Utilities/objfile.mk is an + example. + + + config/Shared/rules.mk + most compiler rules + + config/head.mk + read at beginning of each + makefile? + + config/Shared/tail.mk + read at end of each makefile? + + config/Shared/variables.mk + Is this used? + + config/Shared/compilerules.mk + table of origin and target suffixes and commands + for conversion + + + + + + + + + &array;s + + Include src/Pooma/Arrays.h to use &array;s. + The implementation source code is in src/Array. + + FINISH: Define an array. Introduce its parts. + + ADD: some mention of the maximum supported number of + dimensions somewhere. + +
+ The &array; Container + + + Template Parameters + + + + + Parameter + Interpretation + + + + + Dim + dimension + + + T + array element type + + + EngineTag + type of computation engine object + + + +
+ + QUESTION: How do I introduce class type definitions, when + they are used, i.e., compile-time or run-time, and when + programmers should use them? + + + Compile-Time Types and Values + + + + + Type or Value + Interpretation + + + + + This_t + the &array; object's type + + + Engine_t + the &array; object's engine's type + + + EngineTag_t + indication of engine's category + + + Element_t + the type of the array elements, i.e., T + + + ElementRef_t + the type of a reference to an array element, + i.e., T&. Equivalently, the type to write to a + single element. + + + Domain_t + the array's domain's type, i.e., the type of the + union of all array indices + + + Layout_t + unknown + + + dimensions + integer equalling the number of dimensions, i.e., + Dim + + + rank + integer equalling the number of dimensions, i.e., + Dim; a synonym for + dimensions + + + +
+ +
+ Constructors and Destructors + + + Constructors and Destructors + + + + + Function + Effect + + + + + + + Array + + + + Creates an array that will be resized + later. + + + + + Array + const Engine_t& + engine + + + Creates an array with an engine equivalent to + the engine. This array will have the + same values as engine. QUESTION: Why + would a user ever want to use this + constructor? + + + + + Array + + const + Engine<Dim2, T2, EngineTag2>& + engine + + + const + Initializer& init + + + + What does this do? + + + ADD ALL CONSTRUCTORS AND DESTRUCTORS. + + + +
+
+ + +
+ Initializers + + Add a table. +
+ + +
+ Element Access + + + &array; Element Access + + + + + Function + Effect + + + + + + + Element_t read + + + + unknown: See line 1839. + + + + + Element_t read + + const + Sub1& s1 + + + const + Sub2& s2 + + + + How does the version with template parameters, + e.g., Sub1 differ from the int + version? + + + + + Element_t operator() + + const + Sub1& s1 + + + const + Sub2& s2 + + + + How does this differ from read(const + Sub1& s1, const Sub2& s2)? + + + ADD ALL reads and + operator()s. + + + +
+
+ + +
+ Component Access + + When an array stores elements having components, e.g., an + array of vectors, tensors, or arrays, the + comp returns an array consisting of the + specified components. The original and component array share the + same engine so changing the values in one affects values in the + other. + + For example, if &n; × &n; array a + consists of three-dimensional real-valued vectors, + a.comp(1) returns a &n; × &n; + real-valued array of all the middle vector components. Assigning + to the component array will also modify the middle components of + the vectors in a. + + + &array; Component Access + + + + + Function + Effect + + + + + + + UNKNOWN compute this comp + + const + int& + i1 + + + + unknown: See line 1989. + + + ADD ALL comps. + + + +
+
+ +
+ Accessors + + + &array; Accessor Methods + + + + + Function + Effect + + + + + + + int first + + int + d + + + + unknown: See line 2050 + + + ADD ALL other accessor methods, including + engine. + + + +
+
+ + +
+ Copying &array;s + + Explain how copied arrays and views of arrays share the + same underlying engine so changing values in one also affects the + other. This is called a shallow copy. +
+ + +
+ Utility Methods + + + &array; Utility Methods + + + + + Function + Effect + + + + + + + void makeOwnCopy + + + + unknown: See line 2044 + + + ADD ALL other utility methods. + + + +
+
+ + +
+ Implementation Details + + As a container, an &array;'s implementation is quite + simple. Its private data consists of + an engine, and it has no private + functions. + + + &array; Implementation Data + + + + + Data Member + Meaning + + + + + + + private + Engine_t engine_m + + + engine computing the array's values + + + +
+ +
+
+ + +
+ &dynamicarray;s: Dynamically-Sized Domains + + A DynamicArray is a read-write array with extra + create/destroy methods. It can act just like a regular Array, but + can have a dynamically-changing domain. See src/DynamicArray/DynamicArray.h. + + ADD: Briefly describe what the class does and an example of + where it is used. + + ADD: Check that its interface is actually the same as for + &array;. + + ADD: Check that the operations on dynamic arrays are + actually the same as for &array;. See src/DynamicArray/DynamicArrayOperators.h, + src/DynamicArray/PoomaDynamicArrayOperators.h, + and src/DynamicArray/VectorDynamicArrayOperators.h. + + +
+ Implementation Details + + DynamicArray has no + protected or + private members. +
+
+ + +
+ Views of &array;s + + UNFINISHED +
+ + +
+ &array; Assignments + + &pooma; supports assignments to &array;s of other &array;s + and scalar values. QUESTION: Is the following correct? For the + former, the right-hand side array's domain must be at least as + large as the left-hand side array's domain. Corresponding values + are copied. Assigning a scalar value to an array ensures all the + array elements have the same scalar value. + + UNFINISHED: Add a table containing assignment operators + found on lines 2097–2202. +
+ + +
+ Printing &array;s + + &array;s support output to but not input from IO streams. + In particular, output to ostreams and file streams is + supported. + + Add a table, using src/Array/Array.h, lines + 2408–2421. See the implementation in src/Array/PrintArray.h. + + QUESTION: How does one print a &dynamicarray;? +
+ + +
+ Expressions Involving &array;s + + In &pooma;, expressions may contain entire &array;s. That + is, &array;s are first-class objects with respect to expressions. + For example, given &array;s a and + b, the expression a + b + is equivalent to an array containing the element-wise sum of the + two arrays. + + Any finite number of the operators listed below can be used + in an expression. The precedence and order of operation is the + same as with ordinary built-in types. + + QUESTION: Do &field;s also support the same set of + operations? + + QUESTION: Some operations in src/Field/FieldOperators.h use both + &array; and &field;. Do we list them here or in the &field; + section or both or somewhere else? + + In the table below, &array; supplants the exact return types + because they are complicated and rarely need to be explicitly + written down. + + + Operators on &array; + + + + + Operator + Value + + + + + + + + Array acos + const Array<Dim,T,EngineTag>& a + + + + an array containing the element-wise inverse + cosine of the array a + + + ADD ALL other operators appearing in src/Array/ArrayOperators.h, + src/Array/ArrayOperatorSpecializations.h, + src/Array/PoomaArrayOperators.h, + and src/Array/VectorArrayOperators.h. + + + +
+ + FINISH: Write one or two examples or refer to ones + previously in the text. +
+ + +
+ Reducing All &array; Elements to One Value + + These reduction functions repeatedly apply a binary + operation to all array elements to yield a value. These functions + are similar to the Standard Template Library's + accumulate function. For example, + sum repeatedly applies the binary plus + operator to all array elements, yielding the sum of all array + elements. + + FINISH: What order of operation, if any, is + guaranteed? + + FINISH: Add a table of the functions in src/Array/Reductions.h. + + How does one use one's own binary function? See src/Engine/Reduction.h. +
+ + +
+ Utility Functions + +
+ Compressed Data + + Add a table containing + elementsCompressed, + compressed, compress, + and uncompress. +
+ + +
+ Centering Sizes and Number of Materials + + ADD: a description of numMaterials and + centeringSize found in src/Field/Field.h. These functions + are meaningless for &array; but are provided for consistency with + &field;. +
+ +
+ Obtaining Subfields + + ADD: a description of subField found + in src/Field/Field.h. + This function, meaningless for &array;, is provided for + consistency with &field;. +
+
+ + +
+ TMP: What do we do with these …? Remove this + section. + + QUESTION: Do we describe the &leaffunctor;s specialized for + &array;s in src/Array/Array.h or in the &pete; + reference section? What about the functions in src/Array/CreateLeaf.h? + + QUESTION: What is an EngineFunctor? We + probably should describe it in an analogous way as for + &leaffunctor;s. + + QUESTION: Where do we write about + ExpressionTraits for &array;s? + + QUESTION: Do we describe the ElementProperties + specialization at this place or in its section? + + QUESTION: Do we describe the Patch + specialization for &array;s (src/Array/Array.h:1300) in this + place or in a section for patches? +
+
+ + + + &field;s + + An &array; is a set of values indexed by + coordinates, one value per coordinate. It models the computer + science idea of an array. Similarly, a &field; is a set of values + indexed by coordinate. It models the mathematical and physical + idea of a field represented by a grid of rectangular cells, each + having at least one value. A &field;'s functionality is a superset + of an &array;'s functionality because: + + + A &field; is distributed through space so one can compute + the distances between cells. + + + Each cell can hold multiple values. For example, a + rectangular cell can have one value on each of its faces. + + + Multiple materials can share the same cell. For example, + different values can be stored in the same cell for carbon, + oxygen, and nitrogen. + + + Also, &field;s' values can be related by relations. Thus, if one + field's values change, a dependent field's values can be + automatically computed when needed. FIXME: See also the unfinished + works chapter's entry concerning relations and arrays. + + QUESTION: Should we add a picture comparing and contrasting + an array and a field? + + QUESTION: How much structure can be copied from the &array; + chapter? + + QUESTION: Where is NewMeshTag, defined in + src/Field/Field.h, + used? + + QUESTION: Do we describe the &leaffunctor;s specialized for + &field;s in src/Field/Field.h or in the &pete; + reference section? Use the same decision for &array;s. + + QUESTION: What do the structure and functions in src/Field/Mesh/PositionFunctions.h + do? + + +
+ The &field; Container + + ADD: table of template parameters and table of compile-time + types and values. + + +
+ Constructors and Destructors + + ADD: this section similar to &array;s's constructor and + destructor section. +
+ +
+ Initializers + + Add a table. +
+ + +
+ Element Access + + ADD: a table ala &array;. Be sure to include + all. +
+ + +
+ Component Access + + ADD: a table ala &array;. +
+ + +
+ Obtaining Subfields + + ADD: discussion and a table listing ways to obtain + subfields. Although the implementation may treat subfield views + and other field views similarly (?Is this true?), they are + conceptually different ideas so we present them + separately. + + See src/Field/Field.h's + operator[], + subField, …, + material. +
+ + +
+ Supporting Relations + + ADD: a table with the member functions including + addRelation, + removeRelations, + applyRelations, and + setDirty. +
+ + +
+ Accessors + + ADD: a table using lines like src/Field/Field.h:1243–1333. +
+ + +
+ Utility Methods + + ADD: a table including + makeOwnCopy. +
+ + +
+ Implementation Details + + ADD: a table similar to &array;'s. + +
+ +
+ + +
+ Views of &field;s + + Be sure to relate to &array; views. Note only three + dimensions are supported. + + Be sure to describe f[i]. Does this + refer to a particular material or a particular value within a + cell? I do not remember. See SubFieldView in + src/Field/Field.h. +
+ + +
+ &field; Assignments + + ADD: Describe supported assignments, relating to &array;'s + assignments. + + UNFINISHED: Add a table containing assignment operators + found on src/Field/Field.h:2097–2202 + and 1512–1611. +
+ + +
+ Printing &field;s + + QUESTION: How similar is this to printing &array;s? + + &field;s support output to but not input from IO streams. + In particular, output to ostreams and file streams is + supported. + + Add a table, using src/Field/Field.h, lines + 1996–2009. See the implementation in src/Field/PrintField.h. +
+ + +
+ Combining &field; Elements + + Like &array;s, &field;s support reduction of all elements to + one value. Additionally, the latter supports computing a field's + values using field stencils. QUESTION: How do I describe this + with a minimum of jargon? + + ADD: something similar to &array; reductions. + + FINISH: Add a table of the functions in src/Field/FieldReductions.h. + + FINISH: Add a table of the functions in src/Field/DiffOps/FieldOffsetReductions.h. + QUESTION: Why is only sum defined? +
+ + +
+ Expressions Involving &field;s + + Do something similar to &array;'s section. See the + operations defined in src/Field/FieldOperators.h, + src/Field/FieldOperatorSpecializations.h, + src/Field/PoomaFieldOperators.h, and + src/Field/VectorFieldOperators.h. + + Some operations involve both &array; and &field; + parameters. Where do we list them? +
+ + +
+ &field; Stencils: Faster, Local Computations + + ADD: a description of a stencil. Why is it needed? How + does a user use it? How does a user know when to use one? Add + documentation of the material from src/Field/DiffOps/FieldStencil.h. + + How is FieldShiftEngine used by &field; + stencils? Should it be described here or in the &engine; section? + See the code in src/Field/DiffOps/FieldShiftEngine.h. +
+ + +
+ Cell Volumes, Face Areas, Edge Lengths, Normals + + ADD: a description of these functions. See src/Field/Mesh/MeshFunctions.h. + These are initialized in, e.g., src/Field/Mesh/UniformRectilinearMesh.h. + Note that these do not work for NoMesh. +
+ + +
+ Divergence Operators + + ADD: a table having divergence operators, explaining the + current restrictions imposed by what is implemented. See + src/Field/DiffOps/Div.h + and src/Field/DiffOps/Div.UR.h. What + restrictions does UR (mesh) connote? +
+ + +
+ Utility Functions + +
+ Compressed Data + + Add a table containing + elementsCompressed, + compressed, compress, + and uncompress. +
+ + +
+ Centering Sizes and Number of Materials + + ADD: a description of numMaterials and + centeringSize found in src/Field/Field.h. + + QUESTION: How do these relate to any method functions? +
+ + +
+ Obtaining Subfields + + ADD: a description of subField found + in src/Field/Field.h. +
+ +
+ + +
+ &field; Centerings + + DO: Describe the purpose of a centering and its definition. + Describe the ability to obtain canonical centerings. Explain how + to construct a unique centering. See src/Field/FieldCentering.h. +
+ + +
+ Relative &field; Positions + + Permit specifying field positions relative to a field + location. Describe FieldOffset and + FieldOffsetList. See src/Field/FieldOffset.h +
+ + +
+ Computing Close-by Field Positions + + Given a field location, return the set of field locations + that are closest using ?Manhattan? distance. See src/Field/NearestNeighbors.h. +
+ + +
+ Mesh ??? + + Unlike &array;s, &field;s are distributed throughout space + so distances between values within the &field; can be computed. A + &field;'s mesh stores this spatial distribution. + + QUESTION: What do we need to write about meshes? What is + unimportant implementation and what should be described in this + reference section? + + QUESTION: Where in here should we emphasize vertex, not cell, + positions? VERTEX appears repeatedly in src/Field/Mesh/NoMesh.h. + + + Mesh Types + + + + + Mesh Type + Description + + + + + NoMesh<Dim> + no physical spacing, causing a &field; to mimic + an &array; with multiple engines. + + + UniformRectilinearMesh<Dim,T> + physical spacing formed by the Cartesian product + of ????. + + + +
+ + +
+ Mesh Accessors + + ADD: a table listing accessors, explaining the difference + between (physical and total) and (cell and vertex) domains. See + src/Field/Mesh/NoMesh.h. + Also, include spacings and + origin in src/Field/Mesh/UniformRectilinearMesh.h. + Note NoMesh does not provide the latter two. +
+ +
+ + +
+ TMP: What do we do with these …? Remove this + section. + + QUESTION: Do we describe the Patch + specialization for &field; at this place or in some common place? + Follow &array;'s lead. + + QUESTION: Where do we describe CreateLeaf and + MakeFieldReturn in src/Field/FieldCreateLeaf.h and + src/Field/FieldMakeReturn.h? + + QUESTION: What do we do with FieldEnginePatch + in src/Field/FieldEngine/FieldEnginePatch.h? +
+
+ + + + &engine;s + + From a user's point of view, a container makes data available + for reading and writing. In fact, the container's &engine; stores + the data or, if the data is computed, performs a computation to + yield the data. + + FINISH: Introduce the various types of engines. Add a table + with a short description of each engine type. + + FINISH: First, we specify a generic &engine;'s interface. + Then, we present &engine; specializations. + + + Types of &engine;s + + + + + Engine Type + Engine Tag + Description + + + + + Brick + Brick + Explicitly store all elements in, e.g., a &cc; + array. + + + Compressible + CompressibleBrick + If all values are the same, use constant storage + for that single value. Otherwise, explicitly store all + elements. + + + Constant + ConstantFunction + Returns the same constant value for all + indices. + + + Dynamic + Dynamic + Manages a contiguous, local, one-dimensional, + dynamically resizable block of data. + + + Component Forwarding + CompFwd<EngineTag, + Components> + Returns the specified components from + EngineTag's engine. Components are + pieces of multi-value elements such as vectors + and tensors. + + + Expression + ExpressionTag<Expr> + Returns the value of the specified &pete; + expression. + + + Index Function + IndexFunction<Functor> + Makes the function + Functor accepting indices mimic an + array. + + + MultiPatch + MultiPatch<LayoutTag,PatchTag> + Support distributed computation using several + processors (???contexts???). LayoutTag + indicates how the entire array is distributed among the + processors. Each processor uses a PatchTag + engine. + + + Remote + Remote<EngineTag> + unknown + + + Remote Dynamic + Remote<Dynamic> + unknown: specialization + + + Stencil + StencilEngine<Function, + Expression> + Returns values computed by applying the + user-specified function to sets of contiguous values in the + given engine or container. Compare with user function + engines.
+ + + User Function + UserFunctionEngine<UserFunction,Expression> + Returns values computed by applying the + user-specified function to the given engine or container. + QUESTION: Is the following claim correct? For each returned + value, only one value from the engine or container is + used. + + + +
+ + QUESTION: Where do we describe views? + + QUESTION: What does NewEngine do? Should it be + described when describing views? Should it be omitted as an + implementation detail? + + QUESTION: Where do we describe &engine; patches found in + src/Engine/EnginePatch.h? + All patch data in a separate chapter or engine-specific pieces in + this chapter? + + QUESTION: What is notifyEngineWrite? + See also src/Engine/NotifyEngineWrite.h. + + QUESTION: What aspect of MultiPatch uses IsValid in + src/Engine/IsValidLocation.h? + + QUESTION: Who uses intersections? Where should this be + described? See src/Engine/Intersector.h, src/Engine/IntersectEngine.h, and + src/Engine/ViewEngine.h. + +
+ &engine; Compile-Time Interface + + ADD: a table of template parameters ala &array;. ADD: + compile-time types and values. +
+ + +
+ Constructors and Destructors + + ADD: a table of constructors and destructors ala + &array;'s. +
+ + +
+ Element Access + + ADD: a table with read and + operator(). +
+ + +
+ Accessors + + ADD: a table of accessors. +
+ + +
+ &engine; Assignments + + similar to &array;'s assignments. shallow copies. ADD: a + table with one entry +
+ + +
+ Utility Methods + + ADD: a table including + makeOwnCopy. + + QUESTION: What are dataObject, + isShared, and related methods? +
+ + +
+ Implementation Details + + ADD: this section. Explain that + dataBlock_m and data_m point + to the same place. The latter speeds access, but what is the + purpose of the former? +
+ + +
+ Brick and BrickView Engines + + ADD: description of what a brick means. ADD: whatever + specializations the class has, e.g., + offset. + + QUESTION: What does DoubleSliceHelper do? +
+ + +
+ Compressible Brick and BrickView Engines + + ADD this. +
+ + +
+ Dynamic and DynamicView Engines: + + ADD this. Manages a contiguous, local, resizable, 1D block + of data. +
+ + +
+ Component Engines + + I believe these implement array component-forwarding. See + src/Engine/ForwardingEngine.h. +
+ + +
+ Expression Engines + + Should this be described in the &pete; section? Unlikely. + See src/Engine/ExpressionEngine.h. +
+ + +
+ &engine; Functors + + QUESTION: What is an EngineFunctor? Should it + have its own section? See src/Engine/EngineFunctor.h. +
+ + +
+ <type>FieldEngine</type>: A Hierarchy of &engine;s + + A &field; consists of a hierarchy of materials and + centerings. These are implemented using a hierarchy of engines. + See src/Field/FieldEngine/FieldEngine.h + and src/Field/FieldEngine/FieldEngine.ExprEngine.h. +
+
+ + + + &benchmark; Programs + + Explain how to use &benchmark; programs, especially the + options. Explain how to write a &benchmark; program. See also + src/Utilities/Benchmark.h + and src/Utilities/Benchmark.cmpl.cpp. + + + + + + Layouts and Partitions: Distribute Computation Among + Contexts + + QUESTION: What is the difference between + ReplicatedTag and DistributedTag? + + + + + + &pete;: Evaluating Parallel Expressions + +
+ UNKNOWN + +
+ Leaf Tag Classes + + NotifyPreReadTag indicates a term is about to + be read. Why is this needed? Defined in src/Utilities/NotifyPreRead.h. +
+
+ +
+ + + + Views + + QUESTION: Should this have its own chapter or be part of a + container chapter? + + Describe View0, View1, …, + View7 and View1Implementation. + + QUESTION: What causes the need for AltView0 and + AltComponentView? + + Be sure to describe ComponentView in the same + place. This is specialized for &array;s in src/Array/Array.h:1323–1382. + +
+ <type>ViewIndexer<Dim,Dim2></type> + + Defined in src/Utilities/ViewIndexer.h, this + type translates indices between a domain and a view of it. +
+
+ + + Threads + + Perhaps include information in src/Engine/DataObject.h. + + &pooma; options include UNFINISHED + + + + + + Utility Types + + TMP: What is a good order? + +
+ <type>Options</type>: Varying Run-Time Execution + + Each &pooma; executable has an Options object, + created by Pooma::initialize, storing + run-time configurable values found in argv. + Default options are found in + Options::usage. + + See src/Utilities/Options.h and + src/Utilities/Options.cmpl.cpp. + + Scatter the specific options to other parts of the + manual. +
+ +
+ Check Correctness: <type>CTAssert</type>, + <type>PAssert</type>, <type>PInsist</type>, + <type>SameType</type> + + Assertions ensure program invariants are obeyed. + CTAssert, checked at compile time, incurs no run-time + cost. PAssert and PInsist are checked + at run-time, the latter producing an explanatory message if the + assertion fails. Compiling with NOCTAssert and + NOPTAssert disables these checks. Compiling with just + NOPTAssert disables only the run-time checks. + + SameType ensures, at compile-time, two types + are the same. + + These are implemented in src/Utilities/PAssert.h and + src/Utilities/PAssert.cmpl.cpp. +
+ +
+ <type>Clock</type>: Measuring a Program's Execution Time + + See src/Utilities/Clock.h. +
+ + +
+ Smart Pointers: <type>RefCountedPtr</type>, + <type>RefCountedBlockPtr</type>, and + <type>DataBlockPtr</type> + + See src/Utilities/{RefCountedPtr,RefCountedBlockPtr,DataBlockPtr}.h. + src/Utilities/RefCounted.h + helps implement it. DataBlockPtr uses + &smarts;. +
+ +
+ <type>Inform</type>: Formatted Output for Multi-context + Execution + + See src/Utilities/Inform.h and src/Utilities/Inform.cmpl.cpp. +
+ +
+ <type>Statistics</type>: Report &pooma; Execution Statistics + + Collect and print execution statistics. Defined in + src/Utilities/Statistics.h. +
+ +
+ Random Numbers: <type>Unique</type> + + See src/Utilities/Unique.h. +
+
+ + + + Types for Implementing &pooma; + + TMP: What is a good order? + + Describe types defined to implement &pooma; but that users do + not directly use. This chapter has lower priority than other + chapters since users (hopefully) do not need to know about these + classes. + +
+ <type>Tester</type>: Check Implementation Correctness + + &pooma; implementation test programs frequently consist of a + series of operations followed by correctness checks. The + Tester object supports these tests, returning a + boolean indicating whether all the correctness checks yield true. Under + verbose output, messages are printed for each test. See src/Utilities/Tester.h. +
+ +
+ <type>ElementProperties<T></type>: Properties a Type + Supports + + This traits class permits optimizations in other templated + classes. See src/Utilities/ElementProperties.h. + +
+ +
+ <type>TypeInfo<T></type>: Print a String Describing + the Type + + Print a string describing the type. Defined in src/Utilities/TypeInfo.h. It is + specialized for other types in other files, e.g., src/Engine/EngineTypeInfo.h and + src/Field/FieldTypeInfo.h. + Is this a compile-time version of RTTI? +
+ +
+ <type>LoopUtils</type>: Loop Computations at Compile Time + + At compile time, LoopUtils supports copying + between arrays and computing the dot product of arrays. See + src/Utilities/MetaProg.h. +
+ +
+ <type>ModelElement<T></type>: Wrap a Type + + A wrapper class used to differentiate overloaded functions. + Defined in src/Utilities/ModelElement.h. Used + only by &array; and DynamicArray. +
+ +
+ <type>WrappedInt<int></type>: Wrap a Number + + A wrapper class used to differentiate overloaded functions + among different integers. Defined in src/Utilities/WrappedInt.h. Is this + class deprecated? Is it even necessary? +
+ +
+ Supporting Empty Classes + + The NoInit tag class indicates certain + initializations should be skipped. Defined in src/Utilities/NoInit.h. + + FIXME: Should be macro, not function. + POOMA_PURIFY_CONSTRUCTORS generates an empty + constructor, copy constructor, and destructor to avoid &purify; + warnings. Defined in src/Utilities/PurifyConstructors.h. + +
+ +
+ <type>Pooled<T></type>: Fast Memory Allocation of + Small Blocks + + Pooled<T> speeds allocation and + deallocation of memory blocks for small objects with + type T. Defined in src/Utilities/Pooled.h, it is + implemented in src/Utilities/Pool.h and src/Utilities/Pool.cmpl.cpp. + src/Utilities/StaticPool.h + no longer seems to be used. +
+ +
+ <type>UninitializedVector<T,Dim></type>: Create + Without Initializing + + This class optimizes creation of an array of objects by + avoiding running the default constructors. Later initialization + can occur, perhaps using a loop that can be unrolled. Defined in + src/Utilities/UninitializedVector.h, + this is used only by DomainTraits. +
+
+ + + Algorithms for Implementing &pooma; + + In src/Utilities/algorithms.h, + copy, delete_back, and + delete_shiftup provide additional algorithms + using iterators. + + + + + TMP: Where do we describe these files? + + + + src/Utilities/Conform.h: tag for + checking whether terms in expression have conforming + domains + + + + src/Utilities/DerefIterator.h: + DerefIterator<T> and + ConstDerefIterator<T> automatically + dereference themselves to maintain const + correctness. + + + + src/Utilities/Observable.h, + src/Utilities/Observer.h, + and src/Utilities/ObserverEvent.h: + Observable<T>, + SingleObserveable<T>, + Observer<T>, and ObserverEvent + implement the observer pattern. What is the observer pattern? + Where is this used in the code? + + + + + +
+ + + + Future Development + +
+ Particles + + docs/ParticlesDoc.txt has + out-of-date information. + + See Section 3.2.3 of + papers/pooma.ps for an out-of-date + description. + + papers/jvwr.ps concerns mainly + particles. papers/8thSIAMPOOMAParticles.pdf, + by Julian Cummings and Bill Humphrey, concerns parallel particle + simulations. papers/iscope98linac.pdf + describes a particle beam simulation using &pooma;; it mainly + concerns particles. + +
+ Particles + + Do we want to include such a section? + + Section 3, "Sample Applications" of + papers/SiamOO98_paper.ps describes porting a + particle program written using High-Performance Fortran to + &pooma; and presumably why particles were added to &pooma;. It + also describes MC++, a Monte Carlo + neutron transport code. + +
+ +
+ + +
+ Composition of &engine;s + + The i,j-th element of the composition + ab of two arrays + a and b equals a(b(i,j)). + The composition engine tagged IndirectionTag<Array1, + Array2>, defined in src/Engine/IndirectionEngine.h is + unfinished. +
+ + +
+ Improving Consistency of Container Interfaces + +
+ Relations for &array;s + + Do &array;s currently support relations? If not, why not? + Should they be added? +
+ +
+ Supporting the Same Number of Dimensions + + &array; and &field; should support the same maximum number + of dimensions. Currently, &array;s support seven dimensions and + &field;s support only three. By definition, &dynamicarray; + supports only one dimension. + + Relations for &array;s. + + External guards for &array;s. +
+ +
+ + +
+ <function>where</function> Proxies + + QUESTION: Do we even discuss this broken + feature? Where is it used? Some related code is in + src/Array/Array.h:2511–2520. +
+ + +
+ Very Long Term Development Ideas + + Describe how to write a new configuration file. +
+ +
+ + + + Obtaining and Installing &pooma; + + ADD: Write this section, including extensive instructions + for Unix, MS Windows, and MacOS. List the configuration options. + Be sure to describe configuring for parallel execution. + +
+ Supporting Distributed Computation + + Using multiple processors with &pooma; requires installing + the &cheetah; messaging library and an underlying messaging library + such as the Message Passing Interface (&mpi;) Communications + Library or the &mm; Shared Memory Library. In this section, we + first describe how to install &mm;. Read the section only if using + &mm;, not &mpi;. Then we describe how to install &cheetah; and + configure &pooma; to use it. + +
+ Obtaining and Installing the &mm; Shared Memory Library + + &cheetah;, and thus &pooma;, can use Ralf Engelschall's &mm; + Shared Memory Library to pass messages between processors. For + example, the &author; uses this library on a two-processor + computer running &linux;. The library, available at + http://www.engelschall.com/sw/mm/, is available for free and has + been successfully tested on a variety of Unix platforms. + + We describe how to download and install the &mm; library. + + + Download the library from the &pooma; Download page + available off the &pooma; home page (&poomaHomePage;). + + + Extract the source code using tar xzvf + mm-1.1.3.tar.gz. Move into the resulting source + code directory mm-1.1.3. + + + Prepare to compile the source code by configuring it + using the configure command. To change + the default installation directory /usr/local, specify the + --prefix=directory + option. The other configuration options can be listed by + specifying the --help option. Since the + &author; prefers to keep all &pooma;-related code in his + pooma subdirectory, he + uses ./configure + --prefix=${HOME}/pooma/mm-1.1.3. + + + Create the library by issuing the make + command. This compiles the source code using a &c; compiler. To + use a different compiler than the &mm; configuration chooses, set + the CC environment variable to the compiler before configuring. + + + Optionally test the library by issuing the make + test command. If successful, the penultimate line + should be OK - ALL TESTS SUCCESSFULLY + PASSED. + + + Install the &mm; Library by issuing the make + install command. This copies the library files to the + installation directory. The mm-1.1.3 directory containing the + source code may now be removed. + + + +
+ + +
+ Obtaining and Installing the &cheetah; Messaging Library + + The &cheetah; Library decouples communication from + synchronization. Using asynchronous messaging rather than + synchronous messaging permits a message sender to operate without + the cooperation of the message recipient. Thus, implementing + message sending is simpler and processing is more efficiently + overlapped with it. Remote method invocation is also supported. + The library was developed at the Los Alamos National Laboratory's + Advanced Computing Laboratory. + + &cheetah;'s messaging is implemented using an underlying + messaging library such as the Message Passing Interface (&mpi;) + Communications Library (FIXME: xref linkend="mpi99", ) or the &mm; + Shared Memory Library. &mpi; works on a wide variety of platforms + and has achieved widespread usage. &mm; works under Unix on any + computer with shared memory. Both libraries are available for + free. The instructions below work for whichever library you + choose. + + We describe how to download and install &cheetah;. + + + Download the library from the &pooma; Download page + available off the &pooma; home page (&poomaHomePage;). + + + Extract the source code using tar xzvf + cheetah-1.0.tgz. Move into the resulting source code + directory cheetah-1.0. + + + Edit a configuration file corresponding to your operating + system and compiler. These .conf files are located in the + config directory. For + example, to use &gcc; with the &linux; operating system, use + config/LINUXGCC.conf. + + The configuration file usually does not need + modification. However, if you are using &mm;, ensure + shmem_default_dir specifies its location. + For example, the &author; modified the value to + "/home/oldham/pooma/mm-1.1.3". + + + Prepare to compile the source code by configuring it + using the configure command. Specify the + configuration file using the --arch option. + Its argument should be the configuration file's name, omitting + its .conf suffix. 
For + example, --arch LINUXGCC. Some other + options include + + + --help + + lists all the available options + + + + --shmem --nompi + + indicates use of &mm;, not &mpi; + + + + --mpi --noshmem + + indicates use of &mpi;, not &mm; + + + + --opt + + causes the compiler to produce optimized source code + + + + --noex + + prevents use of &cc; exceptions + + + + --prefix directory + + specifies the installation directory where the + library will be copied rather than the default. + + + + For example, the &author; uses ./configure --arch + LINUXGCC --shmem --nompi --noex --prefix + ${HOME}/pooma/cheetah-1.0 --opt. The + --arch LINUXGCC indicates use of &gcc; + under a &linux; operating system. The &mm; library is used, + but &cc; exceptions are not. The latter choice matches + &pooma;'s default choice. The library will be installed in + the ${HOME}/pooma/cheetah-1.0. + Finally, the library code will be optimized, hopefully running + faster than unoptimized code. + + + Follow the directions printed by + configure: Change directories to the + lib subdirectory named + by the --arch argument and then type + make to compile the source code and create + the library. + + + Optionally ensure the library works correctly by issuing + the make tests command. + + + Install the library by issuing the make + install command. This copies the library files to + the installation directory. The cheetah-1.0 directory containing + the source code may now be removed. + + + +
+ +
+ Configuring &pooma; When Using &cheetah; + + To use &pooma; with &cheetah;, one must tell &pooma; the + location of the &cheetah; library using the + --messaging configuration option. To do this, + + + Set the &cheetah; directory environment variable + CHEETAHDIR to the directory containing the + installed &cheetah; library. For + example, declare -x + CHEETAHDIR=${HOME}/pooma/cheetah-1.0 specifies the + installation directory used in the previous section. + + + When configuring &pooma;, specify the + --messaging option. For example, + ./configure --arch LINUXgcc --opt + --messaging configures for &linux;, &gcc;, and an + optimized library using &cheetah;. + + + +
+ + +
+
+ + + + Dealing with Compilation Errors + + Base this low-priority section on errors.html. QUESTION: Where is + errors.html? + + + + + + TMP: Notes to Myself + +
+ Miscellaneous + + + + QUESTION: How do I know when to use a type name versus just + the concept? For example, when do I use array + versus &array;? + + + + Krylov solvers are described in Section 3.5.2 of + papers/pooma.ps. + + + + Section 5, "The Polygon Overlay Problem," describes + porting an ANSI &c; program to &pooma;. + + + + A good example book: STL Tutorial and Reference + Guide: &cc; Programming with the Standard Template + Library, second edition, by David R. Musser, + Gillmer J. Derge, and Atul Saini, ISBN 0-201-37923-6, + QA76.73.C153.M87 2001. + + + + One STL reference book listed functions in margin notes, + easing finding material. Do this. + + + + QUESTION: Does Berna Massingill at Trinity University have + any interest in or access to any parallel computers? + + + +
+ + +
+ Existing HTML Tutorials + + All these tutorials are out-of-date, but the ideas and text + may still be relevant. + + + index.html + list of all tutorials. No useful + material. + + introduction.html + data-parallel Laplace solver using Jacobi + iteration ala Doof2d + + background.html + short, indirect introduction to &pete;; parallel + execution model; &cc;; templates; &stl;; expression + templates + + tut-01.html + UNFINISHED + + Layout.html + UNFINISHED + + parallelism.html + UNFINISHED + + self-test.html + UNFINISHED + + threading.html + UNFINISHED + + tut-03.html + UNFINISHED + + tut-04.html + UNFINISHED + + tut-05.html + UNFINISHED + + tut-06.html + UNFINISHED + + tut-07.html + UNFINISHED + + tut-08.html + UNFINISHED + + tut-09.html + UNFINISHED + + tut-10.html + UNFINISHED + + tut-11.html + UNFINISHED + + tut-12.html + UNFINISHED + + tut-13.html + UNFINISHED + + + +
+ +
+ + + + + + Bibliography + + FIXME: How do I process these entries? + + + mpi99 + + + WilliamGropp + + + EwingLusk + + + AnthonySkjellum + + + + 1999 + Massachusetts Institute of Technology + + 0-262-57132-3 + + The MIT Press +
Cambridge, MA
+
+ Using MPI + Portable Parallel Programming with the Message-Passing Interface + second edition +
+
+ + + + + + Glossary + + ADD: Make sure all entries are indexed and perhaps point back + to their first use. WARNING: This is constructed by hand so it is + likely to be full of inconsistencies and errors. + + + S + + + Suite Name + + An arbitrary string denoting a particular toolkit + configuration. For example, the string + SUNKCC-debug might indicate a configuration for + the Sun Solaris + operating system and the &kcc; &cc; compiler with debugging + support. By default, the suite name is equal to the + configuration's architecture name. + + + + + + + + + &genindex.sgm; + +
Index: docs/manual/figures/distributed.mp =================================================================== RCS file: distributed.mp diff -N distributed.mp *** /dev/null Fri Mar 23 21:37:44 2001 --- distributed.mp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,195 ---- + %% Oldham, Jeffrey D. + %% 2001Nov28 + %% Pooma + + %% Illustrations for Distributed Computing + + %% Assumes TEX=latex. + + input boxes; + + verbatimtex + \documentclass[10pt]{article} + \begin{document} + etex + + %% Parts of Distributed Computation: container partitioning, computer configuration, and the resulting layout. + beginfig(101) + numeric unit; unit = 0.9cm; + + %% Create the Container Storage Partition subfigure. + numeric arrayWidth; arrayWidth = 2; % as multiple of unit + numeric arrayHeight; arrayHeight = 4; % as multiple of unit + numeric guardWidth; guardWidth = 0.1; % as multiple of unit + numeric patchWidth; patchWidth = arrayWidth/3; % as multiple of unit + numeric patchHeight; patchHeight = arrayHeight/2; % as multiple of unit + numeric xPatchDistance; xPatchDistance = 0.9patchWidth; % as multiple of unit + numeric yPatchDistance; yPatchDistance = 0.5patchWidth; % as multiple of unit + numeric arrayPartitionDistance; arrayPartitionDistance = arrayWidth; + % distance between array and partition + numeric arrayLayoutDistance; arrayLayoutDistance = 0.5arrayHeight; + % distance between array and layout + numeric arrowLayoutDistance; arrowLayoutDistance = 0.5arrayLayoutDistance; + % distance between arrow and top of layout, not its label + numeric iota; iota = labeloffset; + numeric storageBoundaryWidth; storageBoundaryWidth = 1; % as multiple of unit + % gap between storage box and its internals + + % Create the Array. Use box "a". + boxit.a(); + a.ne - a.sw = unit*(arrayWidth,arrayHeight); + + % Create the partition. Use boxes "p[]".
+ for t = 0 upto 5: + boxit.p[t](); + p[t].ne - p[t].sw = unit*(1,1); + endfor; + for t = 0 upto 2: + p[t].sw=p[t+3].nw; + p[t].se=p[t+3].ne; + if t < 2: + p[t].ne = p[t+1].nw; + p[t+3].ne = p[t+4].nw; + fi + endfor; + boxit.pt(btex \begin{tabular}{c} + \\ external guard layers \\ + \\ internal guard layers \end{tabular} etex); + pt.n = p[4].s; + + % Create the layout patches "l[]" and their guard layers "g[]". + for t = 0 upto 5: + boxit.l[t](); + boxit.g[t](); + l[t].ne - l[t].sw = unit*(patchWidth, patchHeight); + g[t].ne - l[t].ne = -(g[t].sw - l[t].sw) = unit*guardWidth*(1,1); + endfor + for t = 0 upto 2: + if t < 2: + g[t+1].nw - g[t].ne = unit*(xPatchDistance,0); + fi + g[t].sw - g[t+3].nw = unit*(0,yPatchDistance); + endfor; + + % Create the storage equation boxes. + boxit.containerPlus(btex + etex); + boxit.containerArrow(btex $\Big\Downarrow$ etex); + + % Position the storage pieces. + p[0].nw - a.ne = unit*(arrayPartitionDistance,0); + containerPlus.c = (xpart(0.5[a.ne,p[0].nw]), ypart(a.c)); + containerArrow.c = (xpart(containerPlus.c), ypart(l[1].n) + unit*arrowLayoutDistance); + ypart(a.s - l[1].n) = unit*arrayLayoutDistance; + xpart(containerPlus.c - l[1].n) = 0; + + % Create a boundary box around storage partition. + boxit.storageBoundary(); + ypart(storageBoundary.n - a.n) = + ypart(l[4].s - storageBoundary.s) = unit*2storageBoundaryWidth; + xpart(a.w - storageBoundary.w) = unit*storageBoundaryWidth; + xpart(storageBoundary.e - pt.e) = unit*storageBoundaryWidth; + + %% Create the Computer Configuration subfigure.
+ numeric configurationBoundaryWidth; configurationBoundaryWidth = storageBoundaryWidth; + % gap between computer configuration box and its internals + + for t = 0 upto 2: + circleit.c[t](); + c[t].n - c[t].s = 1.3(0,ypart(g[0].ne - g[3].sw)); + c[t].e - c[t].w = 1.5(xpart(g[0].ne - g[3].sw),0); + endfor + c[2].c - c[1].c = c[1].c - c[0].c = g[1].c - g[0].c; + + boxit.configurationBoundary(); + ypart(configurationBoundary.n - configurationBoundary.s) = + ypart(storageBoundary.n - storageBoundary.s); + xpart(configurationBoundary.e - configurationBoundary.w) = + xpart(c[2].e - c[0].w)+2*unit*configurationBoundaryWidth; + configurationBoundary.c = c[1].c; + + %% Create the Computation Configuration subfigure. + % Create the patches. + for t = 0 upto 5: + boxit.L[t](); + boxit.G[t](); + L[t].ne - L[t].sw = unit*(patchWidth, patchHeight); + G[t].ne - L[t].ne = -(G[t].sw - L[t].sw) = unit*guardWidth*(1,1); + endfor + for t = 0 upto 2: + if t < 2: + G[t+1].nw - G[t].ne = unit*(xPatchDistance,0); + fi + G[t].sw - G[t+3].nw = unit*(0,yPatchDistance); + endfor; + + % Create the contexts: one circle "C[]" per column of two patches, centered on that column. + for t = 0 upto 2: + circleit.C[t](); + C[t].n - C[t].s = 1.3(0,ypart(G[0].ne - G[3].sw)); + C[t].e - C[t].w = 1.5(xpart(G[0].ne - G[3].sw),0); + endfor + C[2].c - C[1].c = C[1].c - C[0].c = G[1].c - G[0].c; + C[0].c = 0.5[G[0].c,G[3].c]; + + %% Relate the subfigures.
+ numeric containerConfigurationDistance; + containerConfigurationDistance = arrayPartitionDistance; + % distance between container storage and computer configuration subfigures + numeric containerComputationDistance; containerComputationDistance = arrayLayoutDistance; + % distance between container storage subfigure and computation configuration subfigure + numeric arrowComputationDistance; arrowComputationDistance = arrowLayoutDistance; + % distance between arrow and top of computation configuration, not its label + + boxit.figurePlus(btex + etex); + boxit.figureArrow(btex $\Big\Downarrow$ etex); + + configurationBoundary.w - storageBoundary.e = + unit*(containerConfigurationDistance,0); %% computer configuration box sits to the right of the storage box + figurePlus.c = 0.5[configurationBoundary.w, storageBoundary.e]; + figureArrow.c = (xpart(0.5[configurationBoundary.e,storageBoundary.w]), + ypart(C[1].n) + unit*arrowComputationDistance); + + 0.5[configurationBoundary.se,storageBoundary.sw] - C[1].n = + unit*(0,containerComputationDistance); + + %% Draw the Container Domain Partitioning structures. + drawboxed(a); label.top(btex \begin{tabular}{c} container's\\ domain \end{tabular} etex, a.n); + for t = 0 upto 5: + drawboxed(p[t]); + endfor + drawunboxed(pt); + label.top(btex partition etex, p[1].n); + for t = 0 upto 5: + drawboxed(l[t],g[t]); + endfor + label.top(btex patches etex, g[1].n); + z0 = g[2].e + unit*(1,0); + drawarrow z0 -- (g[2].e+(iota,0)); + label.rt(btex \begin{tabular}{l} patch with\\guard cells \end{tabular} etex, z0); + drawunboxed(containerPlus,containerArrow); + drawboxed(storageBoundary); + label.top(btex Partition Container's Domain etex, storageBoundary.n); + + %% Draw the Computer Configuration structures. + for t = 0 upto 2: + drawboxed(c[t]); + endfor + label.top(btex contexts etex, c[1].n); + label.bot(btex \begin{tabular}{c} Each context has memory and\\ processors to execute a program.
\end{tabular} etex, c[1].s); + drawboxed(configurationBoundary); + label.top(btex Computer Configuration etex, configurationBoundary.n); + + %% Draw the Computation Configuration structures. + for t = 0 upto 5: + drawboxed(L[t],G[t]); + endfor + for t = 0 upto 2: + drawboxed(C[t]); + endfor + label.top(btex Layout etex, C[1].n); + label.bot(btex Each context can contain several patches. etex, C[1].s); + + %% Draw the subfigure relations structures. + drawunboxed(figurePlus,figureArrow); + label.rt(btex DistributedTag etex, figureArrow.e); + endfig; + + bye Index: docs/manual/figures/doof2d.mp =================================================================== RCS file: doof2d.mp diff -N doof2d.mp *** /dev/null Fri Mar 23 21:37:44 2001 --- doof2d.mp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,257 ---- + %% Oldham, Jeffrey D. + %% 2001Nov26 + %% Pooma + + %% Illustrations for the Tutorial Chapter (Chapter 2) + + %% Assumes TEX=latex. + + verbatimtex + \documentclass[10pt]{article} + \begin{document} + etex + + % Draw an nuCells-by-nuCells grid of square cells of side "unit" with lower-left corner llCorner. + vardef drawGrid(expr nuCells, unit, llCorner) = + for i = 0 upto nuCells-1: + for j = 0 upto nuCells-1: + draw unitsquare scaled unit shifted (llCorner + unit*(i,j)); + endfor + endfor + enddef; + + % Label the specified grid, grid cell, or its edge. + % Place lbl at the center of grid cell xy; uses the global "unit", not a parameter. + vardef labelCell(expr lbl, xy, llCorner) = + label(lbl, llCorner + unit*(xy + 0.5*(1,1))); + enddef; + + % Label the bottom of a grid cell. + vardef labelCellBottom(expr lbl, xy, llCorner) = + label.bot(lbl, llCorner + unit*(xy + 0.5*(1,0))); + enddef; + + % Label the left side of a grid cell. + vardef labelCellLeft(expr lbl, xy, llCorner) = + label.lft(lbl, llCorner + unit*(xy + 0.5*(0,1))); + enddef; + + % Label the top of a grid.
+ vardef labelGrid(expr lbl, nuCells, llCorner) = + label.top(lbl, llCorner + unit*(nuCells/2,nuCells)); + enddef; + + %% Global Declarations + numeric unit; unit = 0.9cm; % width or height of an individual grid cell + + + %% Initial Configuration. + beginfig(201) + numeric nuCells; nuCells = 7; % number of cells in each dimension + % This number should be odd. + % Draw the grid cells. + drawGrid(nuCells, unit, origin); + + % Label the grid cells' values: 1000.0 in the center cell, 0.0 elsewhere. + for i = 0 upto nuCells-1: + for j = 0 upto nuCells-1: + if ((i = nuCells div 2) and (j = nuCells div 2)): + labelCell(btex \footnotesize 1000.0 etex, (i,j), origin); + else: + labelCell(btex \footnotesize 0.0 etex, (i,j), origin); + fi + endfor + endfor + + % Label the grid. + labelGrid(btex Array \texttt{b}: Initial Configuration etex, nuCells, origin); + endfig; + + + %% After the first averaging. + beginfig(202) + numeric nuCells; nuCells = 7; % number of cells in each dimension + % This number should be odd. + % Draw the grid cells. + drawGrid(nuCells, unit, origin); + + % Label the grid cells' values: 0.0 in the two outermost rings, 111.1 in the central 3x3 block. + for i = 0, 1, nuCells-2, nuCells-1: + for j = 0 upto nuCells-1: + labelCell(btex \footnotesize 0.0 etex, (i,j), origin); + endfor + endfor + for j = 0, 1, nuCells-2, nuCells-1: + for i = 0 upto nuCells-1: + labelCell(btex \footnotesize 0.0 etex, (i,j), origin); + endfor + endfor + for i = (nuCells div 2)-1 upto (nuCells div 2)+1: + for j = (nuCells div 2)-1 upto (nuCells div 2)+1: + labelCell(btex \footnotesize 111.1 etex, (i, j), origin); + endfor + endfor + + % Label the grid. + labelGrid(btex Array \texttt{a}: After the first averaging etex, nuCells, origin); + endfig; + + + %% After the second averaging. + beginfig(203) + numeric nuCells; nuCells = 7; % number of cells in each dimension + % This number should be odd. + % Draw the grid cells. + drawGrid(nuCells, unit, origin); + + % Label the grid cells' values.
+ for i = 0, nuCells-1: + for j = 0 upto nuCells-1: + labelCell(btex \footnotesize 0.0 etex, (i,j), origin); + endfor + endfor + for j = 0, nuCells-1: + for i = 0 upto nuCells-1: + labelCell(btex \footnotesize 0.0 etex, (i,j), origin); + endfor + endfor + labelCell(btex \footnotesize 111.1 etex, (3,3), origin); + for t = (3,2), (4,3), (3,4), (2,3): + labelCell(btex \footnotesize 74.1 etex, t, origin); + endfor + for t = (2,2), (2,4), (4,4), (4,2): + labelCell(btex \footnotesize 49.4 etex, t, origin); + endfor + for t = (3,1), (5,3), (3,5), (1,3): + labelCell(btex \footnotesize 37.0 etex, t, origin); + endfor + for t = (1,2), (2,1), (4,1), (5,2), (5,4), (4,5), (2,5), (1,4): + labelCell(btex \footnotesize 24.7 etex, t, origin); + endfor + for t = (1,1), (5,1), (5,5), (1,5): + labelCell(btex \footnotesize 12.3 etex, t, origin); + endfor + + % Label the grid. + labelGrid(btex Array \texttt{b}: After the second averaging etex, nuCells, origin); + endfig; + + + %% Illustrate addition of arrays. + beginfig(210) + numeric nuCells; nuCells = 3; % number of cells in each dimension + % This number should be odd. + numeric operatorWidth; operatorWidth = 1.5; + % horizontal space for an operator as + % a multiple of "unit" + + %% Determine the locations of the arrays. + z0 = origin; + z1 = z0 + unit * (nuCells+operatorWidth,0); + z2 - z1 = z1 - z0; + + %% Draw the grid cells and the operators. + for t = 0 upto 2: + drawGrid(nuCells, unit, z[t]); + endfor + label(btex = etex, z1 + unit*(-0.9operatorWidth, 0.5nuCells)); + label(btex + etex, z2 + unit*(-0.9operatorWidth, 0.5nuCells)); + + %% Label the grid cells' values. + % Label b(I,J) grid values.
+ labelCell(btex \normalsize 9 etex, (0,0), z1); + labelCell(btex \normalsize 11 etex, (1,0), z1); + labelCell(btex \normalsize 13 etex, (2,0), z1); + labelCell(btex \normalsize 17 etex, (0,1), z1); + labelCell(btex \normalsize 19 etex, (1,1), z1); + labelCell(btex \normalsize 21 etex, (2,1), z1); + labelCell(btex \normalsize 25 etex, (0,2), z1); + labelCell(btex \normalsize 27 etex, (1,2), z1); + labelCell(btex \normalsize 29 etex, (2,2), z1); + % Label b(I+1,J-1) grid values. + labelCell(btex \normalsize 3 etex, (0,0), z2); + labelCell(btex \normalsize 5 etex, (1,0), z2); + labelCell(btex \normalsize 7 etex, (2,0), z2); + labelCell(btex \normalsize 11 etex, (0,1), z2); + labelCell(btex \normalsize 13 etex, (1,1), z2); + labelCell(btex \normalsize 15 etex, (2,1), z2); + labelCell(btex \normalsize 19 etex, (0,2), z2); + labelCell(btex \normalsize 21 etex, (1,2), z2); + labelCell(btex \normalsize 23 etex, (2,2), z2); + % Label b(I,J)+b(I+1,J-1) grid values; each is the sum of the corresponding cells of the two grids above. + labelCell(btex \normalsize 12 etex, (0,0), z0); + labelCell(btex \normalsize 16 etex, (1,0), z0); + labelCell(btex \normalsize 20 etex, (2,0), z0); + labelCell(btex \normalsize 28 etex, (0,1), z0); + labelCell(btex \normalsize 32 etex, (1,1), z0); + labelCell(btex \normalsize 36 etex, (2,1), z0); + labelCell(btex \normalsize 44 etex, (0,2), z0); + labelCell(btex \normalsize 48 etex, (1,2), z0); + labelCell(btex \normalsize 52 etex, (2,2), z0); + + %% Label the indices. + % Label b(I,J) grid indices. + labelCellBottom(btex \footnotesize 1 etex, (0,0), z1); + labelCellBottom(btex \footnotesize 2 etex, (1,0), z1); + labelCellBottom(btex \footnotesize 3 etex, (2,0), z1); + labelCellLeft(btex \footnotesize 1 etex, (0,0), z1); + labelCellLeft(btex \footnotesize 2 etex, (0,1), z1); + labelCellLeft(btex \footnotesize 3 etex, (0,2), z1); + % Label b(I+1,J-1) grid indices.
+ labelCellBottom(btex \footnotesize 2 etex, (0,0), z2); + labelCellBottom(btex \footnotesize 3 etex, (1,0), z2); + labelCellBottom(btex \footnotesize 4 etex, (2,0), z2); + labelCellLeft(btex \footnotesize 0 etex, (0,0), z2); + labelCellLeft(btex \footnotesize 1 etex, (0,1), z2); + labelCellLeft(btex \footnotesize 2 etex, (0,2), z2); + + %% Label the grids. + labelGrid(btex $b(I,J)+b(I+1,J-1)$ etex, nuCells, z0); + labelGrid(btex $b(I,J)$ etex, nuCells, z1); + labelGrid(btex $b(I+1,J-1)$ etex, nuCells, z2); + endfig; + + + %% Illustrate application of a stencil. + beginfig(211) + numeric nuCells; nuCells = 5; % number of cells in each dimension + numeric nuStencilCells; nuStencilCells = 3; + % number of stencil cells in each dimension + numeric stencilMultiple; stencilMultiple = 0.1; + % small multiple to make it visible + + % Draw the grid cells. + drawGrid(nuCells, unit, origin); + + % Draw the stencil. + draw unitsquare scaled ((nuStencilCells-2stencilMultiple) * unit) shifted (unit*(stencilMultiple*(1,1)+(0,2))); + draw (unitsquare scaled ((1-stencilMultiple) * unit) shifted (unit*(0.5*stencilMultiple*(1,1)+(1,3)))) dashed evenly; + + % Label the extents. + picture lbl; + ahlength := 0.4unit; + drawarrow unit*(2,4) -- unit*(3,5); + lbl = thelabel.lrt(btex \scriptsize upperExtent etex, unit*0.5[(2,4),(3,5)]); + unfill bbox lbl; draw lbl; + drawarrow unit*(1,3) -- unit*(0,2); + lbl := thelabel.lrt(btex \scriptsize lowerExtent etex, unit*0.5[(1,3),(0,2)]); + unfill bbox lbl; draw lbl; + + % Label the indices.
+ labelCellBottom(btex \footnotesize 0 etex, (0,0), origin); + labelCellBottom(btex \footnotesize 1 etex, (1,0), origin); + labelCellBottom(btex \footnotesize 2 etex, (2,0), origin); + labelCellBottom(btex \footnotesize 3 etex, (3,0), origin); + labelCellBottom(btex \footnotesize 4 etex, (4,0), origin); + labelCellLeft(btex \footnotesize 0 etex, (0,0), origin); + labelCellLeft(btex \footnotesize 1 etex, (0,1), origin); + labelCellLeft(btex \footnotesize 2 etex, (0,2), origin); + labelCellLeft(btex \footnotesize 3 etex, (0,3), origin); + labelCellLeft(btex \footnotesize 4 etex, (0,4), origin); + + % Label the grid. + labelGrid(btex Applying a Stencil to Position (1,3) etex, nuCells, origin); + + endfig; + + bye Index: docs/manual/figures/makefile =================================================================== RCS file: makefile diff -N makefile *** /dev/null Fri Mar 23 21:37:44 2001 --- makefile Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,69 ---- + ### Oldham, Jeffrey D. + ### 1997 Dec 26 + ### misc + ### + ### LaTeX -> PostScript/PDF/WWW + ### XML -> TeX/DVI/PS/PDF + + # Definitions for PostScript and WWW Creation + TEX= latex + WWWHOMEDIR= /u/oldham/www + LATEX2HTML= latex2html + BASICLATEX2HTMLOPTIONS= -info "" -no_footnode -no_math -html_version 3.2,math + #LATEX2HTMLOPTIONS= -local_icons -split +1 $(BASICLATEX2HTMLOPTIONS) + LATEX2HTMLOPTIONS= -no_navigation -split 0 $(BASICLATEX2HTMLOPTIONS) + MPOST= mpost + + # Definitions for Jade.
+ JADEDIR= /usr/lib/sgml/stylesheets/docbook + PRINTDOCBOOKDSL= print/docbook.dsl + HTMLDOCBOOKDSL= html/docbook.dsl + XML= dtds/decls/xml.dcl + INDEXOPTIONS= -t 'Index' -i 'index' -g -p + + CXXFLAGS= -g -Wall -pedantic -W -Wstrict-prototypes -Wpointer-arith -Wbad-function-cast -Wcast-align -Wconversion -Wnested-externs -Wundef -Winline -static + + all: outline.ps + + %.all: %.ps %.pdf %.html + chmod 644 $*.ps $*.pdf + mv $*.ps $*.pdf $* + + %.dvi: %.ltx + $(TEX) $< + # bibtex $* + # $(TEX) $< + $(TEX) $< + + %.ps: %.dvi + dvips -t letter $< -o + + %.pdf.ltx: %.ltx + sed -e 's/^%\\usepackage{times}/\\usepackage{times}/' $< > $@ + + %.pdf: %.pdf.ps + ps2pdf $< $@ + + # This rule assumes index creation. + %.dvi: %.xml genindex.sgm + jade -D$(JADEDIR) -t sgml -d $(HTMLDOCBOOKDSL) -V html-index $(XML) $< + perl collateindex.pl $(INDEXOPTIONS) -o genindex.sgm HTML.index + jade -D$(JADEDIR) -t tex -d $(PRINTDOCBOOKDSL) $(XML) $< && jadetex $*.tex && jadetex $*.tex && jadetex $*.tex + + genindex.sgm: + perl collateindex.pl $(INDEXOPTIONS) -N -o $@ + + %.html: %.xml + jade -D$(JADEDIR) -t sgml -d $(HTMLDOCBOOKDSL) $(XML) $< + + %.pdf: %.xml + jade -D$(JADEDIR) -t tex -d $(PRINTDOCBOOKDSL) $(XML) $< && pdfjadetex $*.tex && pdfjadetex $*.tex + + mproof-%.ps: %.mp + TEX=latex $(MPOST) $< && tex mproof.tex $*.[0-9]* && dvips mproof.dvi -o $@ + + %.txt: %.ltx + detex $< > $@ + + clean: + rm -f *.dvi *.aux *.log *.toc *.bak *.blg *.bbl *.glo *.idx *.lof *.lot *.htm *.mpx mpxerr.tex HTML.index outline.tex Index: docs/manual/programs/Doof2d-Array-distributed-annotated.patch =================================================================== RCS file: Doof2d-Array-distributed-annotated.patch diff -N Doof2d-Array-distributed-annotated.patch *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-distributed-annotated.patch Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,162 ---- + *** Doof2d-Array-distributed.cpp Wed Nov 28 07:46:56 2001 + --- 
Doof2d-Array-distributed-annotated.cpp Wed Nov 28 07:53:31 2001 + *************** + *** 1,4 **** + ! #include // has std::cout, ... + ! #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + --- 1,5 ---- + ! + ! #include <iostream> // has std::cout, ... + ! #include <stdlib.h> // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + *************** + *** 15,19 **** + // (i,j). The "C" template parameter permits use of this stencil + // operator with both Arrays and Fields. + ! template + inline + typename C::Element_t + --- 16,20 ---- + // (i,j). The "C" template parameter permits use of this stencil + // operator with both Arrays and Fields. + ! template <class C> + inline + typename C::Element_t + *************** + *** 40,52 **** + Pooma::initialize(argc,argv); + + ! // Ask the user for the number of processors. + long nuProcessors; + ! std::cout << "Please enter the number of processors: "; + ! std::cin >> nuProcessors; + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + --- 41,53 ---- + Pooma::initialize(argc,argv); + + ! // Ask the user for the number of processors. + long nuProcessors; + ! std::cout << "Please enter the number of processors: "; + ! std::cin >> nuProcessors; + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + *************** + *** 54,67 **** + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! 
Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<2> interiorDomain(I,I); + + // Create the distributed arrays. + --- 55,68 ---- + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<2> interiorDomain(I,I); + + // Create the distributed arrays. + *************** + *** 70,85 **** + // dimension. Guard layers optimize communication between patches. + // Internal guards surround each patch. External guards surround + ! // the entire array domain. + ! UniformGridPartition<2> partition(Loc<2>(nuProcessors, nuProcessors), + ! GuardLayers<2>(1), // internal + ! GuardLayers<2>(0)); // external + ! UniformGridLayout<2> layout(vertDomain, partition, DistributedTag()); + + // The template parameters indicate 2 dimensions and a 'double' + // element type. MultiPatch indicates multiple computation patches, + // i.e., distributed computation. The UniformTag indicates the + ! // patches should have the same size. Each patch has Brick type. + ! Array<2, double, MultiPatch > > a(layout); + ! Array<2, double, MultiPatch > > b(layout); + + // Set up the initial conditions. + --- 71,86 ---- + // dimension. Guard layers optimize communication between patches. + // Internal guards surround each patch. External guards surround + ! // the entire array domain. + ! UniformGridPartition<2> partition(Loc<2>(nuProcessors, nuProcessors), + ! GuardLayers<2>(1), // internal + ! GuardLayers<2>(0)); // external + ! UniformGridLayout<2> layout(vertDomain, partition, DistributedTag()); + + // The template parameters indicate 2 dimensions and a 'double' + // element type. MultiPatch indicates multiple computation patches, + // i.e., distributed computation. 
The UniformTag indicates the + ! // patches should have the same size. Each patch has Brick type. + ! Array<2, double, MultiPatch<UniformTag, Remote<Brick> > > a(layout); + ! Array<2, double, MultiPatch<UniformTag, Remote<Brick> > > b(layout); + + // Set up the initial conditions. + *************** + *** 89,97 **** + + // Create the stencil performing the computation. + ! Stencil stencil; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + --- 90,98 ---- + + // Create the stencil performing the computation. + ! Stencil<DoofNinePt> stencil; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + *************** + *** 102,106 **** + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + --- 103,107 ---- + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + *************** + *** 110,111 **** + --- 111,113 ---- + return EXIT_SUCCESS; + } + + Index: docs/manual/programs/Doof2d-Array-element-annotated.patch =================================================================== RCS file: Doof2d-Array-element-annotated.patch diff -N Doof2d-Array-element-annotated.patch *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-element-annotated.patch Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,144 ---- + *** Doof2d-Array-element.cpp Tue Nov 27 11:04:04 2001 + --- Doof2d-Array-element-annotated.cpp Tue Nov 27 12:06:32 2001 + *************** + *** 1,5 **** + ! #include // has std::cout, ... + ! 
#include // has EXIT_SUCCESS + ! #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, element-wise implementation + --- 1,6 ---- + ! + ! #include <iostream> // has std::cout, ... + ! #include <stdlib.h> // has EXIT_SUCCESS + ! #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, element-wise implementation + *************** + *** 7,17 **** + int main(int argc, char *argv[]) + { + ! // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + --- 8,18 ---- + int main(int argc, char *argv[]) + { + ! // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + *************** + *** 19,37 **** + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + ! // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + ! // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + ! // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + --- 20,38 ---- + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + ! // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + ! 
// Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + ! // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + *************** + *** 41,49 **** + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + ! a(i,j) = weight * + (b(i+1,j+1) + b(i+1,j ) + b(i+1,j-1) + + b(i ,j+1) + b(i ,j ) + b(i ,j-1) + + --- 42,50 ---- + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + ! a(i,j) = weight * + (b(i+1,j+1) + b(i+1,j ) + b(i+1,j-1) + + b(i ,j+1) + b(i ,j ) + b(i ,j-1) + + *************** + *** 51,56 **** + + // Read from a. Write to b. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + b(i,j) = weight * + (a(i+1,j+1) + a(i+1,j ) + a(i+1,j-1) + + --- 52,57 ---- + + // Read from a. Write to b. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + b(i,j) = weight * + (a(i+1,j+1) + a(i+1,j ) + a(i+1,j-1) + + *************** + *** 60,70 **** + + // Print out the final central value. + ! Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + ! // The arrays are automatically deallocated. + + ! // Tell the Pooma library execution has finished. + Pooma::finalize(); + return EXIT_SUCCESS; + } + --- 61,72 ---- + + // Print out the final central value. + ! Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + ! // The arrays are automatically deallocated. + + ! 
// Tell the Pooma library execution has finished. + Pooma::finalize(); + return EXIT_SUCCESS; + } + + Index: docs/manual/programs/Doof2d-Array-parallel-annotated.patch =================================================================== RCS file: Doof2d-Array-parallel-annotated.patch diff -N Doof2d-Array-parallel-annotated.patch *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-parallel-annotated.patch Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,106 ---- + *** Doof2d-Array-parallel.cpp Tue Nov 27 13:00:09 2001 + --- Doof2d-Array-parallel-annotated.cpp Tue Nov 27 14:07:07 2001 + *************** + *** 1,4 **** + ! #include // has std::cout, ... + ! #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + --- 1,5 ---- + ! + ! #include <iostream> // has std::cout, ... + ! #include <stdlib.h> // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + *************** + *** 12,17 **** + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + --- 13,18 ---- + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + *************** + *** 19,38 **** + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + ! // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<1> J(1,n-2); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! 
Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + --- 20,39 ---- + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + ! // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<1> J(1,n-2); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + *************** + *** 45,50 **** + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(I,J) = weight * + (b(I+1,J+1) + b(I+1,J ) + b(I+1,J-1) + + --- 46,51 ---- + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(I,J) = weight * + (b(I+1,J+1) + b(I+1,J ) + b(I+1,J-1) + + *************** + *** 61,65 **** + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + --- 62,66 ---- + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. 
+ *************** + *** 69,70 **** + --- 70,72 ---- + return EXIT_SUCCESS; + } + + Index: docs/manual/programs/Doof2d-Array-stencil-annotated.patch =================================================================== RCS file: Doof2d-Array-stencil-annotated.patch diff -N Doof2d-Array-stencil-annotated.patch *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-stencil-annotated.patch Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,152 ---- + *** Doof2d-Array-stencil.cpp Tue Nov 27 17:23:41 2001 + --- Doof2d-Array-stencil-annotated.cpp Tue Nov 27 17:36:13 2001 + *************** + *** 1,9 **** + ! #include // has std::cout, ... + ! #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, stencil implementation + + ! // Define the stencil class performing the computation. + class DoofNinePt + { + --- 1,10 ---- + ! + ! #include <iostream> // has std::cout, ... + ! #include <stdlib.h> // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, stencil implementation + + ! // Define the stencil class performing the computation. + class DoofNinePt + { + *************** + *** 14,19 **** + // This stencil operator is applied to each interior domain position + // (i,j). The "C" template parameter permits use of this stencil + ! // operator with both Arrays and Fields. + ! template + inline + typename C::Element_t + --- 15,20 ---- + // This stencil operator is applied to each interior domain position + // (i,j). The "C" template parameter permits use of this stencil + ! // operator with both Arrays and Fields. + ! template <class C> + inline + typename C::Element_t + *************** + *** 26,30 **** + } + + ! inline int lowerExtent(int) const { return 1; } + inline int upperExtent(int) const { return 1; } + + --- 27,31 ---- + } + + ! 
inline int lowerExtent(int) const { return 1; } + inline int upperExtent(int) const { return 1; } + + *************** + *** 42,47 **** + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + --- 43,48 ---- + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + *************** + *** 49,68 **** + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<2> interiorDomain(I,I); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + --- 50,69 ---- + // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + ! Interval<1> N(0, n-1); + ! Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + ! Interval<1> I(1,n-2); + ! Interval<2> interiorDomain(I,I); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + ! Array<2, double, Brick> a(vertDomain); + ! Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + *************** + *** 71,80 **** + b(n/2,n/2) = 1000.0; + + ! 
// Create the stencil performing the computation. + ! Stencil stencil; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + --- 72,81 ---- + b(n/2,n/2) = 1000.0; + + ! // Create the stencil performing the computation. + ! Stencil<DoofNinePt> stencil; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + *************** + *** 85,89 **** + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + --- 86,90 ---- + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + ! std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + *************** + *** 93,94 **** + --- 94,96 ---- + return EXIT_SUCCESS; + } + + Index: docs/manual/programs/Doof2d-C-element-annotated.patch =================================================================== RCS file: Doof2d-C-element-annotated.patch diff -N Doof2d-C-element-annotated.patch *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-C-element-annotated.patch Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,150 ---- + *** Doof2d-C-element.cpp Tue Nov 27 08:36:38 2001 + --- Doof2d-C-element-annotated.cpp Tue Nov 27 12:08:03 2001 + *************** + *** 1,4 **** + ! #include // has std::cout, ... + ! #include // has EXIT_SUCCESS + + // Doof2d: C-like, element-wise implementation + --- 1,5 ---- + ! + ! #include <iostream> // has std::cout, ... + ! #include <stdlib.h> // has EXIT_SUCCESS + + // Doof2d: C-like, element-wise implementation + *************** + *** 6,30 **** + int main() + { + ! 
// Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + ! // Use two-dimensional grids of values. + double **a; + double **b; + + // Ask the user for the number n of elements along one dimension of + ! // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + ! // Allocate the arrays. + typedef double* doublePtr; + a = new doublePtr[n]; + b = new doublePtr[n]; + ! for (int i = 0; i < n; i++) { + a[i] = new double[n]; + b[i] = new double[n]; + --- 7,31 ---- + int main() + { + ! // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + ! std::cout << "Please enter the number of averagings: "; + ! std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + ! // Use two-dimensional grids of values. + double **a; + double **b; + + // Ask the user for the number n of elements along one dimension of + ! // the grid. + long n; + ! std::cout << "Please enter the array size: "; + ! std::cin >> n; + + ! // Allocate the arrays. + typedef double* doublePtr; + a = new doublePtr[n]; + b = new doublePtr[n]; + ! for (int i = 0; i < n; i++) { + a[i] = new double[n]; + b[i] = new double[n]; + *************** + *** 32,49 **** + + // Set up the initial conditions. + ! // All grid values should be zero except for the central value. + ! for (int j = 0; j < n; j++) + ! for (int i = 0; i < n; i++) + a[i][j] = b[i][j] = 0.0; + b[n/2][n/2] = 1000.0; + + ! // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + ! for (int j = 1; j < n-1; j++) + ! 
for (int i = 1; i < n-1; i++) + a[i][j] = weight * + (b[i+1][j+1] + b[i+1][j ] + b[i+1][j-1] + + --- 33,50 ---- + + // Set up the initial conditions. + ! // All grid values should be zero except for the central value. + ! for (int j = 0; j < n; j++) + ! for (int i = 0; i < n; i++) + a[i][j] = b[i][j] = 0.0; + b[n/2][n/2] = 1000.0; + + ! // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Perform the simulation. + ! for (int k = 0; k < nuIterations; ++k) { + ! // Read from b. Write to a. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + a[i][j] = weight * + (b[i+1][j+1] + b[i+1][j ] + b[i+1][j-1] + + *************** + *** 51,57 **** + b[i-1][j+1] + b[i-1][j ] + b[i-1][j-1]); + + ! // Read from a. Write to b. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + b[i][j] = weight * + (a[i+1][j+1] + a[i+1][j ] + a[i+1][j-1] + + --- 52,58 ---- + b[i-1][j+1] + b[i-1][j ] + b[i-1][j-1]); + + ! // Read from a. Write to b. + ! for (int j = 1; j < n-1; j++) + ! for (int i = 1; i < n-1; i++) + b[i][j] = weight * + (a[i+1][j+1] + a[i+1][j ] + a[i+1][j-1] + + *************** + *** 60,68 **** + } + + ! // Print out the final central value. + ! std::cout << (nuAveragings % 2 ? a[n/2][n/2] : b[n/2][n/2]) << std::endl; + + ! // Deallocate the arrays. + ! for (int i = 0; i < n; i++) { + delete [] a[i]; + delete [] b[i]; + --- 61,69 ---- + } + + ! // Print out the final central value. + ! std::cout << (nuAveragings % 2 ? a[n/2][n/2] : b[n/2][n/2]) << std::endl; + + ! // Deallocate the arrays. + ! 
for (int i = 0; i < n; i++) { + delete [] a[i]; + delete [] b[i]; + *************** + *** 73,74 **** + --- 74,76 ---- + return EXIT_SUCCESS; + } + + Index: docs/manual/programs/makefile =================================================================== RCS file: makefile diff -N makefile *** /dev/null Fri Mar 23 21:37:44 2001 --- makefile Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,12 ---- + ### Oldham, Jeffrey D. + ### 2001Nov27 + ### Pooma + ### + ### Produce Annotated Source Code + + all: Doof2d-C-element-annotated.cpp Doof2d-Array-element-annotated.cpp \ + Doof2d-Array-parallel-annotated.cpp Doof2d-Array-stencil-annotated.cpp \ + Doof2d-Array-distributed-annotated.cpp + + %-annotated.cpp: %-annotated.patch %.cpp + patch -o $@ < $< Index: examples/Manual/Doof2d/Doof2d-Array-distributed.cpp =================================================================== RCS file: Doof2d-Array-distributed.cpp diff -N Doof2d-Array-distributed.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-distributed.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,111 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, stencil, multiple processor implementation + + // Define the stencil class performing the computation. + class DoofNinePt + { + public: + // Initialize the constant average weighting. + DoofNinePt() : weight(1.0/9.0) {} + + // This stencil operator is applied to each interior domain position + // (i,j). The "C" template parameter permits use of this stencil + // operator with both Arrays and Fields. 
+ template + inline + typename C::Element_t + operator()(const C& x, int i, int j) const + { + return ( weight * + ( x.read(i+1,j+1) + x.read(i+1,j ) + x.read(i+1,j-1) + + x.read(i ,j+1) + x.read(i ,j ) + x.read(i ,j-1) + + x.read(i-1,j+1) + x.read(i-1,j ) + x.read(i-1,j-1) ) ); + } + + inline int lowerExtent(int) const { return 1; } + inline int upperExtent(int) const { return 1; } + + private: + + // In the average, weight element with this value. + const double weight; + }; + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // Ask the user for the number of processors. + long nuProcessors; + std::cout << "Please enter the number of processors: "; + std::cin >> nuProcessors; + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + std::cout << "Please enter the number of averagings: "; + std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + Interval<1> I(1,n-2); + Interval<2> interiorDomain(I,I); + + // Create the distributed arrays. + // Partition the arrays' domains uniformly, i.e., each patch has the + // same size. The first parameter tells how many patches for each + // dimension. Guard layers optimize communication between patches. + // Internal guards surround each patch. External guards surround + // the entire array domain. 
+ UniformGridPartition<2> partition(Loc<2>(nuProcessors, nuProcessors), + GuardLayers<2>(1), // internal + GuardLayers<2>(0)); // external + UniformGridLayout<2> layout(vertDomain, partition, DistributedTag()); + + // The template parameters indicate 2 dimensions and a 'double' + // element type. MultiPatch indicates multiple computation patches, + // i.e., distributed computation. The UniformTag indicates the + // patches should have the same size. Each patch has Brick type. + Array<2, double, MultiPatch > > a(layout); + Array<2, double, MultiPatch > > b(layout); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // Create the stencil performing the computation. + Stencil stencil; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + // Read from a. Write to b. + b(interiorDomain) = stencil(a, interiorDomain); + } + + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + + // Tell the Pooma library execution has finished. + Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-Array-element.cpp =================================================================== RCS file: Doof2d-Array-element.cpp diff -N Doof2d-Array-element.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-element.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,70 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, element-wise implementation + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. 
+ Pooma::initialize(argc,argv); + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + std::cout << "Please enter the number of averagings: "; + std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + Array<2, double, Brick> a(vertDomain); + Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + for (int j = 1; j < n-1; j++) + for (int i = 1; i < n-1; i++) + a(i,j) = weight * + (b(i+1,j+1) + b(i+1,j ) + b(i+1,j-1) + + b(i ,j+1) + b(i ,j ) + b(i ,j-1) + + b(i-1,j+1) + b(i-1,j ) + b(i-1,j-1)); + + // Read from a. Write to b. + for (int j = 1; j < n-1; j++) + for (int i = 1; i < n-1; i++) + b(i,j) = weight * + (a(i+1,j+1) + a(i+1,j ) + a(i+1,j-1) + + a(i ,j+1) + a(i ,j ) + a(i ,j-1) + + a(i-1,j+1) + a(i-1,j ) + a(i-1,j-1)); + } + + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + + // Tell the Pooma library execution has finished. 
+ Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-Array-parallel.cpp =================================================================== RCS file: Doof2d-Array-parallel.cpp diff -N Doof2d-Array-parallel.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-parallel.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,70 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, data-parallel implementation + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + std::cout << "Please enter the number of averagings: "; + std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + Interval<1> I(1,n-2); + Interval<1> J(1,n-2); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + Array<2, double, Brick> a(vertDomain); + Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. 
+ a(I,J) = weight * + (b(I+1,J+1) + b(I+1,J ) + b(I+1,J-1) + + b(I ,J+1) + b(I ,J ) + b(I ,J-1) + + b(I-1,J+1) + b(I-1,J ) + b(I-1,J-1)); + + // Read from a. Write to b. + b(I,J) = weight * + (a(I+1,J+1) + a(I+1,J ) + a(I+1,J-1) + + a(I ,J+1) + a(I ,J ) + a(I ,J-1) + + a(I-1,J+1) + a(I-1,J ) + a(I-1,J-1)); + } + + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + + // Tell the Pooma library execution has finished. + Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-Array-stencil.cpp =================================================================== RCS file: Doof2d-Array-stencil.cpp diff -N Doof2d-Array-stencil.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Array-stencil.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,94 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Arrays.h" // has Pooma's Array + + // Doof2d: Pooma Arrays, stencil implementation + + // Define the stencil class performing the computation. + class DoofNinePt + { + public: + // Initialize the constant average weighting. + DoofNinePt() : weight(1.0/9.0) {} + + // This stencil operator is applied to each interior domain position + // (i,j). The "C" template parameter permits use of this stencil + // operator with both Arrays and Fields. + template + inline + typename C::Element_t + operator()(const C& c, int i, int j) const + { + return ( weight * + ( c.read(i+1,j+1) + c.read(i+1,j ) + c.read(i+1,j-1) + + c.read(i ,j+1) + c.read(i ,j ) + c.read(i ,j-1) + + c.read(i-1,j+1) + c.read(i-1,j ) + c.read(i-1,j-1) ) ); + } + + inline int lowerExtent(int) const { return 1; } + inline int upperExtent(int) const { return 1; } + + private: + + // In the average, weight element with this value. 
+ const double weight; + }; + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + std::cout << "Please enter the number of averagings: "; + std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the arrays' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + Interval<1> I(1,n-2); + Interval<2> interiorDomain(I,I); + + // Create the arrays. + // The template parameters indicate 2 dimensions, a 'double' element + // type, and ordinary 'Brick' storage. + Array<2, double, Brick> a(vertDomain); + Array<2, double, Brick> b(vertDomain); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // Create the stencil performing the computation. + Stencil stencil; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + a(interiorDomain) = stencil(b, interiorDomain); + + // Read from a. Write to b. + b(interiorDomain) = stencil(a, interiorDomain); + } + + // Print out the final central value. + Pooma::blockAndEvaluate(); // Ensure all computation has finished. + std::cout << (nuAveragings % 2 ? a(n/2,n/2) : b(n/2,n/2)) << std::endl; + + // The arrays are automatically deallocated. + + // Tell the Pooma library execution has finished. 
+ Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-C-element.cpp =================================================================== RCS file: Doof2d-C-element.cpp diff -N Doof2d-C-element.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-C-element.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,74 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + + // Doof2d: C-like, element-wise implementation + + int main() + { + // Ask the user for the number of averagings. + long nuAveragings, nuIterations; + std::cout << "Please enter the number of averagings: "; + std::cin >> nuAveragings; + nuIterations = (nuAveragings+1)/2; // Each iteration performs two averagings. + + // Use two-dimensional grids of values. + double **a; + double **b; + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Allocate the arrays. + typedef double* doublePtr; + a = new doublePtr[n]; + b = new doublePtr[n]; + for (int i = 0; i < n; i++) { + a[i] = new double[n]; + b[i] = new double[n]; + } + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + for (int j = 0; j < n; j++) + for (int i = 0; i < n; i++) + a[i][j] = b[i][j] = 0.0; + b[n/2][n/2] = 1000.0; + + // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + for (int j = 1; j < n-1; j++) + for (int i = 1; i < n-1; i++) + a[i][j] = weight * + (b[i+1][j+1] + b[i+1][j ] + b[i+1][j-1] + + b[i ][j+1] + b[i ][j ] + b[i ][j-1] + + b[i-1][j+1] + b[i-1][j ] + b[i-1][j-1]); + + // Read from a. Write to b. 
+ for (int j = 1; j < n-1; j++) + for (int i = 1; i < n-1; i++) + b[i][j] = weight * + (a[i+1][j+1] + a[i+1][j ] + a[i+1][j-1] + + a[i ][j+1] + a[i ][j ] + a[i ][j-1] + + a[i-1][j+1] + a[i-1][j ] + a[i-1][j-1]); + } + + // Print out the final central value. + std::cout << (nuAveragings % 2 ? a[n/2][n/2] : b[n/2][n/2]) << std::endl; + + // Deallocate the arrays. + for (int i = 0; i < n; i++) { + delete [] a[i]; + delete [] b[i]; + } + delete [] a; + delete [] b; + + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-Field-distributed.cpp =================================================================== RCS file: Doof2d-Field-distributed.cpp diff -N Doof2d-Field-distributed.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Field-distributed.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,87 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Fields.h" // has Pooma's Field + + // Doof2d: Pooma Fields, data-parallel, multiple processor implementation + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // nuIterations is the number of simulation iterations. + const int nuIterations = 10/2; + + // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // nuProcessors is the number of processors along one dimension. + const int nuProcessors = 2; + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the fields' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + Interval<1> I(1,n-2); + Interval<1> J(1,n-2); + + // Partition the fields' domains uniformly, i.e., each patch has the + // same size. The first parameter tells how many patches for each + // dimension. 
Guard layers optimize communication between patches. + // Internal guards surround each patch. External guards surround + // the entire field domain. + UniformGridPartition<2> partition(Loc<2>(nuProcessors, nuProcessors), + GuardLayers<2>(1), // internal + GuardLayers<2>(0)); // external + UniformGridLayout<2> layout(vertDomain, partition, DistributedTag()); + + // Specify the fields' mesh, i.e., its spatial extent, and its + // centering type. + UniformRectilinearMesh<2> mesh(layout, Vector<2>(0.0), Vector<2>(1.0, 1.0)); + Centering<2> cell = canonicalCentering<2>(CellType, Continuous, AllDim); + + // The template parameters indicate a mesh and a 'double' + // element type. MultiPatch indicates multiple computation patches, + // i.e., distributed computation. The UniformTag indicates the + // patches should have the same size. Each patch has Brick type. + Field, double, MultiPatch > > a(cell, layout, mesh); + Field, double, MultiPatch > > b(cell, layout, mesh); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. + a(I,J) = weight * + (b(I+1,J+1) + b(I+1,J ) + b(I+1,J-1) + + b(I ,J+1) + b(I ,J ) + b(I ,J-1) + + b(I-1,J+1) + b(I-1,J ) + b(I-1,J-1)); + + // Read from a. Write to b. + b(I,J) = weight * + (a(I+1,J+1) + a(I+1,J ) + a(I+1,J-1) + + a(I ,J+1) + a(I ,J ) + a(I ,J-1) + + a(I-1,J+1) + a(I-1,J ) + a(I-1,J-1)); + } + + // Print out the final central value. + std::cout << b(n/2,n/2) << std::endl; + + // The fields are automatically deallocated. + + // Tell the Pooma library execution has finished. 
+ Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/Doof2d-Field-parallel.cpp =================================================================== RCS file: Doof2d-Field-parallel.cpp diff -N Doof2d-Field-parallel.cpp *** /dev/null Fri Mar 23 21:37:44 2001 --- Doof2d-Field-parallel.cpp Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,72 ---- + #include // has std::cout, ... + #include // has EXIT_SUCCESS + #include "Pooma/Fields.h" // has Pooma's Field + + // Doof2d: Pooma Fields, data-parallel implementation + + int main(int argc, char *argv[]) + { + // Prepare the Pooma library for execution. + Pooma::initialize(argc,argv); + + // nuIterations is the number of simulation iterations. + const int nuIterations = 10/2; + + // In the average, weight element with this value. + const double weight = 1.0/9.0; + + // Ask the user for the number n of elements along one dimension of + // the grid. + long n; + std::cout << "Please enter the array size: "; + std::cin >> n; + + // Specify the fields' domains [0,n) x [0,n). + Interval<1> N(0, n-1); + Interval<2> vertDomain(N, N); + + // Set up interior domains [1,n-1) x [1,n-1) for computation. + Interval<1> I(1,n-2); + Interval<1> J(1,n-2); + + // Specify the fields' mesh, i.e., its spatial extent, and its + // centering type. + DomainLayout<2> layout(vertDomain); + UniformRectilinearMesh<2> mesh(layout, Vector<2>(0.0), Vector<2>(1.0, 1.0)); + Centering<2> cell = canonicalCentering<2>(CellType, Continuous, AllDim); + + // Create the fields. + // The template parameters indicate a mesh, a 'double' element + // type, and ordinary 'Brick' storage. + Field, double, Brick> a(cell, layout, mesh); + Field, double, Brick> b(cell, layout, mesh); + + // Set up the initial conditions. + // All grid values should be zero except for the central value. + a = b = 0.0; + b(n/2,n/2) = 1000.0; + + // Perform the simulation. + for (int k = 0; k < nuIterations; ++k) { + // Read from b. Write to a. 
+ a(I,J) = weight * + (b(I+1,J+1) + b(I+1,J ) + b(I+1,J-1) + + b(I ,J+1) + b(I ,J ) + b(I ,J-1) + + b(I-1,J+1) + b(I-1,J ) + b(I-1,J-1)); + + // Read from a. Write to b. + b(I,J) = weight * + (a(I+1,J+1) + a(I+1,J ) + a(I+1,J-1) + + a(I ,J+1) + a(I ,J ) + a(I ,J-1) + + a(I-1,J+1) + a(I-1,J ) + a(I-1,J-1)); + } + + // Print out the final central value. + std::cout << b(n/2,n/2) << std::endl; + + // The fields are automatically deallocated. + + // Tell the Pooma library execution has finished. + Pooma::finalize(); + return EXIT_SUCCESS; + } Index: examples/Manual/Doof2d/include.mk =================================================================== RCS file: include.mk diff -N include.mk *** /dev/null Fri Mar 23 21:37:44 2001 --- include.mk Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,59 ---- + # Generated by mm.pl: Mon Mar 9 13:58:39 MST 1998 + # ACL:license + # ---------------------------------------------------------------------- + # This software and ancillary information (herein called "SOFTWARE") + # called POOMA (Parallel Object-Oriented Methods and Applications) is + # made available under the terms described here. The SOFTWARE has been + # approved for release with associated LA-CC Number LA-CC-98-65. + # + # Unless otherwise indicated, this SOFTWARE has been authored by an + # employee or employees of the University of California, operator of the + # Los Alamos National Laboratory under Contract No. W-7405-ENG-36 with + # the U.S. Department of Energy. The U.S. Government has rights to use, + # reproduce, and distribute this SOFTWARE. The public may copy, distribute, + # prepare derivative works and publicly display this SOFTWARE without + # charge, provided that this Notice and any statement of authorship are + # reproduced on all copies. Neither the Government nor the University + # makes any warranty, express or implied, or assumes any liability or + # responsibility for the use of this SOFTWARE. 
+ # + # If SOFTWARE is modified to produce derivative works, such modified + # SOFTWARE should be clearly marked, so as not to confuse it with the + # version available from LANL. + # + # For more information about POOMA, send e-mail to address@hidden, + # or visit the POOMA web page at http://www.acl.lanl.gov/pooma/. + # ---------------------------------------------------------------------- + # ACL:license + + + # Wrap make components from SHARED_ROOT and the current directory in the + # proper order so that variables like ODIR have the correct directory-specific + # value at the right moment. See the included files for details of what they + # are doing. This file should NOT be manually edited. + + # Set NEXTDIR, THISDIR and DIR_LIST + include $(SHARED_ROOT)/include1.mk + + # Include list of subdirectories to process + -include $(THISDIR)/subdir.mk + + # Set ODIR, PROJECT_INCLUDES, UNIQUE + include $(SHARED_ROOT)/include2.mk + + # Set list of object files, relative to ODIR + -include $(THISDIR)/objfile.mk + + # Set rules for the ODIR directory + include $(SHARED_ROOT)/compilerules.mk + + # Remove current dir from DIR_LIST + DIR_LIST :=$(filter-out $(firstword $(DIR_LIST)), $(DIR_LIST)) + + + # ACL:rcsinfo + # ---------------------------------------------------------------------- + # $RCSfile: include.mk,v $ $Author: swhaney $ + # $Revision: 1.3 $ $Date: 2000/03/07 13:14:47 $ + # ---------------------------------------------------------------------- + # ACL:rcsinfo Index: examples/Manual/Doof2d/makefile =================================================================== RCS file: makefile diff -N makefile *** /dev/null Fri Mar 23 21:37:44 2001 --- makefile Mon Dec 3 14:01:55 2001 *************** *** 0 **** --- 1,96 ---- + # Generated by mm.pl: Mon Mar 9 13:58:39 MST 1998 + # ACL:license + # ---------------------------------------------------------------------- + # This software and ancillary information (herein called "SOFTWARE") + # called POOMA (Parallel 
Object-Oriented Methods and Applications) is + # made available under the terms described here. The SOFTWARE has been + # approved for release with associated LA-CC Number LA-CC-98-65. + # + # Unless otherwise indicated, this SOFTWARE has been authored by an + # employee or employees of the University of California, operator of the + # Los Alamos National Laboratory under Contract No. W-7405-ENG-36 with + # the U.S. Department of Energy. The U.S. Government has rights to use, + # reproduce, and distribute this SOFTWARE. The public may copy, distribute, + # prepare derivative works and publicly display this SOFTWARE without + # charge, provided that this Notice and any statement of authorship are + # reproduced on all copies. Neither the Government nor the University + # makes any warranty, express or implied, or assumes any liability or + # responsibility for the use of this SOFTWARE. + # + # If SOFTWARE is modified to produce derivative works, such modified + # SOFTWARE should be clearly marked, so as not to confuse it with the + # version available from LANL. + # + # For more information about POOMA, send e-mail to address@hidden, + # or visit the POOMA web page at http://www.acl.lanl.gov/pooma/. 
+ # ---------------------------------------------------------------------- + # ACL:license + + # This file is user-editable + + PROJECT_ROOT = $(shell cd ../../..; pwd) + include $(PROJECT_ROOT)/config/head.mk + + PASS=APP + + default:: Doof2d-C-element Doof2d-Array-element Doof2d-Array-parallel \ + Doof2d-Array-stencil Doof2d-Array-distributed \ + Doof2d-Field-parallel Doof2d-Field-distributed + + .PHONY: Doof2d-C-element + + Doof2d-C-element:: $(ODIR)/Doof2d-C-element + + $(ODIR)/Doof2d-C-element: $(ODIR)/Doof2d-C-element.o + $(LinkToSuite) + + .PHONY: Doof2d-Array-element + + Doof2d-Array-element:: $(ODIR)/Doof2d-Array-element + + $(ODIR)/Doof2d-Array-element: $(ODIR)/Doof2d-Array-element.o + $(LinkToSuite) + + .PHONY: Doof2d-Array-parallel + + Doof2d-Array-parallel:: $(ODIR)/Doof2d-Array-parallel + + $(ODIR)/Doof2d-Array-parallel: $(ODIR)/Doof2d-Array-parallel.o + $(LinkToSuite) + + .PHONY: Doof2d-Array-stencil + + Doof2d-Array-stencil:: $(ODIR)/Doof2d-Array-stencil + + $(ODIR)/Doof2d-Array-stencil: $(ODIR)/Doof2d-Array-stencil.o + $(LinkToSuite) + + .PHONY: Doof2d-Array-distributed + + Doof2d-Array-distributed:: $(ODIR)/Doof2d-Array-distributed + + $(ODIR)/Doof2d-Array-distributed: $(ODIR)/Doof2d-Array-distributed.o + $(LinkToSuite) + + .PHONY: Doof2d-Field-parallel + + Doof2d-Field-parallel:: $(ODIR)/Doof2d-Field-parallel + + $(ODIR)/Doof2d-Field-parallel: $(ODIR)/Doof2d-Field-parallel.o + $(LinkToSuite) + + .PHONY: Doof2d-Field-distributed + + Doof2d-Field-distributed:: $(ODIR)/Doof2d-Field-distributed + + $(ODIR)/Doof2d-Field-distributed: $(ODIR)/Doof2d-Field-distributed.o + $(LinkToSuite) + + include $(SHARED_ROOT)/tail.mk + + # ACL:rcsinfo + # ---------------------------------------------------------------------- + # $RCSfile: makefile,v $ $Author: oldham $ + # $Revision: 1.1 $ $Date: 2000/07/21 21:34:44 $ + # ---------------------------------------------------------------------- + # ACL:rcsinfo