freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

pooma performance


From: Tijskens, Bert
Subject: pooma performance
Date: Tue, 4 Dec 2001 08:10:39 +0100

Hi,
Looking for support for POOMA++ 2.3.0, I found this e-mail address
somewhere on the internet.
I wrote the following benchmark program, which computes something of the
form y = ax + b, where y and x are (DynamicArrays of) vectors and a and b
are (DynamicArrays of) scalars. I was surprised and disappointed to
find out that the simple C version of this loop is 4-5 times faster than
the POOMA version. I suppose I must have overlooked something. Can you
help? The program is at the bottom of this message.

the tests were run on a PC using the Intel C++ compiler

many thanks in advance,
bert



Dr. Engelbert TIJSKENS

Laboratory for Agro-Machinery and -Processing
Department of Agro-Engineering and -Economy
KULeuven

Kasteelpark Arenberg 30
B-3001 LEUVEN
BELGIUM

tel: ++(32) 16 32 8557
fax: ++(32) 16 32 8590
e-mail: address@hidden



Here's the program

#include "Pooma/Particles.h"
#include "Pooma/DynamicArrays.h"
#include "Tiny/Vector.h"
#include "Utilities/Inform.h"
#include <iostream>
#include <stdlib.h>
#include <Timer/Timer.h>

// Engine tag used for every particle attribute.  The patch tag is
// Remote<Dynamic> when POOMA_CHEETAH is set and plain Dynamic otherwise.
#if POOMA_CHEETAH
typedef MultiPatch< DynamicTag, Remote<Dynamic> > AttributeEngineTag_t;
#else
typedef MultiPatch< DynamicTag, Dynamic > AttributeEngineTag_t;
#endif

template <class Layout_t>
struct PC_UniformLayout_traits
{
        typedef AttributeEngineTag_t AttributeEngineTag_t;
        typedef Layout_t ParticleLayout_t;
};


// The particle traits type for this application: PC_UniformLayout_traits
// instantiated with UniformLayout as its particle layout type.
typedef PC_UniformLayout_traits<UniformLayout> PC_UniformLayout_t;

// Dimensionality of this problem
static const int nsd = 3;
// Number of particles in simulation
static const int NumPart = 10000;
// Number of loops
static const int nLoops  = 100;

// Particles subclass that holds the benchmark operands twice: once as
// POOMA attributes (x, y, a, b) and once as plain C arrays
// (x_, y_, a_, b_).
class PC : public Particles<PC_UniformLayout_t>
{
public:
 // Typedefs
        typedef Particles<PC_UniformLayout_t>   Base_t;
        typedef Base_t::AttributeEngineTag_t    AttributeEngineTag_t;
        typedef Base_t::ParticleLayout_t        ParticleLayout_t;
        typedef double                          AxisType_t;
        typedef Vector<nsd,AxisType_t>          PointType_t;

 // Constructor: hand the layout to the base class, register attributes
        PC(const ParticleLayout_t &pl) : Particles<PC_UniformLayout_t>(pl)
        {
                addAttribute(y);
                addAttribute(x);
                addAttribute(a);
                addAttribute(b);
        }

 // POOMA attributes (as public members): x and y hold one
 // PointType_t per particle, a and b hold one AxisType_t per particle
        DynamicArray<PointType_t,AttributeEngineTag_t>  x,y;
        DynamicArray<AxisType_t ,AttributeEngineTag_t>  a,b;

 // Plain-array counterparts of x, y, a and b.
 // NOTE(review): these arrays are stored inside the object, so each PC
 // occupies at least (2*nsd + 2) * NumPart * sizeof(double) bytes; an
 // instance declared as a local variable needs that much stack space.
        double x_[NumPart][nsd], y_[NumPart][nsd];
        double a_[NumPart], b_[NumPart];
};


// Number of patches to distribute particles across.
// Typically one would use one patch per processor.
const int numPatches = 1;


// Main simulation routine
int main(int argc, char *argv[])
{
 // Initialize POOMA and output stream
        Pooma::initialize(argc,argv);
        Inform out(argv[0]);
=09
        out << "Begin Bounce example code" << std::endl;
        out << "-------------------------" << std::endl;

 // Create a particle layout object for our use
        PC_UniformLayout_t::ParticleLayout_t particleLayout(numPatches);

 // Create the Particles subclass object
        PC pc(particleLayout);

 // Create some particles, recompute the global domain, and initialize
 // the attributes randomly.
        pc.globalCreate(NumPart);

        srand(12345U);
        typedef PC::AxisType_t Coordinate_t;
        Coordinate_t recranmax =3D
1.0/static_cast<Coordinate_t>(RAND_MAX);
        for (int i =3D 0; i < NumPart; ++i)
        {
                for (int d =3D 0; d < nsd; ++d) {
                        pc.x(i)(d) =3D rand() * recranmax;
                        pc.x_[i][d] =3D pc.x(i)(d);
                }
                pc.a_[i] =3D pc.a(i) =3D rand() * recranmax;
                pc.b_[i] =3D pc.b(i) =3D rand() * recranmax;
        }

 // reference using ordinary arrays : y =3D ax+b
        Timer t_array("ordinary arrays",cout); // starts a timer
        for (int it=3D1; it <=3D nLoops; ++it)
        {
                for (int i =3D 0; i < NumPart; ++i)
                        for (int d =3D 0; d < nsd; ++d)
                                pc.y_[i][d] =3D pc.a_[i]*pc.x_[i][d] +
pc.b_[i];
        }
        t_array.stop();
        t_array.print();

 // using pooma attributes: y =3D ax+b
        Timer t_PoomaAttributes("pooma attributes",cout); // starts a
timer
        for (int it=3D1; it <=3D nLoops; ++it)
        {
                pc.y =3D pc.a*pc.x + pc.b;
        }
        t_PoomaAttributes.stop();
        t_PoomaAttributes.print();

 // Shut down POOMA and exit
        Pooma::finalize();
        return 0;
}


reply via email to

[Prev in Thread] Current Thread [Next in Thread]