rambrain
performanceTestClasses.cpp
Go to the documentation of this file.
1 /* rambrain - a dynamical physical memory extender
2  * Copyright (C) 2015 M. Imgrund, A. Arth
3  * mimgrund (at) mpifr-bonn.mpg.de
4  * arth (at) usm.uni-muenchen.de
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "performanceTestClasses.h"
21 #include <chrono>
22 
23 #ifndef OpenMP_NOT_FOUND
24 #include <omp.h>
25 #endif
26 
27 map<string, performanceTest<> *> performanceTest<>::testClasses;
29 
30 performanceTest<>::performanceTest ( const char *name ) : name ( name )
31 {
32  testClasses[name] = this;
33 }
34 
35 void performanceTest<>::runTests ( unsigned int repetitions, const string &path )
36 {
37  cout << "Running test case " << name << std::endl;
38  int dummy = 0;
39 
40  for ( int param = parameters.size() - 1; param >= 0; --param ) {
41  if ( parameters[param]->enabled ) {
42  unsigned int steps = getStepsForParam ( param );
43  stringstream outname;
44  outname << name << param;
45 
46  ofstream temp ( outname.str() );
47 
48  for ( unsigned int step = 0; step < steps; ++step ) {
49  string params = getParamsString ( param, step );
50  stringstream call;
51  call << path << "rambrain-performancetests " << repetitions << " " << name << " " << params << " 2> /dev/null";
52  cout << "Calling: " << call.str() << endl;
53  dummy |= system ( call.str().c_str() );
54 
55  resultToTempFile ( param, step, temp );
56  temp << endl;
57 #ifdef LOGSTATS
58  if ( plotTimingStats ) {
59  handleTimingInfos ( param, step, repetitions );
60  }
61 #endif
62  }
63 
64  temp.close();
65  stringstream gp_name, gp_exec;
66  gp_name << name << param << ".gnuplot";
67  ofstream gnuplot ( gp_name.str() );
68  gp_exec << "gnuplot " << gp_name.str();
69 
70  cout << "Generating output file " << outname.str() << endl;
71  gnuplot << generateGnuplotScript ( outname.str(), outname.str(), parameters[param]->name, "Execution time [ms]", name, parameters[param]->deltaLog, parameters.size() - param );
72  gnuplot.close();
73 
74  cout << "Calling gnuplot and displaying result" << endl;
75  dummy |= system ( gp_exec.str().c_str() );
76  dummy |= system ( ( "convert -density 300 -resize 1920x " + outname.str() + ".eps -flatten " + outname.str() + ".png" ).c_str() );
77  if ( displayPlots ) {
78  dummy |= system ( ( "display " + outname.str() + ".png &" ).c_str() );
79  }
80 
81  if ( !dummy ) {
82  cerr << "An error in system calls occured..." << endl;
83  }
84  }
85  }
86 }
87 
88 void performanceTest<>::runRegisteredTests ( unsigned int repetitions, const string &path )
89 {
90  for ( auto it = testClasses.begin(); it != testClasses.end(); ++it ) {
91  performanceTest<> *test = it->second;
92  if ( test->enabled ) {
93  test->runTests ( repetitions, path );
94  } else {
95  cout << "Skipping test " << test->name << " because it is disabled." << endl;
96  }
97  }
98 }
99 
100 void performanceTest<>::enableTest ( const string &name, bool enabled )
101 {
102  auto it = testClasses.find ( name );
103  if ( it != testClasses.end() ) {
104  performanceTest<> *test = it->second;
105  test->enabled = enabled;
106  } else {
107  cerr << "Test " << name << " not found " << endl;
108  }
109 }
110 
111 void performanceTest<>::enableAllTests ( bool enabled )
112 {
113  for ( auto it = testClasses.begin(); it != testClasses.end(); ++it ) {
114  performanceTest<> *test = it->second;
115  test->enabled = enabled;
116  }
117 }
118 
119 void performanceTest<>::unregisterTest ( const string &name )
120 {
121  auto it = testClasses.find ( name );
122  if ( it != testClasses.end() ) {
123  testClasses.erase ( it );
124  } else {
125  cerr << "Test " << name << " not found " << endl;
126  }
127 }
128 
130 {
131  for ( auto it = testClasses.begin(); it != testClasses.end(); ++it ) {
132  performanceTest<> *test = it->second;
133 
134  cout << "Test class " << test->name << " is currently " << ( test->enabled ? "enabled" : "disabled" ) << ". ";
135  cout << "It has " << test->parameters.size() << " parameters:" << endl;
136 
137  for ( auto jt = test->parameters.rbegin(); jt != test->parameters.rend(); ++jt ) {
138  testParameterBase *param = *jt;
139 
140  if ( param->enabled ) {
141  cout << "\tFrom\t" << param->valueAsString ( 0 ) << "\tover\t" << param->valueAsString() << "\tto\t" << param->valueAsString ( param->steps - 1 ) << "\tin ";
142  cout << param->steps << ( param->deltaLog ? " logarithmic" : " linear" ) << " steps" << endl;
143  } else {
144  cout << "\tParameter variation is currently disabled" << endl;
145  }
146  }
147 
148  cout << endl;
149  }
150 }
151 
152 bool performanceTest<>::runRespectiveTest ( const string &name, tester &myTester, unsigned int repetitions, char **arguments, int &offset, int argumentscount )
153 {
154  auto it = testClasses.find ( name );
155  if ( it != testClasses.end() ) {
156  performanceTest<> *test = it->second;
157  for ( unsigned int r = 0; r < repetitions; ++r ) {
158  cout << "Repetition " << ( r + 1 ) << " out of " << repetitions << " " << '\r';
159  cout.flush();
160  int myOffset = offset;
161  myTester.startNewTimeCycle();
162  test->actualTestMethod ( myTester, arguments, myOffset, argumentscount );
163 
164  if ( r == repetitions - 1 ) {
165  offset = myOffset;
166  }
167  }
168  cout << " " << '\r';
169  cout.flush();
170 
171  return true;
172  } else {
173  cerr << "Test " << name << " not found " << endl;
174 
175  return false;
176  }
177 }
178 
179 string performanceTest<>::getParamsString ( int varryParam, unsigned int step, const string &delimiter )
180 {
181  stringstream ss;
182  for ( int i = parameters.size() - 1; i >= 0; --i ) {
183  if ( i == varryParam ) {
184  ss << parameters[i]->valueAsString ( step );
185  } else {
186  ss << parameters[i]->valueAsString();
187  }
188  ss << delimiter;
189  }
190  return ss.str();
191 }
192 
193 string performanceTest<>::getTestOutfile ( int varryParam, unsigned int step )
194 {
195  stringstream ss;
196  ss << name;
197  for ( int i = parameters.size() - 1; i >= 0; --i ) {
198  if ( i == varryParam ) {
199  ss << "#" << parameters[i]->valueAsString ( step );
200  } else {
201  ss << "#" << parameters[i]->valueAsString();
202  }
203  }
204  return ss.str();
205 }
206 
207 void performanceTest<>::resultToTempFile ( int varryParam, unsigned int step, ofstream &file )
208 {
209  file << getParamsString ( varryParam, step, "\t" );
210  ifstream test ( getTestOutfile ( varryParam, step ) );
211  string line;
212  while ( getline ( test, line ) ) {
213  if ( line.find ( '#' ) == string::npos ) {
214  vector<string> parts = splitString ( line, '\t' );
215  file << parts[parts.size() - 2] << '\t';
216  }
217  }
218 }
219 
220 vector<string> performanceTest<>::splitString ( const string &in, char delimiter )
221 {
222  stringstream ss ( in );
223  vector<string> parts;
224  string part;
225  while ( getline ( ss, part, delimiter ) ) {
226  parts.push_back ( part );
227  }
228  return parts;
229 }
230 
231 string performanceTest<>::generateGnuplotScript ( const string &infile, const string &name, const string &xlabel, const string &ylabel, const string &title, bool log, int paramColumn )
232 {
233  stringstream ss;
234  ss << "set terminal postscript eps enhanced color 'Helvetica,10'" << endl;
235  ss << "set output \"" << name << ".eps\"" << endl;
236  ss << "set xlabel \"" << xlabel << "\"" << endl;
237  ss << "set ylabel \"" << ylabel << "\"" << endl;
238  ss << "set title \"" << title << "\"" << endl;
239  if ( log ) {
240  ss << "set log xy" << endl;
241  } else {
242  ss << "set log y" << endl;
243  }
244  ss << generateMyGnuplotPlotPart ( infile, paramColumn );
245  return ss.str();
246 }
247 
248 void performanceTest<>::handleTimingInfos ( int varryParam, unsigned int step, unsigned int repetitions )
249 {
250  // Move stats file for permanent storage
251  string outFile = getTestOutfile ( varryParam, step );
252  string timingFile = outFile + "_stats";
253  string hitMissFile = outFile + "_hm";
254  string tempFile = outFile + "_timing";
255  if ( rename ( "rambrain-swapstats.log", timingFile.c_str() ) ) {
256  errmsgf ( "Could not rename swapstats log to %s", timingFile.c_str() );
257  }
258 
259  ifstream testInFile ( outFile );
260  ifstream timingInFile ( timingFile );
261  ofstream timingTruncFile ( tempFile );
262 
263  int initpos = timingInFile.tellg();
264  // Go through test output and get first pair of times
265  string testLine;
266  int measurements = 0, dataPoints = 0;
267  unsigned long long starttimes[repetitions];
268  for ( unsigned int r = 0; r < repetitions; ++r ) {
269  starttimes[r] = 0LLu;
270  }
271 
272  while ( getline ( testInFile, testLine ) ) {
273  if ( testLine.find ( '#' ) == string::npos ) {
274  vector<string> testParts = splitString ( testLine, '\t' );
275 
276  ++ measurements;
277  const unsigned int runCols = 4;
278  char *buf;
279  timingInFile.seekg ( initpos );
280  for ( unsigned int r = 0; r < repetitions; ++r ) {
281  unsigned long long start = strtoull ( testParts[r * runCols + 1].c_str(), &buf, 10 );
282  unsigned long long end = strtoull ( testParts[r * runCols + 2].c_str(), &buf, 10 );
283 
284  // No go through timing file and look for the matching lines there
285  vector<vector<string>> relevantTimingParts = getRelevantTimingParts ( timingInFile, start, end );
286 
287  // We have all stats for the current run segment, output this data to the temp file
288  timingInfosToFile ( timingTruncFile, relevantTimingParts, starttimes[r] );
289  dataPoints += relevantTimingParts.size();
290  }
291  }
292  }
293 
294  testInFile.close();
295  timingInFile.close();
296  timingTruncFile.close();
297 
298  // Plot that thing
299  ofstream gnuplot1 ( timingFile + ".gnuplot" ), gnuplot2 ( hitMissFile + ".gnuplot" );
300  cout << "Generating output files " << timingFile << " and " << hitMissFile << endl;
301 
302  const int maxDataPoints = 50 * repetitions;
303  plotTimingInfos ( gnuplot1, timingFile, tempFile, measurements, repetitions, dataPoints <= maxDataPoints );
304  plotTimingHitMissInfos ( gnuplot2, hitMissFile, tempFile, measurements, repetitions, dataPoints <= maxDataPoints );
305 
306  gnuplot1.close();
307  gnuplot2.close();
308 
309  cout << "Calling gnuplot and displaying result" << endl;
310  int dummy = 0;
311  dummy |= system ( ( "gnuplot " + timingFile + ".gnuplot" ).c_str() );
312  dummy |= system ( ( "convert -density 300 -resize 1920x " + timingFile + ".eps -flatten " + timingFile + ".png" ).c_str() );
313  if ( displayPlots ) {
314  dummy |= system ( ( "display " + timingFile + ".png &" ).c_str() );
315  }
316 
317  dummy |= system ( ( "gnuplot " + hitMissFile + ".gnuplot" ).c_str() );
318  dummy |= system ( ( "convert -density 300 -resize 1920x " + hitMissFile + ".eps -flatten " + hitMissFile + ".png" ).c_str() );
319  if ( displayPlots ) {
320  dummy |= system ( ( "display " + hitMissFile + ".png &" ).c_str() );
321  }
322 
323  if ( !dummy ) {
324  cerr << "An error in system calls occured..." << endl;
325  }
326 }
327 
328 vector<vector<string>> performanceTest<>::getRelevantTimingParts ( ifstream &in, unsigned long long start, unsigned long long end )
329 {
330  vector<vector<string>> relevantTimingParts;
331  string timingLine;
332  char *buf;
333 
334  while ( getline ( in, timingLine ) ) {
335  if ( timingLine.find ( '#' ) == string::npos ) {
336  vector<string> timingParts = splitString ( timingLine, '\t' );
337 
338  unsigned long long current = strtoull ( timingParts[0].c_str(), &buf, 10 );
339 
340  if ( current >= start && current <= end ) {
341  relevantTimingParts.push_back ( timingParts );
342  }
343  if ( current > end ) {
344  break;
345  }
346  }
347  }
348  return relevantTimingParts;
349 }
350 
351 void performanceTest<>::timingInfosToFile ( ofstream &out, const vector<vector<string>> &relevantTimingParts, unsigned long long &starttime )
352 {
353  out << "#Time since beginning [ms]\tSwappedOut [B]\tSwappedIn [B]\tMemory Used [B]\tSwap Used [B]\tHit / Miss" << endl;
354  if ( relevantTimingParts.size() > 0 ) {
355  char *buf;
356  if ( starttime == 0LLu ) {
357  starttime = strtoull ( relevantTimingParts.front() [0].c_str(), &buf, 10 );
358  }
359  for ( auto it = relevantTimingParts.begin(); it != relevantTimingParts.end(); ++it ) {
360  const unsigned long long relTime = strtoull ( ( *it ) [0].c_str(), &buf, 10 ) - starttime;
361  const double mbOut = strtod ( ( *it ) [2].c_str() , NULL ) / mib;
362  const double mbIn = strtod ( ( *it ) [5].c_str(), NULL ) / mib;
363  const string hitmiss = ( *it ) [7];
364  const double mbUsed = strtod ( ( *it ) [8].c_str(), NULL ) / mib;
365  const double mbSwapped = strtod ( ( *it ) [10].c_str(), NULL ) / mib;
366 
367  out << relTime << " " << mbOut << " " << mbIn << " " << mbUsed << " " << mbSwapped << " " << hitmiss << endl;
368  }
369  } else {
370  out << 0 << " " << 0 << " " << 0 << " " << 0 << " " << 0 << " " << 0.0 << endl;
371  }
372  out << endl;
373 }
374 
375 void performanceTest<>::plotTimingInfos ( ofstream &gnutemp, const string &outname, const string &dataFile, unsigned int measurements, unsigned int repetitions, bool linesPoints )
376 {
377  gnutemp << "set terminal postscript eps enhanced color 'Helvetica,10'" << endl;
378  gnutemp << "set output \"" << outname << ".eps\"" << endl;
379  gnutemp << "set xlabel \"Time [ms]\"" << endl;
380  gnutemp << "set ylabel \"Swap Movement [MB]\"" << endl;
381  gnutemp << "set title \"" << name << "\"" << endl;
382  gnutemp << "set key top left" << endl;
383 
384  if ( linesPoints ) {
385  gnutemp << "set style data linespoints" << endl;
386  } else {
387  gnutemp << "set style data lines" << endl;
388  }
389 
390  gnutemp << "plot ";
391  int c = 1;
392  for ( unsigned int m = 0, s = 2; m < measurements; ++m, ++s ) {
393  int mrep = m * repetitions;
394  gnutemp << "'" << dataFile << "' every :::" << mrep << "::" << ( mrep + repetitions - 1 ) << " using 1:2 lt 1";
395  if ( linesPoints ) {
396  gnutemp << " pt " << s;
397  }
398  gnutemp << " lc " << c++ << " title \"" << "Swapped out: " << plotParts[m] << "\", \\" << endl;
399 
400  gnutemp << "'" << dataFile << "' every :::" << mrep << "::" << ( mrep + repetitions - 1 ) << " using 1:3 lt 1";
401  if ( linesPoints ) {
402  gnutemp << " pt " << s;
403  }
404  gnutemp << " lc " << c++ << " title \"" << "Swapped in: " << plotParts[m] << "\", \\" << endl;
405  gnutemp << "'" << dataFile << "' every :::" << mrep << "::" << ( mrep + repetitions - 1 ) << " using 1:4 lt 2";
406  if ( linesPoints ) {
407  gnutemp << " pt " << s;
408  }
409  gnutemp << " lc " << c++ << " title \"" << "Main memory: " << plotParts[m] << "\", \\" << endl;
410  gnutemp << "'" << dataFile << "' every :::" << mrep << "::" << ( mrep + repetitions - 1 ) << " using 1:5 lt 2";
411  if ( linesPoints ) {
412  gnutemp << " pt " << s;
413  }
414  gnutemp << " lc " << c++ << " title \"" << "Swap memory:" << plotParts[m] << "\"";
415  if ( m != measurements - 1 ) {
416  gnutemp << ", \\";
417  }
418  gnutemp << endl;
419  }
420 }
421 
422 void performanceTest<>::plotTimingHitMissInfos ( ofstream &gnutemp, const string &outname, const string &dataFile, unsigned int measurements, unsigned int repetitions, bool linesPoints )
423 {
424  gnutemp << "set terminal postscript eps enhanced color 'Helvetica,10'" << endl;
425  gnutemp << "set output \"" << outname << ".eps\"" << endl;
426  gnutemp << "set xlabel \"Time [ms]\"" << endl;
427  gnutemp << "set ylabel \"Hit / Miss ratio\"" << endl;
428  gnutemp << "set title \"" << name << "\"" << endl;
429  gnutemp << "set log y" << endl;
430 
431  if ( linesPoints ) {
432  gnutemp << "set style data linespoints" << endl;
433  } else {
434  gnutemp << "set style data lines" << endl;
435  }
436 
437  gnutemp << "plot ";
438  int c = 1;
439  for ( unsigned int m = 0, s = 2; m < measurements; ++m, ++s, ++c ) {
440  int mrep = m * repetitions;
441  gnutemp << "'" << dataFile << "' every :::" << mrep << "::" << ( mrep + repetitions - 1 ) << " using 1:6 lt 1";
442  if ( linesPoints ) {
443  gnutemp << " pt " << s;
444  }
445  gnutemp << " lc " << c << " title \"" << plotParts[m] << ": Hit / Miss\", \\" << endl;
446  }
447 }
448 
449 
451 
// TESTSTATICS defines the static comment string of test class `name` and
// creates one instance called <name>Instance. Use at namespace scope and
// terminate with a semicolon.
#define TESTSTATICS(name, commenttext) string name::comment = commenttext; \
    name name##Instance

// TESTPARAM fills min, max, steps, deltaLog, mean and name of the member
// parameter<param>. The expansion is wrapped in do { } while ( 0 ) so that it
// acts as one statement, e.g. under an if without braces. Terminate with a
// semicolon.
#define TESTPARAM(param, minimum, maximum, nrsteps, log, meanvalue, paramname) do { \
        parameter##param.min = ( minimum ); \
        parameter##param.max = ( maximum ); \
        parameter##param.steps = ( nrsteps ); \
        parameter##param.deltaLog = ( log ); \
        parameter##param.mean = ( meanvalue ); \
        parameter##param.name = ( paramname ); \
    } while ( 0 )
461 
462 
463 
464 TESTSTATICS ( matrixTransposeTest, "Measurements of allocation and definition, transposition, deletion times" );
465 
466 matrixTransposeTest::matrixTransposeTest() : performanceTest<int, int> ( "MatrixTranspose" )
467 {
468  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
469  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
470  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
471 }
472 
473 void matrixTransposeTest::actualTestMethod ( tester &test, int param1, int param2 )
474 {
475  const global_bytesize size = param1;
476  const global_bytesize memlines = param2;
477  const global_bytesize mem = size * sizeof ( double ) * memlines;
478  const global_bytesize swapmem = size * size * sizeof ( double ) * 2;
479 
482 
483  test.addTimeMeasurement();
484 
485  // Allocate and set
486  managedPtr<double> *rows[size];
487  for ( unsigned int i = 0; i < size; ++i ) {
488  rows[i] = new managedPtr<double> ( size );
489  adhereTo<double> rowloc ( *rows[i] );
490  double *rowdbl = rowloc;
491  for ( unsigned int j = 0; j < size; ++j ) {
492  rowdbl[j] = i * size + j;
493  }
494  }
495 
496  test.addTimeMeasurement();
497 
498  // Transpose
499  for ( unsigned int i = 0; i < size; ++i ) {
500  adhereTo<double> rowloc1 ( *rows[i] );
501  double *rowdbl1 = rowloc1;
502  for ( unsigned int j = i + 1; j < size; ++j ) {
503  adhereTo<double> rowloc2 ( *rows[j] );
504  double *rowdbl2 = rowloc2;
505 
506  double buffer = rowdbl1[j];
507  rowdbl1[j] = rowdbl2[i];
508  rowdbl2[i] = buffer;
509  }
510  }
511 
512  test.addTimeMeasurement();
513 
514 #ifdef PTEST_CHECKS
515  for ( unsigned int i = 0; i < size; ++i ) {
516  adhereTo<double> rowloc ( *rows[i] );
517  double *rowdbl = rowloc;
518  for ( unsigned int j = 0; j < size; ++j ) {
519  if ( rowdbl[j] != j * size + i ) {
520  printf ( "Failed check!\n" );
521  }
522  }
523  }
524 #endif
525 
526  // Delete
527  for ( unsigned int i = 0; i < size; ++i ) {
528  delete rows[i];
529  }
530 
531  test.addTimeMeasurement();
532 }
533 
534 string matrixTransposeTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
535 {
536  stringstream ss;
537  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
538  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
539  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
540  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"" << endl;
541  return ss.str();
542 }
543 
544 
545 TESTSTATICS ( matrixCleverTransposeTest, "Measurements of allocation and definition, transposition, deletion times, but with a clever transposition algorithm" );
546 
547 matrixCleverTransposeTest::matrixCleverTransposeTest() : performanceTest<int, int> ( "MatrixCleverTranspose" )
548 {
549  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
550  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
551  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
552 }
553 
554 void matrixCleverTransposeTest::actualTestMethod ( tester &test, int param1, int param2 )
555 {
556  const global_bytesize size = param1;
557  const global_bytesize memlines = param2;
558  const global_bytesize mem = size * sizeof ( double ) * memlines;
559  const global_bytesize swapmem = size * size * sizeof ( double ) * 2;
560 
563 
564  test.addTimeMeasurement();
565 
566  // Allocate and set
567  managedPtr<double> *rows[size];
568  for ( unsigned int i = 0; i < size; ++i ) {
569  rows[i] = new managedPtr<double> ( size );
570  adhereTo<double> rowloc ( *rows[i] );
571  double *rowdbl = rowloc;
572  for ( unsigned int j = 0; j < size; ++j ) {
573  rowdbl[j] = i * size + j;
574  }
575  }
576 
577  test.addTimeMeasurement();
578 
579  // Transpose blockwise
580  unsigned int rows_fetch = memlines / 2 > size ? size : memlines / 2;
581  unsigned int n_blocks = size / rows_fetch + ( size % rows_fetch == 0 ? 0 : 1 );
582 
583  adhereTo<double> *Arows[rows_fetch];
584  adhereTo<double> *Brows[rows_fetch];
585 
586  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
587  for ( unsigned int ii = 0; ii <= jj; ii++ ) {
588  //A_iijj <-> B_jjii
589 
590  //Reserve rows ii and jj
591  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
592  unsigned int j_lim = ( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
593  unsigned int i_off = ii * rows_fetch; // Block A, vertical index
594  unsigned int j_off = jj * rows_fetch; // Block A, horizontal index
595 
596  //Get rows A_ii** and B_jj** into memory:
597  for ( unsigned int i = 0; i < i_lim; ++i ) {
598  Arows[i] = new adhereTo<double> ( *rows[i + i_off] );
599  }
600  for ( unsigned int j = 0; j < j_lim; ++j ) {
601  Brows[j] = new adhereTo<double> ( *rows[j + j_off] );
602  }
603 
604  for ( unsigned int j = 0; j < j_lim; j++ ) {
605  for ( unsigned int i = 0; i < ( jj == ii ? j : i_lim ); i++ ) { //Inner block matrix transpose, vertical index in A
606  //Inner block matrxi transpose, horizontal index in A
607  double *Arowdb = *Arows[i]; //Fetch pointer for Element of A_ii+i
608  double *Browdb = *Brows[j];
609 
610  double inter = Arowdb[j_off + j]; //Store inner element A_ij
611  Arowdb[j + j_off] = Browdb[ i + i_off]; //Override with element of B_ji
612  Browdb[i + i_off] = inter; //set B_ji to former val of A_ij
613  }
614  }
615 
616  for ( unsigned int i = 0; i < i_lim; ++i ) {
617  delete ( Arows[i] );
618  }
619  for ( unsigned int j = 0; j < j_lim; ++j ) {
620  delete ( Brows[j] );
621  }
622  }
623  }
624 
625  test.addTimeMeasurement();
626 
627 #ifdef PTEST_CHECKS
628  for ( unsigned int i = 0; i < size; ++i ) {
629  adhereTo<double> rowloc ( *rows[i] );
630  double *rowdbl = rowloc;
631  for ( unsigned int j = 0; j < size; ++j ) {
632  if ( rowdbl[j] != j * size + i ) {
633  printf ( "Failed check!\n" );
634  }
635  }
636  }
637 #endif
638 
639  // Delete
640  for ( unsigned int i = 0; i < size; ++i ) {
641  delete rows[i];
642  }
643 
644  test.addTimeMeasurement();
645 }
646 
647 string matrixCleverTransposeTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
648 {
649  stringstream ss;
650  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
651  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
652  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
653  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
654  return ss.str();
655 }
656 
657 TESTSTATICS ( matrixCleverTranspose2Test, "Measurements of allocation and definition, transposition, deletion times, but with a clever transposition algorithm and same rows_fetch as openMP version" );
658 
659 matrixCleverTranspose2Test::matrixCleverTranspose2Test() : performanceTest<int, int> ( "MatrixCleverTranspose2" )
660 {
661  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
662  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
663  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
664 }
665 
666 void matrixCleverTranspose2Test::actualTestMethod ( tester &test, int param1, int param2 )
667 {
668  const global_bytesize size = param1;
669  const global_bytesize memlines = param2;
670  const global_bytesize mem = size * sizeof ( double ) * memlines;
671  const global_bytesize swapmem = size * size * sizeof ( double ) * 2;
672 
675 
676  test.addTimeMeasurement();
677 
678  // Allocate and set
679  managedPtr<double> *rows[size];
680  for ( unsigned int i = 0; i < size; ++i ) {
681  rows[i] = new managedPtr<double> ( size );
682  adhereTo<double> rowloc ( *rows[i] );
683  double *rowdbl = rowloc;
684  for ( unsigned int j = 0; j < size; ++j ) {
685  rowdbl[j] = i * size + j;
686  }
687  }
688 
689  test.addTimeMeasurement();
690 
691  // Transpose blockwise
692  unsigned int ompt = 2;
693 #ifndef OpenMP_NOT_FOUND
694  ompt = omp_get_max_threads();
695 #endif
696  unsigned int rows_fetch = memlines / ( 4 * ompt ) > size ? size : memlines / ( 4 * ompt );
697  unsigned int n_blocks = size / rows_fetch + ( size % rows_fetch == 0 ? 0 : 1 );
698 
699  adhereTo<double> *Arows[rows_fetch];
700  adhereTo<double> *Brows[rows_fetch];
701 
702  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
703  for ( unsigned int ii = 0; ii <= jj; ii++ ) {
704  //A_iijj <-> B_jjii
705 
706  //Reserve rows ii and jj
707  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
708  unsigned int j_lim = ( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
709  unsigned int i_off = ii * rows_fetch; // Block A, vertical index
710  unsigned int j_off = jj * rows_fetch; // Block A, horizontal index
711 
712  //Get rows A_ii** and B_jj** into memory:
713  for ( unsigned int i = 0; i < i_lim; ++i ) {
714  Arows[i] = new adhereTo<double> ( *rows[i + i_off] );
715  }
716  for ( unsigned int j = 0; j < j_lim; ++j ) {
717  Brows[j] = new adhereTo<double> ( *rows[j + j_off] );
718  }
719 
720  for ( unsigned int j = 0; j < j_lim; j++ ) {
721  for ( unsigned int i = 0; i < ( jj == ii ? j : i_lim ); i++ ) { //Inner block matrix transpose, vertical index in A
722  //Inner block matrxi transpose, horizontal index in A
723  double *Arowdb = *Arows[i]; //Fetch pointer for Element of A_ii+i
724  double *Browdb = *Brows[j];
725 
726  double inter = Arowdb[j_off + j]; //Store inner element A_ij
727  Arowdb[j + j_off] = Browdb[ i + i_off]; //Override with element of B_ji
728  Browdb[i + i_off] = inter; //set B_ji to former val of A_ij
729  }
730  }
731 
732  for ( unsigned int i = 0; i < i_lim; ++i ) {
733  delete ( Arows[i] );
734  }
735  for ( unsigned int j = 0; j < j_lim; ++j ) {
736  delete ( Brows[j] );
737  }
738  }
739  }
740 
741  test.addTimeMeasurement();
742 
743 #ifdef PTEST_CHECKS
744  for ( unsigned int i = 0; i < size; ++i ) {
745  adhereTo<double> rowloc ( *rows[i] );
746  double *rowdbl = rowloc;
747  for ( unsigned int j = 0; j < size; ++j ) {
748  if ( rowdbl[j] != j * size + i ) {
749  printf ( "Failed check!\n" );
750  }
751  }
752  }
753 #endif
754 
755  // Delete
756  for ( unsigned int i = 0; i < size; ++i ) {
757  delete rows[i];
758  }
759 
760  test.addTimeMeasurement();
761 }
762 
763 string matrixCleverTranspose2Test::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
764 {
765  stringstream ss;
766  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
767  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
768  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
769  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
770  return ss.str();
771 }
772 
773 
774 #ifndef OpenMP_NOT_FOUND
775 TESTSTATICS ( matrixCleverTransposeOpenMPTest, "Same as cleverTranspose, but with OpenMP" );
776 
777 matrixCleverTransposeOpenMPTest::matrixCleverTransposeOpenMPTest() : performanceTest<int, int> ( "MatrixCleverTransposeOpenMP" )
778 {
779  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
780  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
781  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
782 }
783 
784 void matrixCleverTransposeOpenMPTest::actualTestMethod ( tester &test, int param1, int param2 )
785 {
786  const global_bytesize size = param1;
787  const global_bytesize memlines = param2;
788  const global_bytesize mem = size * sizeof ( double ) * memlines;
789  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
790 
793 
794  test.addTimeMeasurement();
795 
796  // Allocate and set
797  managedPtr<double> *rows[size];
798  #pragma omp parallel for
799  for ( unsigned int i = 0; i < size; ++i ) {
800  rows[i] = new managedPtr<double> ( size );
801  adhereTo<double> rowloc ( *rows[i] );
802  double *rowdbl = rowloc;
803  for ( unsigned int j = 0; j < size; ++j ) {
804  rowdbl[j] = i * size + j;
805  }
806  }
807 
808  test.addTimeMeasurement();
809 
810  // Transpose blockwise, leave a bit free space, if not, we're stuck in the process...
811 
812  unsigned int ompt = 2;
813 #ifndef OpenMP_NOT_FOUND
814  ompt = omp_get_max_threads();
815 #endif
816  unsigned int rows_fetch = memlines / ( 4 * ompt ) > size ? size : memlines / ( 4 * ompt );
817  unsigned int n_blocks = size / rows_fetch + ( size % rows_fetch == 0 ? 0 : 1 );
818 
819 
820  #pragma omp parallel for
821  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
822  for ( unsigned int ii = 0; ii <= jj; ii++ ) {
823  //A_iijj <-> B_jjii
824 
825  //Reserve rows ii and jj
826  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
827  unsigned int j_lim = ( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
828  unsigned int i_off = ii * rows_fetch; // Block A, vertical index
829  unsigned int j_off = jj * rows_fetch; // Block A, horizontal index
830 
831  //Get rows A_ii** and B_jj** into memory:
832  adhereTo<double> *Arows[rows_fetch];
833  adhereTo<double> *Brows[rows_fetch];
834  for ( unsigned int i = 0; i < i_lim; ++i ) {
835  Arows[i] = new adhereTo<double> ( *rows[i + i_off], true );
836  }
837  for ( unsigned int j = 0; j < j_lim; ++j ) {
838  Brows[j] = new adhereTo<double> ( *rows[j + j_off], true );
839  }
840  for ( unsigned int j = 0; j < j_lim; j++ ) {
841  for ( unsigned int i = 0; i < ( jj == ii ? j : i_lim ); i++ ) { //Inner block matrix transpose, vertical index in A
842  //Inner block matrxi transpose, horizontal index in A
843  double *Arowdb, *Browdb;
844 
845  {
846  LISTOFINGREDIENTS
847 
848  Arowdb = *Arows[i]; //Fetch pointer for Element of A_ii+i
849  Browdb = *Brows[j];
850  }
851 
852  double inter = Arowdb[j_off + j]; //Store inner element A_ij
853  Arowdb[j + j_off] = Browdb[ i + i_off]; //Override with element of B_ji
854  Browdb[i + i_off] = inter; //set B_ji to former val of A_ij
855  }
856  }
857  for ( unsigned int i = 0; i < i_lim; ++i ) {
858  delete ( Arows[i] );
859  }
860  for ( unsigned int j = 0; j < j_lim; ++j ) {
861  delete ( Brows[j] );
862  }
863  }
864  }
865 
866 
867  test.addTimeMeasurement();
868 
869 #ifdef PTEST_CHECKS
870  for ( unsigned int i = 0; i < size; ++i ) {
871  adhereTo<double> rowloc ( *rows[i] );
872  double *rowdbl = rowloc;
873  for ( unsigned int j = 0; j < size; ++j ) {
874  if ( rowdbl[j] != j * size + i ) {
875  printf ( "Failed check!\n" );
876  }
877  }
878  }
879 #endif
880 
881  // Delete
882  #pragma omp parallel for
883  for ( unsigned int i = 0; i < size; ++i ) {
884  delete rows[i];
885  }
886  test.addTimeMeasurement();
887 }
888 
889 string matrixCleverTransposeOpenMPTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
890 {
891  stringstream ss;
892  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
893  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
894  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
895  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
896  return ss.str();
897 }
898 #endif
899 
900 
901 TESTSTATICS ( matrixCleverBlockTransposeTest, "Same as cleverTranspose, but with blockwise multiplication" );
902 
903 matrixCleverBlockTransposeTest::matrixCleverBlockTransposeTest() : performanceTest<int, int> ( "MatrixCleverBlockTranspose" )
904 {
905  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
906  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
907  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
908 }
909 
910 void matrixCleverBlockTransposeTest::actualTestMethod ( tester &test, int param1, int param2 )
911 {
912  const global_bytesize size = param1;
913  const global_bytesize memlines = param2;
914  const global_bytesize mem = size * sizeof ( double ) * memlines;
915  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
916 
919 
920 
921  test.addTimeMeasurement();
922 
923  // Transpose blockwise, leave a bit free space, if not, we're stuck in the process...
924  unsigned int rows_fetch = sqrt ( memlines * size / 2 );
925  unsigned int blocksize = rows_fetch * rows_fetch;
926 
927  unsigned int n_blocks = size / rows_fetch + ( size % rows_fetch == 0 ? 0 : 1 );
928 
929  //Blocks are rows_fetch² matrices stored in n_blocks² blocks.
930 
931 #define blockIdx(x,y) ((x/rows_fetch)*n_blocks+y/rows_fetch)
932 #define inBlockX(x,y) (x%rows_fetch)
933 #define inBlockY(x,y) (y%rows_fetch)
934 #define inBlockIdx(x,y) (inBlockX(x,y)*rows_fetch+inBlockY(x,y))
935  // Allocate and set
936  managedPtr<double> *rows[n_blocks * n_blocks];
937  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
938  for ( unsigned int ii = 0; ii < n_blocks; ii++ ) {
939  rows[ii * n_blocks + jj] = new managedPtr<double> ( blocksize );
940  adhereTo<double> adh ( *rows[ii * n_blocks + jj] );
941  double *locPtr = adh;
942  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
943  unsigned int j_lim = rows_fetch;//( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
944  for ( unsigned int i = 0; i < i_lim; i++ ) {
945  for ( unsigned int j = 0; j < j_lim; j++ ) {
946  locPtr[i * rows_fetch + j] = ( ii * rows_fetch + i ) * size + ( j + rows_fetch * jj );
947  }
948  }
949  }
950  }
951  test.addTimeMeasurement();
952 
953  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
954  for ( unsigned int ii = 0; ii <= jj; ii++ ) {
955  //A_iijj <-> B_jjii
956 
957  //Reserve rows ii and jj
958  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
959  unsigned int j_lim = ( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
960 
961  //Get rows A_ii** and B_jj** into memory:
962  adhereTo<double> aBlock ( *rows[ii * n_blocks + jj] );
963  adhereTo<double> bBlock ( *rows[jj * n_blocks + ii] );
964 
965  double *aLoc = aBlock;
966  double *bLoc = bBlock;
967 
968  for ( unsigned int j = 0; j < j_lim; j++ ) {
969  for ( unsigned int i = 0; i < ( jj == ii ? j : i_lim ); i++ ) { //Inner block matrix transpose, vertical index in A
970  //Inner block matrxi transpose, horizontal index in A
971  double inter = aLoc[i * rows_fetch + j]; //Store inner element A_ij
972  aLoc[i * rows_fetch + j] = bLoc[j * rows_fetch + i]; //Override with element of B_ji
973  bLoc[j * rows_fetch + i] = inter; //set B_ji to former val of A_ij
974  }
975  }
976  }
977  }
978 
979  test.addTimeMeasurement();
980 
981 #ifdef PTEST_CHECKS
982  for ( unsigned int i = 0; i < size; ++i ) {
983  for ( unsigned int j = 0; j < size; ++j ) {
984  unsigned int blckidx = blockIdx ( i, j );
985  unsigned int inblck = inBlockIdx ( i, j );
986  adhereTo<double> adh ( *rows[blckidx] );
987  double *loc = adh;
988  if ( loc[inblck] != j * size + i ) {
989  printf ( "Failed check!\n" );
990  }
991  }
992  }
993 #endif
994 
995  // Delete
996  for ( unsigned int i = 0; i < n_blocks * n_blocks; ++i ) {
997  delete rows[i];
998  }
999 
1000  test.addTimeMeasurement();
1001 }
1002 
1003 string matrixCleverBlockTransposeTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1004 {
1005  stringstream ss;
1006  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1007  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
1008  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1009  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1010  return ss.str();
1011 }
1012 
1013 
1014 #ifndef OpenMP_NOT_FOUND
1015 TESTSTATICS ( matrixCleverBlockTransposeOpenMPTest, "Same as cleverTranspose, but with OpenMP and blockwise multiplication" );
1016 
1017 matrixCleverBlockTransposeOpenMPTest::matrixCleverBlockTransposeOpenMPTest() : performanceTest<int, int> ( "MatrixCleverBlockTransposeOpenMP" )
1018 {
1019  TESTPARAM ( 1, 10, 8000, 20, true, 4000, "Matrix size per dimension" );
1020  TESTPARAM ( 2, 1000, 10000, 20, true, 2000, "Matrix rows in main memory" );
1021  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Transposition", "Deletion"} );
1022 }
1023 
1024 void matrixCleverBlockTransposeOpenMPTest::actualTestMethod ( tester &test, int param1, int param2 )
1025 {
1026  const global_bytesize size = param1;
1027  const global_bytesize memlines = param2;
1028  const global_bytesize mem = size * sizeof ( double ) * memlines;
1029  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1030 
1032  rambrainglobals::config.resizeSwap ( swapmem );
1033 
1034 
1035  test.addTimeMeasurement();
1036 
1037  // Transpose blockwise, leave a bit free space, if not, we're stuck in the process...
1038 
1039  unsigned int ompt = 2;
1040 #ifndef OpenMP_NOT_FOUND
1041  ompt = omp_get_max_threads();
1042 #endif
1043  unsigned int rows_fetch = sqrt ( memlines * size / 2 / ompt );
1044  unsigned int blocksize = rows_fetch * rows_fetch;
1045 
1046  unsigned int n_blocks = size / rows_fetch + ( size % rows_fetch == 0 ? 0 : 1 );
1047 
1048  //Blocks are rows_fetch² matrices stored in n_blocks² blocks.
1049 
1050 #define blockIdx(x,y) ((x/rows_fetch)*n_blocks+y/rows_fetch)
1051 #define inBlockX(x,y) (x%rows_fetch)
1052 #define inBlockY(x,y) (y%rows_fetch)
1053 #define inBlockIdx(x,y) (inBlockX(x,y)*rows_fetch+inBlockY(x,y))
1054  // Allocate and set
1055  managedPtr<double> *rows[n_blocks * n_blocks];
1056  // Enable overcommiting:
1057  managedMemory::defaultManager->setOutOfSwapIsFatal ( false );
1058  #pragma omp parallel for schedule(dynamic)
1059  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
1060  for ( unsigned int ii = 0; ii < n_blocks; ii++ ) {
1061  rows[ii * n_blocks + jj] = new managedPtr<double> ( blocksize );
1062  adhereTo<double> adh ( *rows[ii * n_blocks + jj] );
1063  double *locPtr = adh;
1064  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
1065  unsigned int j_lim = rows_fetch;//( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
1066  for ( unsigned int i = 0; i < i_lim; i++ ) {
1067  for ( unsigned int j = 0; j < j_lim; j++ ) {
1068  locPtr[i * rows_fetch + j] = ( ii * rows_fetch + i ) * size + ( j + rows_fetch * jj );
1069  }
1070  }
1071  }
1072  }
1073  test.addTimeMeasurement();
1074 
1075  #pragma omp parallel for ordered schedule(dynamic)
1076  for ( unsigned int jj = 0; jj < n_blocks; jj++ ) {
1077 
1078  for ( unsigned int ii = 0; ii <= jj; ii++ ) {
1079  //A_iijj <-> B_jjii
1080 
1081  //Reserve rows ii and jj
1082  unsigned int i_lim = ( ii + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, vertical limit
1083  unsigned int j_lim = ( jj + 1 == n_blocks && size % rows_fetch != 0 ? size % rows_fetch : rows_fetch ); // Block A, horizontal limit
1084 
1085  //Get rows A_ii** and B_jj** into memory:
1086  adhereTo<double> aBlock ( *rows[ii * n_blocks + jj] );
1087  adhereTo<double> bBlock ( *rows[jj * n_blocks + ii] );
1088 
1089  double *aLoc, *bLoc;
1090  {
1091  LISTOFINGREDIENTS;
1092  aLoc = aBlock;
1093  bLoc = bBlock;
1094  }
1095  for ( unsigned int j = 0; j < j_lim; j++ ) {
1096  for ( unsigned int i = 0; i < ( jj == ii ? j : i_lim ); i++ ) { //Inner block matrix transpose, vertical index in A
1097  //Inner block matrxi transpose, horizontal index in A
1098  double inter = aLoc[i * rows_fetch + j]; //Store inner element A_ij
1099  aLoc[i * rows_fetch + j] = bLoc[j * rows_fetch + i]; //Override with element of B_ji
1100  bLoc[j * rows_fetch + i] = inter; //set B_ji to former val of A_ij
1101  }
1102  }
1103  }
1104  }
1105 
1106  test.addTimeMeasurement();
1107 
1108 #ifdef PTEST_CHECKS
1109  for ( unsigned int i = 0; i < size; ++i ) {
1110  for ( unsigned int j = 0; j < size; ++j ) {
1111  unsigned int blckidx = blockIdx ( i, j );
1112  unsigned int inblck = inBlockIdx ( i, j );
1113  adhereTo<double> adh ( *rows[blckidx] );
1114  double *loc = adh;
1115  if ( loc[inblck] != j * size + i ) {
1116  printf ( "Failed check!\n" );
1117  }
1118  }
1119  }
1120 #endif
1121 
1122  // Delete
1123  #pragma omp parallel for
1124  for ( unsigned int i = 0; i < n_blocks * n_blocks; ++i ) {
1125  delete rows[i];
1126  }
1127 
1128  test.addTimeMeasurement();
1129 }
1130 
1131 string matrixCleverBlockTransposeOpenMPTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1132 {
1133  stringstream ss;
1134  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1135  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Transposition\", \\" << endl;
1136  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1137  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1138  return ss.str();
1139 }
1140 #endif
1141 
1142 
1143 TESTSTATICS ( matrixMultiplyTest, "Matrix multiplication with matrices being stored in columns / rows" );
1144 
1145 matrixMultiplyTest::matrixMultiplyTest() : performanceTest<int, int> ( "MatrixMultiply" )
1146 {
1147  TESTPARAM ( 1, 10, 6000, 20, true, 4000, "Matrix size per dimension" );
1148  TESTPARAM ( 2, 4000, 15000, 20, true, 6000, "Matrix rows in main memory" );
1149  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Multiplication", "Deletion"} );
1150 }
1151 
1152 void matrixMultiplyTest::actualTestMethod ( tester &test, int param1, int param2 )
1153 {
1154  const global_bytesize size = param1;
1155  const global_bytesize memlines = param2;
1156  const global_bytesize mem = size * sizeof ( double ) * memlines;
1157  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1158 
1160  rambrainglobals::config.resizeSwap ( swapmem );
1161 
1162 
1163  test.addTimeMeasurement();
1164 
1165  // Allocate and set matrixes A, B and C
1166  managedPtr<double> *rowsA[size];
1167  managedPtr<double> *colsB[size];
1168  managedPtr<double> *rowsC[size];
1169  for ( global_bytesize i = 0; i < size; ++i ) {
1170  rowsA[i] = new managedPtr<double> ( size );
1171  colsB[i] = new managedPtr<double> ( size );
1172  rowsC[i] = new managedPtr<double> ( size );
1173 
1174  adhereTo<double> adhRowA ( *rowsA[i] );
1175  adhereTo<double> adhColB ( *colsB[i] );
1176  adhereTo<double> adhRowC ( *rowsC[i] );
1177 
1178  double *rowA = adhRowA;
1179  double *colB = adhColB;
1180  double *rowC = adhRowC;
1181 
1182  for ( global_bytesize j = 0; j < size; ++j ) {
1183  rowA[j] = j;
1184  // B = transpose(A)
1185  colB[j] = j;
1186  rowC[j] = 0.0;
1187  }
1188  }
1189 
1190  test.addTimeMeasurement();
1191 
1192  // Calculate C = A * B
1193  for ( global_bytesize i = 0; i < size; ++i ) {
1194  adhereTo<double> adhRowA ( *rowsA[i] );
1195  adhereTo<double> adhRowC ( *rowsC[i] );
1196  double *rowA = adhRowA;
1197  double *rowC = adhRowC;
1198  for ( global_bytesize j = 0; j < size; ++j ) {
1199  adhereTo<double> adhColB ( *colsB[j] );
1200  double *colB = adhColB;
1201  double erg = 0;
1202 
1203  for ( global_bytesize k = 0; k < size; ++k ) {
1204  erg += rowA[k] * colB[k];
1205  }
1206  rowC[j] += erg;
1207  }
1208  }
1209 
1210  test.addTimeMeasurement();
1211 
1212 #ifdef PTEST_CHECKS
1213  double val = 0.0;
1214  for ( global_bytesize i = 1; i <= size; ++i ) {
1215  val += i * i;
1216  }
1217  for ( global_bytesize i = 0; i < size; ++i ) {
1218  adhereTo<double> adhRowC ( *rowsC[i] );
1219 
1220  double *rowC = adhRowC;
1221 
1222  for ( global_bytesize j = 0; j < size; ++j ) {
1223  if ( rowC[j] != val ) {
1224  printf ( "Failed check!\n" );
1225  }
1226  }
1227  }
1228 #endif
1229 
1230  // Delete
1231  for ( global_bytesize i = 0; i < size; ++i ) {
1232  delete rowsA[i];
1233  delete colsB[i];
1234  delete rowsC[i];
1235  }
1236 
1237  test.addTimeMeasurement();
1238 }
1239 
1240 string matrixMultiplyTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1241 {
1242  stringstream ss;
1243  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1244  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Multiplication\", \\" << endl;
1245  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1246  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1247  return ss.str();
1248 }
1249 
1250 
1251 #ifndef OpenMP_NOT_FOUND
1252 TESTSTATICS ( matrixMultiplyOpenMPTest, "Matrix multiplication with matrices being stored in columns / rows" );
1253 
1254 matrixMultiplyOpenMPTest::matrixMultiplyOpenMPTest() : performanceTest<int, int> ( "MatrixMultiplyOpenMP" )
1255 {
1256  TESTPARAM ( 1, 10, 6000, 20, true, 4000, "Matrix size per dimension" );
1257  TESTPARAM ( 2, 4000, 15000, 20, true, 6000, "Matrix rows in main memory" );
1258  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Multiplication", "Deletion"} );
1259 }
1260 
1261 void matrixMultiplyOpenMPTest::actualTestMethod ( tester &test, int param1, int param2 )
1262 {
1263  const global_bytesize size = param1;
1264  const global_bytesize memlines = param2;
1265  const global_bytesize mem = size * sizeof ( double ) * memlines;
1266  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1267 
1269  rambrainglobals::config.resizeSwap ( swapmem );
1270 
1271 
1272  test.addTimeMeasurement();
1273 
1274  // Allocate and set matrixes A, B and C
1275  managedPtr<double> *rowsA[size];
1276  managedPtr<double> *colsB[size];
1277  managedPtr<double> *rowsC[size];
1278 
1279  #pragma omp parallel for
1280  for ( global_bytesize i = 0; i < size; ++i ) {
1281  rowsA[i] = new managedPtr<double> ( size );
1282  colsB[i] = new managedPtr<double> ( size );
1283  rowsC[i] = new managedPtr<double> ( size );
1284 
1285  adhereTo<double> adhRowA ( *rowsA[i] );
1286  adhereTo<double> adhColB ( *colsB[i] );
1287  adhereTo<double> adhRowC ( *rowsC[i] );
1288 
1289  double *rowA = adhRowA;
1290  double *colB = adhColB;
1291  double *rowC = adhRowC;
1292 
1293  for ( global_bytesize j = 0; j < size; ++j ) {
1294  rowA[j] = j;
1295  colB[j] = j;
1296  rowC[j] = 0.0;
1297  }
1298  }
1299 
1300  test.addTimeMeasurement();
1301 
1302  // Calculate C = A * B
1303  for ( global_bytesize i = 0; i < size; ++i ) {
1304  adhereTo<double> adhRowA ( *rowsA[i] );
1305  adhereTo<double> adhRowC ( *rowsC[i] );
1306  double *rowA = adhRowA;
1307  double *rowC = adhRowC;
1308  #pragma omp parallel for
1309  for ( global_bytesize j = 0; j < size; ++j ) {
1310  adhereTo<double> adhColB ( *colsB[j] );
1311  double *colB = adhColB;
1312  double erg = 0;
1313 
1314  for ( global_bytesize k = 0; k < size; ++k ) {
1315  erg += rowA[k] * colB[k];
1316  }
1317  rowC[j] += erg;
1318  }
1319  }
1320 
1321  test.addTimeMeasurement();
1322 
1323 #ifdef PTEST_CHECKS
1324  double val = 0.0;
1325  for ( global_bytesize i = 1; i <= size; ++i ) {
1326  val += i * i;
1327  }
1328  for ( global_bytesize i = 0; i < size; ++i ) {
1329  adhereTo<double> adhRowC ( *rowsC[i] );
1330 
1331  double *rowC = adhRowC;
1332 
1333  for ( global_bytesize j = 0; j < size; ++j ) {
1334  if ( rowC[j] != val ) {
1335  printf ( "Failed check!\n" );
1336  }
1337  }
1338  }
1339 #endif
1340 
1341  // Delete
1342  #pragma omp parallel for
1343  for ( global_bytesize i = 0; i < size; ++i ) {
1344  delete rowsA[i];
1345  delete colsB[i];
1346  delete rowsC[i];
1347  }
1348 
1349  test.addTimeMeasurement();
1350 }
1351 
1352 string matrixMultiplyOpenMPTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1353 {
1354  stringstream ss;
1355  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1356  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Multiplication\", \\" << endl;
1357  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1358  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1359  return ss.str();
1360 }
1361 #endif
1362 
1363 
1364 TESTSTATICS ( matrixCopyTest, "Copy one matrix onto another" );
1365 
1366 matrixCopyTest::matrixCopyTest() : performanceTest<int, int> ( "MatrixCopy" )
1367 {
1368  TESTPARAM ( 1, 100, 10000, 20, true, 5000, "Matrix size per dimension" );
1369  TESTPARAM ( 2, 100, 10000, 20, true, 5000, "Matrix rows in main memory" );
1370  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Copy", "Deletion"} );
1371 }
1372 
1373 void matrixCopyTest::actualTestMethod ( tester &test, int param1, int param2 )
1374 {
1375  const global_bytesize size = param1;
1376  const global_bytesize memlines = param2;
1377  const global_bytesize mem = size * sizeof ( double ) * memlines;
1378  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1379 
1381  rambrainglobals::config.resizeSwap ( swapmem );
1382 
1383 
1384  test.addTimeMeasurement();
1385 
1386  // Allocate and set matrixes A, B
1387  managedPtr<double> *A[size];
1388  managedPtr<double> *B[size];
1389 
1390  for ( global_bytesize i = 0; i < size; ++i ) {
1391  A[i] = new managedPtr<double> ( size );
1392  B[i] = new managedPtr<double> ( size );
1393 
1394  adhereTo<double> adhA ( *A[i] );
1395 
1396  double *a = adhA;
1397 
1398  for ( global_bytesize j = 0; j < size; ++j ) {
1399  a[j] = j;
1400  }
1401  }
1402 
1403  test.addTimeMeasurement();
1404 
1405  // Copy B = A
1406  for ( global_bytesize i = 0; i < size; ++i ) {
1407  adhereTo<double> adhA ( *A[i] );
1408  adhereTo<double> adhB ( *B[i] );
1409 
1410  double *a = adhA;
1411  double *b = adhB;
1412 
1413  for ( global_bytesize j = 0; j < size; ++j ) {
1414  b[j] = a[j];
1415  }
1416  }
1417 
1418  test.addTimeMeasurement();
1419 
1420 #ifdef PTEST_CHECKS
1421  for ( global_bytesize i = 0; i < size; ++i ) {
1422  adhereTo<double> adhB ( *B[i] );
1423 
1424  double *b = adhB;
1425 
1426  for ( global_bytesize j = 0; j < size; ++j ) {
1427  if ( b[j] != j ) {
1428  printf ( "Failed check!\n" );
1429  }
1430  }
1431  }
1432 #endif
1433 
1434  // Delete
1435  for ( global_bytesize i = 0; i < size; ++i ) {
1436  delete A[i];
1437  delete B[i];
1438  }
1439 
1440  test.addTimeMeasurement();
1441 }
1442 
1443 string matrixCopyTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1444 {
1445  stringstream ss;
1446  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1447  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Copy\", \\" << endl;
1448  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1449  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1450  return ss.str();
1451 }
1452 
1453 
1454 #ifndef OpenMP_NOT_FOUND
1455 TESTSTATICS ( matrixCopyOpenMPTest, "Copy one matrix onto another" );
1456 
1457 matrixCopyOpenMPTest::matrixCopyOpenMPTest() : performanceTest<int, int> ( "MatrixCopyOpenMP" )
1458 {
1459  TESTPARAM ( 1, 100, 10000, 20, true, 5000, "Matrix size per dimension" );
1460  TESTPARAM ( 2, 100, 10000, 20, true, 5000, "Matrix rows in main memory" );
1461  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Copy", "Deletion"} );
1462 }
1463 
1464 void matrixCopyOpenMPTest::actualTestMethod ( tester &test, int param1, int param2 )
1465 {
1466  const global_bytesize size = param1;
1467  const global_bytesize memlines = param2;
1468  const global_bytesize mem = size * sizeof ( double ) * memlines;
1469  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1470 
1472  rambrainglobals::config.resizeSwap ( swapmem );
1473 
1474 
1475  test.addTimeMeasurement();
1476 
1477  // Allocate and set matrixes A, B
1478  managedPtr<double> *A[size];
1479  managedPtr<double> *B[size];
1480 
1481  #pragma omp parallel for
1482  for ( global_bytesize i = 0; i < size; ++i ) {
1483  A[i] = new managedPtr<double> ( size );
1484  B[i] = new managedPtr<double> ( size );
1485 
1486  adhereTo<double> adhA ( *A[i] );
1487 
1488  double *a = adhA;
1489 
1490  for ( global_bytesize j = 0; j < size; ++j ) {
1491  a[j] = j;
1492  }
1493  }
1494 
1495  test.addTimeMeasurement();
1496 
1497  // Copy B = A
1498  #pragma omp parallel for
1499  for ( global_bytesize i = 0; i < size; ++i ) {
1500  adhereTo<double> adhA ( *A[i] );
1501  adhereTo<double> adhB ( *B[i] );
1502 
1503  double *a = adhA;
1504  double *b = adhB;
1505 
1506  for ( global_bytesize j = 0; j < size; ++j ) {
1507  b[j] = a[j];
1508  }
1509  }
1510 
1511  test.addTimeMeasurement();
1512 
1513 #ifdef PTEST_CHECKS
1514  #pragma omp parallel for
1515  for ( global_bytesize i = 0; i < size; ++i ) {
1516  adhereTo<double> adhB ( *B[i] );
1517 
1518  double *b = adhB;
1519 
1520  for ( global_bytesize j = 0; j < size; ++j ) {
1521  if ( b[j] != j ) {
1522  printf ( "Failed check!\n" );
1523  }
1524  }
1525  }
1526 #endif
1527 
1528  // Delete
1529  #pragma omp parallel for
1530  for ( global_bytesize i = 0; i < size; ++i ) {
1531  delete A[i];
1532  delete B[i];
1533  }
1534 
1535  test.addTimeMeasurement();
1536 }
1537 
1538 string matrixCopyOpenMPTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1539 {
1540  stringstream ss;
1541  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1542  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Copy\", \\" << endl;
1543  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1544  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1545  return ss.str();
1546 }
1547 #endif
1548 
1549 
1550 TESTSTATICS ( matrixDoubleCopyTest, "Copy one matrix onto another and back" );
1551 
1552 matrixDoubleCopyTest::matrixDoubleCopyTest() : performanceTest<int, int> ( "MatrixDoubleCopy" )
1553 {
1554  TESTPARAM ( 1, 100, 10000, 20, true, 5000, "Matrix size per dimension" );
1555  TESTPARAM ( 2, 100, 10000, 20, true, 5000, "Matrix rows in main memory" );
1556  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Copy", "Deletion"} );
1557 }
1558 
1559 void matrixDoubleCopyTest::actualTestMethod ( tester &test, int param1, int param2 )
1560 {
1561  const global_bytesize size = param1;
1562  const global_bytesize memlines = param2;
1563  const global_bytesize mem = size * sizeof ( double ) * memlines;
1564  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1565 
1567  rambrainglobals::config.resizeSwap ( swapmem );
1568 
1569 
1570  test.addTimeMeasurement();
1571 
1572  // Allocate and set matrixes A, B
1573  managedPtr<double> *A[size];
1574  managedPtr<double> *B[size];
1575 
1576  for ( global_bytesize i = 0; i < size; ++i ) {
1577  A[i] = new managedPtr<double> ( size );
1578  B[i] = new managedPtr<double> ( size );
1579 
1580  adhereTo<double> adhA ( *A[i] );
1581 
1582  double *a = adhA;
1583 
1584  for ( global_bytesize j = 0; j < size; ++j ) {
1585  a[j] = j;
1586  }
1587  }
1588 
1589  test.addTimeMeasurement();
1590 
1591  // Copy B = A
1592  for ( global_bytesize i = 0; i < size; ++i ) {
1593  adhereTo<double> adhA ( *A[i] );
1594  adhereTo<double> adhB ( *B[i] );
1595 
1596  double *a = adhA;
1597  double *b = adhB;
1598 
1599  for ( global_bytesize j = 0; j < size; ++j ) {
1600  b[j] = a[j];
1601  }
1602  }
1603 
1604  // Copy A = B
1605  for ( global_bytesize i = 0; i < size; ++i ) {
1606  adhereTo<double> adhA ( *A[i] );
1607  adhereTo<double> adhB ( *B[i] );
1608 
1609  double *a = adhA;
1610  double *b = adhB;
1611 
1612  for ( global_bytesize j = 0; j < size; ++j ) {
1613  a[j] = b[j];
1614  }
1615  }
1616 
1617  test.addTimeMeasurement();
1618 
1619 #ifdef PTEST_CHECKS
1620  for ( global_bytesize i = 0; i < size; ++i ) {
1621  adhereTo<double> adhB ( *B[i] );
1622  adhereTo<double> adhA ( *A[i] );
1623  double *b = adhB;
1624  double *a = adhA;
1625 
1626  for ( global_bytesize j = 0; j < size; ++j ) {
1627  if ( a[j] != j || b[j] != j ) {
1628  printf ( "Failed check!\n" );
1629  }
1630  }
1631  }
1632 #endif
1633 
1634  // Delete
1635  for ( global_bytesize i = 0; i < size; ++i ) {
1636  delete A[i];
1637  delete B[i];
1638  }
1639 
1640  test.addTimeMeasurement();
1641 }
1642 
1643 string matrixDoubleCopyTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1644 {
1645  stringstream ss;
1646  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1647  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Copy\", \\" << endl;
1648  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1649  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1650  return ss.str();
1651 }
1652 
1653 
1654 #ifndef OpenMP_NOT_FOUND
1655 TESTSTATICS ( matrixDoubleCopyOpenMPTest, "Copy one matrix onto another and back" );
1656 
1657 matrixDoubleCopyOpenMPTest::matrixDoubleCopyOpenMPTest() : performanceTest<int, int> ( "MatrixDoubleCopyOpenMP" )
1658 {
1659  TESTPARAM ( 1, 100, 10000, 20, true, 5000, "Matrix size per dimension" );
1660  TESTPARAM ( 2, 100, 10000, 20, true, 5000, "Matrix rows in main memory" );
1661  plotParts = vector<string> ( {"Allocation \\\\& Definition", "Copy", "Deletion"} );
1662 }
1663 
1664 void matrixDoubleCopyOpenMPTest::actualTestMethod ( tester &test, int param1, int param2 )
1665 {
1666  const global_bytesize size = param1;
1667  const global_bytesize memlines = param2;
1668  const global_bytesize mem = size * sizeof ( double ) * memlines;
1669  const global_bytesize swapmem = size * size * sizeof ( double ) * 4;
1670 
1672  rambrainglobals::config.resizeSwap ( swapmem );
1673 
1674 
1675  test.addTimeMeasurement();
1676 
1677  // Allocate and set matrixes A, B
1678  managedPtr<double> *A[size];
1679  managedPtr<double> *B[size];
1680 
1681  #pragma omp parallel for
1682  for ( global_bytesize i = 0; i < size; ++i ) {
1683  A[i] = new managedPtr<double> ( size );
1684  B[i] = new managedPtr<double> ( size );
1685 
1686  adhereTo<double> adhA ( *A[i] );
1687 
1688  double *a = adhA;
1689 
1690  for ( global_bytesize j = 0; j < size; ++j ) {
1691  a[j] = j;
1692  }
1693  }
1694 
1695  test.addTimeMeasurement();
1696 
1697  // Copy B = A
1698  #pragma omp parallel for
1699  for ( global_bytesize i = 0; i < size; ++i ) {
1700  adhereTo<double> adhA ( *A[i] );
1701  adhereTo<double> adhB ( *B[i] );
1702 
1703  double *a = adhA;
1704  double *b = adhB;
1705 
1706  for ( global_bytesize j = 0; j < size; ++j ) {
1707  b[j] = a[j];
1708  }
1709  }
1710 
1711  // Copy A = B
1712  #pragma omp parallel for
1713  for ( global_bytesize i = 0; i < size; ++i ) {
1714  adhereTo<double> adhA ( *A[i] );
1715  adhereTo<double> adhB ( *B[i] );
1716 
1717  double *a = adhA;
1718  double *b = adhB;
1719 
1720  for ( global_bytesize j = 0; j < size; ++j ) {
1721  a[j] = b[j];
1722  }
1723  }
1724 
1725  test.addTimeMeasurement();
1726 
1727 #ifdef PTEST_CHECKS
1728  #pragma omp parallel for
1729  for ( global_bytesize i = 0; i < size; ++i ) {
1730  adhereTo<double> adhA ( *A[i] );
1731  adhereTo<double> adhB ( *B[i] );
1732  double *a = adhA;
1733  double *b = adhB;
1734  for ( global_bytesize j = 0; j < size; ++j ) {
1735  if ( a[j] != j || b[j] != j ) {
1736  printf ( "Failed check!\n" );
1737  }
1738  }
1739  }
1740 #endif
1741 
1742  // Delete
1743  #pragma omp parallel for
1744  for ( global_bytesize i = 0; i < size; ++i ) {
1745  delete A[i];
1746  delete B[i];
1747  }
1748 
1749  test.addTimeMeasurement();
1750 }
1751 
1752 string matrixDoubleCopyOpenMPTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1753 {
1754  stringstream ss;
1755  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"Allocation \\\\& Definition\", \\" << endl;
1756  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Copy\", \\" << endl;
1757  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Deletion\", \\" << endl;
1758  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1759  return ss.str();
1760 }
1761 #endif
1762 
1763 
1764 TESTSTATICS ( measureThroughputTest, "Measures throughput under load" );
1765 
1766 measureThroughputTest::measureThroughputTest() : performanceTest<int, int> ( "MeasureThroughput" )
1767 {
1768  TESTPARAM ( 1, 1024, 1024000, 20, true, 1024000, "Byte size per used chunk" );
1769  TESTPARAM ( 2, 1, 200, 20, true, 100, "percentage of array that will be written to" );
1770  plotParts = vector<string> ( {"Set Use", "Prepare", "Calculation"} );
1771  plotTimingStats = false;
1772 }
1773 
1774 
1775 void measureThroughputTest::actualTestMethod ( tester &test, int bytesize , int load )
1776 {
1777  rambrainglobals::config.resizeMemory ( bytesize * 2 );
1778  rambrainglobals::config.resizeSwap ( bytesize * 2 );
1779 
1780  managedPtr<char> ptr[3] = {managedPtr<char> ( bytesize ), managedPtr<char> ( bytesize ), managedPtr<char> ( bytesize ) };
1781  adhereTo<char> *adh[3];
1782 
1783  float rewritetimes = load < 0 ? 1 : ( float ) load / 100.;
1784  int iterations = 10000;
1785 
1786  adh[0] = new adhereTo<char> ( ptr[0] ); //Request element to prepare
1787 
1788  std::chrono::duration<double> allSetuse ( 0 );
1789  std::chrono::duration<double> allPrepare ( 0 );
1790  std::chrono::duration<double> allCalc ( 0 );
1791 
1792  using namespace std::chrono;
1793 #ifdef PTEST_CHECKS
1794  double rewritetimesmin = rewritetimes;
1795  unsigned int iter[3];
1796 #endif
1797 
1798  for ( int i = 0; i < iterations; ++i ) {
1799  unsigned int use = ( i % 3 );
1800  unsigned int prepare = ( ( i + 1 ) % 3 );
1801 
1802 
1803  //Important: First say what you will use, then say, what you will use next!
1804 
1805  high_resolution_clock::time_point t0 = high_resolution_clock::now();
1806  char *loc = * ( adh[use] ); //Actually use the stuff.
1807  high_resolution_clock::time_point t1 = high_resolution_clock::now();
1808  if ( i != iterations - 1 ) {
1809  adh[prepare] = new adhereTo<char> ( ptr[prepare] , true ); //Request element to prepare
1810  }
1811  high_resolution_clock::time_point t2 = high_resolution_clock::now();
1812 
1813  for ( int r = 0; r < rewritetimes * bytesize; r++ ) {
1814  loc[r % bytesize] = r * i;
1815  }
1816  high_resolution_clock::time_point t3 = high_resolution_clock::now();
1817 
1818  std::chrono::duration<double> setuse = duration_cast<duration<double>> ( t1 - t0 );;
1819  std::chrono::duration<double> preparet = duration_cast<duration<double>> ( t2 - t1 );
1820  std::chrono::duration<double> calc = duration_cast<duration<double>> ( t3 - t2 );
1821  if ( load < 0 ) {
1822  rewritetimes *= ( preparet + setuse ) > calc ? 1.01 : .99;
1823  }
1824 
1825 #ifdef PTEST_CHECKS
1826  rewritetimesmin = rewritetimes < rewritetimesmin ? rewritetimes : rewritetimesmin;
1827  iter[use] = i;
1828 #endif
1829  allSetuse += setuse;
1830  allPrepare += preparet;
1831  allCalc += calc;
1832  delete adh[use];
1833  }
1834 
1835 #ifdef PTEST_CHECKS
1836  for ( int x = 0; x < 3; ++x ) {
1837  adhereTo<char> glue ( ptr[x] );
1838  char *loc = glue;
1839  for ( int r = 0; r < rewritetimesmin * bytesize; r++ )
1840  if ( loc[r % bytesize] != ( char ) ( r * iter[x] ) ) {
1841  errmsgf ( "Failed check! %d %d %d", x, iter[x], loc[r % bytesize] );
1842  }
1843  }
1844 #endif
1845  test.addExternalTime ( allSetuse );
1846  test.addExternalTime ( allPrepare );
1847  test.addExternalTime ( allCalc );
1848 }
1849 
1850 string measureThroughputTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1851 {
1852  stringstream ss;
1853  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines title \"SetUse\", \\" << endl;
1854  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Prepare\", \\" << endl;
1855  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Calculation\", \\" << endl;
1856  ss << "'" << file << "' using " << paramColumn << ":($5*100/($3+$4+$5)) with lines title \"busy time in \%\", \\" << endl;
1857  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines title \"Total\"";
1858  return ss.str();
1859 }
1860 
1861 
1862 TESTSTATICS ( measurePreemptiveSpeedupTest, "Measures preemptive vs non preemptive runtime" );
1863 
1864 measurePreemptiveSpeedupTest::measurePreemptiveSpeedupTest() : performanceTest<int, int> ( "MeasurePreemptiveSpeedup" )
1865 {
1866  TESTPARAM ( 1, 1024, 1024000, 20, true, 10240, "Byte size per used chunk" );
1867  TESTPARAM ( 2, 1, 200, 20, true, 100, "percentage of array that will be written to" );
1868  plotParts = vector<string> ( {"Set Use", "Prepare", "Calculation", \
1869  "Set Use *", "Prepare *", "Calculation *"
1870  } );
1871  plotTimingStats = false;
1872 }
1873 
1874 void measurePreemptiveSpeedupTest::actualTestMethod ( tester &test, int bytesize , int load )
1875 {
1876  unsigned int numel = 1024;
1877  rambrainglobals::config.resizeMemory ( numel / 2 * bytesize );
1878  rambrainglobals::config.resizeSwap ( numel * bytesize );
1879 
1880  managedPtr<char> *ptr[numel];
1881  for ( unsigned int n = 0; n < numel; ++n ) {
1882  ptr[n] = new managedPtr<char> ( bytesize );
1883  }
1884 
1885  float rewritetimes = load < 0 ? 1 : ( float ) load / 100.;
1886  int iterations = 10230;
1887 
1888 #ifdef PTEST_CHECKS
1889  double rewritetimesmin = rewritetimes;
1890  unsigned int iter[numel];
1891 #endif
1892 
1893  std::chrono::duration<double> allSetuse ( 0 );
1894  std::chrono::duration<double> allPrepare ( 0 );
1895  std::chrono::duration<double> allCalc ( 0 );
1896 
1897  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( true );
1898  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveUnloading ( true );
1899 
1900  using namespace std::chrono;
1901  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( true );
1902  for ( int i = 0; i < iterations; ++i ) {
1903  unsigned int use = ( i % numel );
1904 
1905  //Important: First say what you will use, then say, what you will use next!
1906  high_resolution_clock::time_point t0 = high_resolution_clock::now();
1907  adhereTo<char> glue ( ptr[use] );
1908  high_resolution_clock::time_point t1 = high_resolution_clock::now();
1909  char *loc = glue; //Actually use the stuff.
1910  high_resolution_clock::time_point t2 = high_resolution_clock::now();
1911 
1912  for ( int r = 0; r < rewritetimes * bytesize; r++ ) {
1913  loc[r % bytesize] = r * i;
1914  }
1915  high_resolution_clock::time_point t3 = high_resolution_clock::now();
1916 
1917  std::chrono::duration<double> setuse = duration_cast<duration<double>> ( t1 - t0 );
1918  std::chrono::duration<double> preparet = duration_cast<duration<double>> ( t2 - t1 );
1919  std::chrono::duration<double> calc = duration_cast<duration<double>> ( t3 - t2 );
1920  if ( load < 0 ) {
1921  rewritetimes *= ( preparet + setuse ) > calc ? 1.01 : .99;
1922  }
1923 #ifdef PTEST_CHECKS
1924  rewritetimesmin = rewritetimes < rewritetimesmin ? rewritetimes : rewritetimesmin;
1925  iter[use] = i;
1926 #endif
1927  allSetuse += setuse;
1928  allPrepare += preparet;
1929  allCalc += calc;
1930  }
1931 
1932  test.addExternalTime ( allSetuse );
1933  test.addExternalTime ( allPrepare );
1934  test.addExternalTime ( allCalc );
1935 
1936  std::chrono::duration<double> allSetuse2 ( 0 );
1937  std::chrono::duration<double> allPrepare2 ( 0 );
1938  std::chrono::duration<double> allCalc2 ( 0 );
1939 
1940  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( false );
1941  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveUnloading ( false );
1942  for ( int i = 0; i < iterations; ++i ) {
1943  unsigned int use = ( i % numel );
1944 
1945  //Important: First say what you will use, then say, what you will use next!
1946  high_resolution_clock::time_point t0 = high_resolution_clock::now();
1947  adhereTo<char> glue ( ptr[use] );
1948  high_resolution_clock::time_point t1 = high_resolution_clock::now();
1949  char *loc = glue; //Actually use the stuff.
1950  high_resolution_clock::time_point t2 = high_resolution_clock::now();
1951 
1952  for ( int r = 0; r < rewritetimes * bytesize; r++ ) {
1953  loc[r % bytesize] = r * i;
1954  }
1955  high_resolution_clock::time_point t3 = high_resolution_clock::now();
1956 
1957  std::chrono::duration<double> setuse = duration_cast<duration<double>> ( t1 - t0 );
1958  std::chrono::duration<double> preparet = duration_cast<duration<double>> ( t2 - t1 );
1959  std::chrono::duration<double> calc = duration_cast<duration<double>> ( t3 - t2 );
1960  if ( load < 0 ) {
1961  rewritetimes *= ( preparet + setuse ) > calc ? 1.01 : .99;
1962  }
1963 #ifdef PTEST_CHECKS
1964  rewritetimesmin = rewritetimes < rewritetimesmin ? rewritetimes : rewritetimesmin;
1965  iter[use] = i;
1966 #endif
1967  allSetuse2 += setuse;
1968  allPrepare2 += preparet;
1969  allCalc2 += calc;
1970  }
1971 
1972 #ifdef PTEST_CHECKS
1973  for ( unsigned int x = 0; x < numel; ++x ) {
1974  adhereTo<char> glue ( ptr[x] );
1975  char *loc = glue;
1976  for ( int r = 0; r < rewritetimesmin * bytesize; r++ )
1977  if ( loc[r % bytesize] != ( char ) ( r * iter[x] ) ) {
1978  errmsgf ( "Failed check! %d %d %d", x, iter[x], loc[r % bytesize] );
1979  }
1980  }
1981 #endif
1982  test.addExternalTime ( allSetuse2 );
1983  test.addExternalTime ( allPrepare2 );
1984  test.addExternalTime ( allCalc2 );
1985 
1986  for ( unsigned int n = 0; n < numel; ++n ) {
1987  delete ptr[n];
1988  }
1989 }
1990 
1991 string measurePreemptiveSpeedupTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
1992 {
1993  stringstream ss;
1994  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines lt 1 lc 1 title \"adhereTo<>\", \\" << endl;
1995  ss << "'" << file << "' using " << paramColumn << ":4 with lines lt 1 lc 2 title \"type *ptr = glue\", \\" << endl;
1996  ss << "'" << file << "' using " << paramColumn << ":5 with lines lt 1 lc 3 title \"Calculation\", \\" << endl;
1997  ss << "'" << file << "' using " << paramColumn << ":(100-($5*100/($3+$4+$5))) with lines lt 1 lc 4 title \"idle time in \%\", \\" << endl;
1998  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines lt 1 lc 5 title \"Total\", \\" << endl;
1999  ss << "'" << file << "' using " << paramColumn << ":6 with lines lt 2 lc 1 title \"adhereTo<> *\", \\" << endl;
2000  ss << "'" << file << "' using " << paramColumn << ":7 with lines lt 2 lc 2 title \"type *ptr = glue *\", \\" << endl;
2001  ss << "'" << file << "' using " << paramColumn << ":8 with lines lt 2 lc 3 title \"Calculation *\", \\" << endl;
2002  ss << "'" << file << "' using " << paramColumn << ":(100-($8*100/($6+$7+$8))) with lines lt 2 lc 4 title \"idle time * in \%\", \\" << endl;
2003  ss << "'" << file << "' using " << paramColumn << ":($6+$7+$8) with lines lt 2 lc 5 title \"Total *\"";
2004  return ss.str();
2005 }
2006 
2007 
2008 TESTSTATICS ( measureExplicitAsyncSpeedupTest, "Measures runtime of preemptive versus non preemptive with explicite asynchronous preparation" );
2009 
2010 measureExplicitAsyncSpeedupTest::measureExplicitAsyncSpeedupTest() : performanceTest<int, int> ( "MeasureExplicitAsyncSpeedup" )
2011 {
2012  TESTPARAM ( 1, 1024, 1024000, 20, true, 10240, "Byte size per used chunk" );
2013  TESTPARAM ( 2, 1, 200, 20, true, 100, "percentage of array that will be written to" );
2014  plotParts = vector<string> ( {"Set Use", "Prepare", "Calculation", "Deletion", \
2015  "Set Use *", "Prepare *", "Calculation *", "Deletion *"
2016  } );
2017  plotTimingStats = false;
2018 }
2019 
2020 void measureExplicitAsyncSpeedupTest::actualTestMethod ( tester &test, int bytesize , int load )
2021 {
2022  unsigned int numel = 1024;
2023  rambrainglobals::config.resizeMemory ( numel / 2 * bytesize );
2024  rambrainglobals::config.resizeSwap ( numel * bytesize );
2025 
2026  managedPtr<char> *ptr[numel];
2027  for ( unsigned int n = 0; n < numel; ++n ) {
2028  ptr[n] = new managedPtr<char> ( bytesize );
2029  }
2030 
2031  float rewritetimes = load < 0 ? 1 : ( float ) load / 100.;
2032  int iterations = 10230;
2033 
2034 #ifdef PTEST_CHECKS
2035  double rewritetimesmin = rewritetimes;
2036  unsigned int iter[numel];
2037 #endif
2038 
2039  std::chrono::duration<double> allSetuse ( 0 );
2040  std::chrono::duration<double> allPrepare ( 0 );
2041  std::chrono::duration<double> allCalc ( 0 );
2042  using namespace std::chrono;
2043  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( true );
2044  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveUnloading ( true );
2045  for ( int i = 0; i < iterations; ++i ) {
2046  unsigned int use = ( i % numel );
2047 
2048 
2049  //Important: First say what you will use, then say, what you will use next!
2050  high_resolution_clock::time_point t0 = high_resolution_clock::now();
2051  adhereTo<char> glue ( *ptr[use] );
2052  high_resolution_clock::time_point t1 = high_resolution_clock::now();
2053  char *loc = glue; //Actually use the stuff.
2054  high_resolution_clock::time_point t2 = high_resolution_clock::now();
2055 
2056  for ( int r = 0; r < rewritetimes * bytesize; r++ ) {
2057  loc[r % bytesize] = r * i;
2058  }
2059  high_resolution_clock::time_point t3 = high_resolution_clock::now();
2060 
2061  std::chrono::duration<double> setuse = duration_cast<duration<double>> ( t1 - t0 );;
2062  std::chrono::duration<double> preparet = duration_cast<duration<double>> ( t2 - t1 );
2063  std::chrono::duration<double> calc = duration_cast<duration<double>> ( t3 - t2 );
2064  if ( load < 0 ) {
2065  rewritetimes *= ( preparet + setuse ) > calc ? 1.01 : .99;
2066  }
2067 
2068  allSetuse += setuse;
2069  allPrepare += preparet;
2070  allCalc += calc;
2071  }
2072 
2073  test.addExternalTime ( allSetuse );
2074  test.addExternalTime ( allPrepare );
2075  test.addExternalTime ( allCalc );
2076 
2077  std::chrono::duration<double> allSetuse2 ( 0 );
2078  std::chrono::duration<double> allPrepare2 ( 0 );
2079  std::chrono::duration<double> allCalc2 ( 0 );
2080  std::chrono::duration<double> allDel ( 0 );
2081 
2082  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( false );
2083  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveUnloading ( false );
2084  adhereTo <char> *adharr[numel];
2085  adharr[0] = new adhereTo<char> ( *ptr[0] );
2086 
2087  for ( int i = 0; i < iterations; ++i ) {
2088  unsigned int use = ( i % numel );
2089  unsigned int prepare = ( ( i + 1 ) % numel );
2090 
2091 
2092  //Important: First say what you will use, then say, what you will use next!
2093  high_resolution_clock::time_point t0 = high_resolution_clock::now();
2094  if ( i != iterations - 1 ) {
2095  adharr[prepare] = new adhereTo<char> ( *ptr[prepare] );
2096  }
2097  high_resolution_clock::time_point t1 = high_resolution_clock::now();
2098  char *loc = *adharr[use]; //Actually use the stuff.
2099  high_resolution_clock::time_point t2 = high_resolution_clock::now();
2100 
2101  for ( int r = 0; r < rewritetimes * bytesize; r++ ) {
2102  loc[r % bytesize] = r * i;
2103  }
2104  high_resolution_clock::time_point t3 = high_resolution_clock::now();
2105  delete adharr[use];
2106  high_resolution_clock::time_point t4 = high_resolution_clock::now();
2107  std::chrono::duration<double> setuse = duration_cast<duration<double>> ( t1 - t0 );;
2108  std::chrono::duration<double> preparet = duration_cast<duration<double>> ( t2 - t1 );
2109  std::chrono::duration<double> calc = duration_cast<duration<double>> ( t3 - t2 );
2110  std::chrono::duration<double> del = duration_cast<duration<double>> ( t4 - t3 );
2111  if ( load < 0 ) {
2112  rewritetimes *= ( preparet + setuse ) > calc ? 1.01 : .99;
2113  }
2114 #ifdef PTEST_CHECKS
2115  rewritetimesmin = rewritetimes < rewritetimesmin ? rewritetimes : rewritetimesmin;
2116  iter[use] = i;
2117 #endif
2118  allSetuse2 += setuse;
2119  allPrepare2 += preparet;
2120  allCalc2 += calc;
2121  allDel += del;
2122  }
2123 
2124 #ifdef PTEST_CHECKS
2125  for ( unsigned int x = 0; x < numel; ++x ) {
2126  adhereTo<char> glue ( *ptr[x] );
2127  char *loc = glue;
2128  for ( int r = 0; r < rewritetimesmin * bytesize; r++ )
2129  if ( loc[r % bytesize] != ( char ) ( r * iter[x] ) ) {
2130  errmsgf ( "Failed check! %d %d %d", x, iter[x], loc[r % bytesize] );
2131  }
2132  }
2133 #endif
2134  test.addExternalTime ( allSetuse2 );
2135  test.addExternalTime ( allPrepare2 );
2136  test.addExternalTime ( allCalc2 );
2137  test.addExternalTime ( allDel );
2138 
2139  for ( unsigned int n = 0; n < numel; ++n ) {
2140  delete ptr[n];
2141  }
2142 
2143 }
2144 
2145 string measureExplicitAsyncSpeedupTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
2146 {
2147  stringstream ss;
2148  ss << "plot '" << file << "' using " << paramColumn << ":3 with lines lt 1 lc 1 title \"adhereTo<>\", \\" << endl;
2149  ss << "'" << file << "' using " << paramColumn << ":4 with lines lt 1 lc 2 title \"type *ptr = glue\", \\" << endl;
2150  ss << "'" << file << "' using " << paramColumn << ":5 with lines lt 1 lc 3 title \"Calculation\", \\" << endl;
2151  ss << "'" << file << "' using " << paramColumn << ":(100-($5*100/($3+$4+$5))) with lines lt 1 lc 5 title \"idle time in \%\", \\" << endl;
2152  ss << "'" << file << "' using " << paramColumn << ":($3+$4+$5) with lines lt 1 lc 6 title \"Total\", \\" << endl;
2153  ss << "'" << file << "' using " << paramColumn << ":6 with lines lt 2 lc 1 title \"adhereTo<> *\", \\" << endl;
2154  ss << "'" << file << "' using " << paramColumn << ":7 with lines lt 2 lc 2 title \"type *ptr = glue *\", \\" << endl;
2155  ss << "'" << file << "' using " << paramColumn << ":8 with lines lt 2 lc 3 title \"Calculation *\", \\" << endl;
2156  ss << "'" << file << "' using " << paramColumn << ":9 with lines lt 2 lc 4 title \"adhereTo<> deletion *\", \\" << endl;
2157  ss << "'" << file << "' using " << paramColumn << ":(100-($8*100/($6+$7+$8+$9))) with lines lt 2 lc 5 title \"idle time * in \%\", \\" << endl;
2158  ss << "'" << file << "' using " << paramColumn << ":($6+$7+$8+$9) with lines lt 2 lc 6 title \"Total *\"";
2159  return ss.str();
2160 }
2161 
2162 
2163 TESTSTATICS ( measureConstSpeedupTest, "Measures runtime of const versus non-const usage of swapped data without preemptive" );
2164 
2165 measureConstSpeedupTest::measureConstSpeedupTest() : performanceTest<int> ( "MeasureConstSpeedup" )
2166 {
2167  TESTPARAM ( 1, 1024, 10240, 20, true, 324, "Byte size of data block" );
2168  plotParts = vector<string> ( {"Non-Const Swap In", "Non-Const Swap out", "Const Swap In", "Const Swap out"} );
2169  plotTimingStats = true;
2170 }
2171 
2172 void measureConstSpeedupTest::actualTestMethod ( tester &test, int kbytesize )
2173 {
2174  const unsigned int nBlocks = 1024;
2175  const unsigned int nDummyBlocks = nBlocks;
2176 
2177  global_bytesize goodBlockSize = nBlocks * kbytesize;
2178 
2179  rambrainglobals::config.resizeMemory ( goodBlockSize );
2180  rambrainglobals::config.resizeSwap ( 2.0 * goodBlockSize );
2181 
2182  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveLoading ( false );
2183  ( ( cyclicManagedMemory * ) managedMemory::defaultManager )->setPreemptiveUnloading ( false );
2184 
2185  // Real data to be fetched later
2186  managedPtr<char> *realData[nBlocks];
2187  for ( unsigned int n = 0; n < nBlocks; ++n ) {
2188  realData[n] = new managedPtr<char> ( kbytesize );
2189  managedPtr<char> *data ( realData[n] );
2190  ADHERETOLOC ( char, data, loc );
2191  for ( int i = 0; i < kbytesize; ++i ) {
2192  loc[i] = i + n * kbytesize;
2193  }
2194  }
2195 
2196  // Dummy data in order to swap out real data
2197  managedPtr<char> *dummyData[nDummyBlocks];
2198  for ( unsigned int n = 0; n < nDummyBlocks; ++n ) {
2199  dummyData[n] = new managedPtr<char> ( kbytesize );
2200  managedPtr<char> *data ( dummyData[n] );
2201  ADHERETOLOC ( char, data, loc );
2202  for ( int i = 0; i < kbytesize; ++i ) {
2203  loc[i] = n * nDummyBlocks + i;
2204  }
2205  }
2206 
2207  // Swap in data non-const
2208  test.addTimeMeasurement();
2209  long long int sum = 0ull;
2210  for ( unsigned int n = 0; n < nBlocks; ++n ) {
2211  managedPtr<char> *data ( realData[n] );
2212  ADHERETOLOC ( char, data, loc );
2213  for ( int i = 0; i < kbytesize; ++i ) {
2214  sum += loc[i];
2215  }
2216  }
2217 
2218  // Swap out data non-const
2219  test.addTimeMeasurement();
2220  sum = 0ull;
2221  for ( unsigned int n = 0; n < nDummyBlocks; ++n ) {
2222  managedPtr<char> *data ( dummyData[n] );
2223  ADHERETOLOC ( char, data, loc );
2224  for ( int i = 0; i < kbytesize; ++i ) {
2225  sum += loc[i];
2226  }
2227  }
2228 
2229  // Swap in data const
2230  test.addTimeMeasurement();
2231  sum = 0ull;
2232  for ( unsigned int n = 0; n < nBlocks; ++n ) {
2233  managedPtr<char> *data ( realData[n] );
2234  ADHERETOLOCCONST ( char, data, loc );
2235  for ( int i = 0; i < kbytesize; ++i ) {
2236  sum += loc[i];
2237  }
2238  }
2239 
2240  // Swap out data const
2241  test.addTimeMeasurement();
2242  sum = 0ull;
2243  for ( unsigned int n = 0; n < nDummyBlocks; ++n ) {
2244  managedPtr<char> *data ( dummyData[n] );
2245  ADHERETOLOC ( char, data, loc );
2246  for ( int i = 0; i < kbytesize; ++i ) {
2247  sum += loc[i];
2248  }
2249  }
2250  test.addTimeMeasurement();
2251 
2252 #ifdef PTEST_CHECKS
2253  for ( unsigned int n = 0; n < nBlocks; ++n ) {
2254  managedPtr<char> *data ( realData[n] );
2255  ADHERETOLOC ( char, data, loc );
2256  for ( int i = 0; i < kbytesize; ++i ) {
2257  if ( loc[i] != static_cast<char> ( i + n * kbytesize ) ) {
2258  errmsgf ( "Failed check! %d %d %d", n, i, loc[i] );
2259  }
2260  }
2261  }
2262 #endif
2263 
2264 
2265  for ( unsigned int n = 0; n < nBlocks; ++n ) {
2266  delete realData[n];
2267  }
2268  for ( unsigned int n = 0; n < nDummyBlocks; ++n ) {
2269  delete dummyData[n];
2270  }
2271 }
2272 
2273 string measureConstSpeedupTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
2274 {
2275  stringstream ss;
2276  ss << "plot '" << file << "' using " << paramColumn << ":2 with lines title \"Non-Const Swap In\", \\" << endl;
2277  ss << "'" << file << "' using " << paramColumn << ":3 with lines title \"Non-Const Swap Out\", \\" << endl;
2278  ss << "'" << file << "' using " << paramColumn << ":4 with lines title \"Const Swap In\", \\" << endl;
2279  ss << "'" << file << "' using " << paramColumn << ":5 with lines title \"Const Swap out\"";
2280  return ss.str();
2281 }
2282 
2283 
2284 TESTSTATICS ( demonstrateDecayTest, "Should plot regeneration of hits over misses ratio" );
2285 
2286 demonstrateDecayTest::demonstrateDecayTest() : performanceTest<int> ( "DemonstrateDecay" )
2287 {
2288  TESTPARAM ( 1, 1024, 1024, 1, true, 1024, "Dummy pararameter" );
2289  plotParts = vector<string> ( {"Element allocation", "Consecutive Access", "Random Access", "Regenerating Access"} );
2290  plotTimingStats = true;
2291 }
2292 
2293 void demonstrateDecayTest::actualTestMethod ( tester &test, int kbytesize )
2294 {
2295  const unsigned int n_el = 100240;
2296  const unsigned int efac = 10;
2297  const unsigned int memsize = n_el * sizeof ( char ) / 2;
2298  const unsigned int swapsize = n_el * sizeof ( char ) * 3;
2299 
2301  rambrainglobals::config.resizeSwap ( swapsize );
2302  test.addTimeMeasurement();
2303  managedPtr<char> randomAccess[n_el];
2304  managedPtr<char> consecutiveAccess[n_el];
2305 
2306  test.setSeed();
2307  test.addTimeMeasurement();
2308 
2309  //Set consecutive order and check whether scheduler works correctly
2310  for ( unsigned int n = 0; n < n_el * efac; ++n ) {
2311  adhereTo<char> glue ( consecutiveAccess[n % n_el] );
2312  char *loc = glue;
2313  *loc = n % 256;
2314  }
2315  test.addTimeMeasurement();
2316  //Fill up preemptives with random access to object group randomAccess;
2317  for ( unsigned int n = 0; n < n_el * efac; ++n ) {
2318  unsigned int idx = test.random ( ( int ) n_el - 1 );
2319  adhereTo<char> glue ( randomAccess[idx] );
2320  char *loc = glue;
2321  *loc = n % 256;
2322  }
2323  test.addTimeMeasurement();
2324 
2325  //Now, try to access consecutive object group again
2326  for ( unsigned int n = 0; n < n_el * efac; ++n ) {
2327  adhereTo<char> glue ( consecutiveAccess[n % n_el] );
2328  char *loc = glue;
2329  *loc = n % 256;
2330  }
2331  //And see if we recovered from preemptives:
2332  test.addTimeMeasurement();
2333 }
2334 
2335 string demonstrateDecayTest::generateMyGnuplotPlotPart ( const string &file , int paramColumn )
2336 {
2337  return "";
2338 }
vector< testParameterBase * > parameters
void resizeMemory(global_bytesize memory)
Simple setter.
void addTimeMeasurement()
Saves the current timestamp.
Definition: tester.cpp:36
void resizeSwap(global_bytesize memory)
Simple setter.
void addExternalTime(std::chrono::duration< double >)
Add a duration to the timing list.
Definition: tester.cpp:45
virtual void runTests(unsigned int repetitions, const string &path="./")
Run this performance test with all given parameter variations and handle data collection and plotting...
void setSeed(unsigned int seed=time(NULL))
Set a new seed for random number generation.
Definition: tester.cpp:64
int random(int max) const
Get a random number (integer)
Definition: tester.cpp:74
void startNewTimeCycle()
Starts a new cycle of time measurements.
Definition: tester.cpp:89
virtual void actualTestMethod(tester &test, char **arguments, int &offset, unsigned int argumentscount)=0
Contains the actual test code.
uint64_t global_bytesize
Definition: common.h:65
virtual string valueAsString()=0
Cast the encapsulated parameter to a string and return its mean value.
Derived performance test classes which take parameter types as template arguments.
A basic class to be used by tests. Provides helper methods and functionality e.g. time measurements...
Definition: tester.h:32
rambrainConfig config
You will find the object in managedMemory.cpp as we have to define it in some 'used' file in the link...
TESTSTATICS(matrixTransposeTest,"Measurements of allocation and definition, transposition, deletion times")
Base class for all performance tests which itself does not contain any parameters.
A base class to encapsulate parameters for performance tests.
const global_bytesize mib
Definition: common.h:68