00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <winstd.H>
00027
00028 #if defined(BL_OLD_STL)
00029 #include <stdio.h>
00030 #include <ctype.h>
00031 #include <assert.h>
00032 #include <math.h>
00033 #else
00034 #include <cstdio>
00035 #include <cctype>
00036 #include <cassert>
00037 #include <cmath>
00038 #endif
00039
00040 #include <iostream>
00041 #include <fstream>
00042 #include <iomanip>
00043 #include <map>
00044 #include <algorithm>
00045 #include <vector>
00046 #include <stack>
00047 #include <limits>
00048
00049 #include <Profiler.H>
00050 #include <Thread.H>
00051 #include <ParallelDescriptor.H>
00052 #include <ParmParse.H>
00053
00054 bool timer_packet::increasing = true;
00055 timer_packet::sort_criterion timer_packet::sort_by = timer_packet::sort_self;
00056
00057 #ifdef BL_USE_MPI
00058 namespace ParallelDescriptor
00059 {
00060 template <> MPI_Datatype Mpi_typemap<timer_packet>::type()
00061 {
00062 static MPI_Datatype mine(MPI_DATATYPE_NULL);
00063 if ( mine == MPI_DATATYPE_NULL )
00064 {
00065 timer_packet tp[2];
00066 MPI_Datatype types[] = {
00067 MPI_LB,
00068 MPI_INT,
00069 MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE,
00070 MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE,
00071 MPI_UB};
00072 int blocklens[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1};
00073 MPI_Aint disp[9];
00074 int n = 0;
00075 BL_MPI_REQUIRE( MPI_Address(&tp[0], &disp[n++]) );
00076 BL_MPI_REQUIRE( MPI_Address(&tp[0].count, &disp[n++]) );
00077 BL_MPI_REQUIRE( MPI_Address(&tp[0].time, &disp[n++]) );
00078 BL_MPI_REQUIRE( MPI_Address(&tp[0].self, &disp[n++]) );
00079 BL_MPI_REQUIRE( MPI_Address(&tp[0].avg_time, &disp[n++]) );
00080 BL_MPI_REQUIRE( MPI_Address(&tp[0].var_time, &disp[n++]) );
00081 BL_MPI_REQUIRE( MPI_Address(&tp[0].max_time, &disp[n++]) );
00082 BL_MPI_REQUIRE( MPI_Address(&tp[0].min_time, &disp[n++]) );
00083 BL_MPI_REQUIRE( MPI_Address(&tp[1], &disp[n++]) );
00084 for ( int i = n-1; i >= 0; i-- )
00085 {
00086 disp[i] -= disp[0];
00087 }
00088 BL_ASSERT( n == sizeof(types)/sizeof(MPI_Datatype) );
00089 BL_ASSERT( n == sizeof(disp)/sizeof(MPI_Aint) );
00090 BL_ASSERT( n == sizeof(blocklens)/sizeof(int) );
00091 BL_MPI_REQUIRE( MPI_Type_struct(n, blocklens, disp, types, &mine) );
00092 BL_MPI_REQUIRE( MPI_Type_commit( &mine ) );
00093 }
00094 return mine;
00095 }
00096 }
00097 #endif
00098 namespace
00099 {
00100 std::map<std::string, int> tagr;
00101 std::map<int, std::string> inv_tagr;
00102 Mutex profiling_mutex;
00103 }
00104
00105 class ThreadTimerNode
00106 {
00107 friend std::ostream& operator<<(std::ostream& os, const ThreadTimerNode& tree);
00108 public:
00109 typedef std::map<std::string, ThreadTimerNode*>::iterator iterator;
00110 typedef std::map<std::string, ThreadTimerNode*>::const_iterator const_iterator;
00111 ThreadTimerNode()
00112 : wtimer(new BoxLib::WallTimer),
00113 avg_time(0), var_time(0), max_time(0), min_time(0)
00114 {}
00115 ThreadTimerNode* operator[](const std::string& str);
00116 ThreadTimerNode* find(const std::string& str);
00117 const_iterator begin() const
00118 {
00119 return cntr.begin();
00120 }
00121 const_iterator end() const
00122 {
00123 return cntr.end();
00124 }
00125 void stop();
00126 void start()
00127 {
00128 wtimer->start();
00129 }
00130 double accum_time() const;
00131 int accum_count() const;
00132 double self_time() const;
00133 double max_timer() const
00134 {
00135 return max_time;
00136 }
00137 double min_timer() const
00138 {
00139 return min_time;
00140 }
00141 double child_time() const;
00142 void print(std::ostream& os, const std::string& str, int level);
00143 void mma_print(std::ostream& os, const std::string& str, int level);
00144 const std::map<std::string, ThreadTimerNode*>& nodes() const;
00145 private:
00146 ThreadTimerNode(const ThreadTimerNode&);
00147 ThreadTimerNode& operator=(const ThreadTimerNode&);
00148 BoxLib::WallTimer* wtimer;
00149 double avg_time;
00150 double var_time;
00151 double max_time;
00152 double min_time;
00153 std::map<std::string, ThreadTimerNode*> cntr;
00154 };
00155
00156 inline
00157 void
00158 ThreadTimerNode::stop()
00159 {
00160 wtimer->stop();
00161 double X = wtimer->time();
00162 int N = wtimer->count();
00163 if ( N == 1 )
00164 {
00165 avg_time = X;
00166 max_time = X;
00167 min_time = X;
00168 var_time = 0.0;
00169 }
00170 else
00171 {
00172 max_time = std::max(X, max_time);
00173 min_time = std::min(X, min_time);
00174 double oavg = avg_time;
00175 avg_time = ( X + avg_time*(N-1))/N;
00176 var_time = ( var_time*(N-2) + X*X - N*avg_time*avg_time + (N-1)*oavg*oavg )/ (N - 1);
00177 if ( var_time < 0.0 ) var_time = 0.0;
00178 }
00179 }
00180
00181 const std::map<std::string, ThreadTimerNode*>&
00182 ThreadTimerNode::nodes() const
00183 {
00184 return cntr;
00185 }
00186
00187 double
00188 ThreadTimerNode::accum_time() const
00189 {
00190 return wtimer->accum_time();
00191 }
00192
00193 int
00194 ThreadTimerNode::accum_count() const
00195 {
00196 return wtimer->count();
00197 }
00198
00199 double
00200 ThreadTimerNode::self_time() const
00201 {
00202 return accum_time() - child_time();
00203 }
00204
00205 double
00206 ThreadTimerNode::child_time() const
00207 {
00208 double t = 0;
00209 for ( const_iterator it = begin(); it != end(); ++it )
00210 {
00211 t += it->second->wtimer->accum_time();
00212 }
00213 return t;
00214 }
00215
00216 inline
00217 ThreadTimerNode*
00218 ThreadTimerNode::operator[](const std::string& str)
00219 {
00220 iterator it = cntr.find(str);
00221 if ( it == cntr.end() )
00222 {
00223 return cntr[str] = new ThreadTimerNode;
00224 }
00225 else
00226 {
00227 return it->second;
00228 }
00229 }
00230
00231 inline
00232 ThreadTimerNode*
00233 ThreadTimerNode::find(const std::string& str)
00234 {
00235 iterator it = cntr.find(str);
00236 if ( it == cntr.end() )
00237 {
00238 return cntr[str] = new ThreadTimerNode;
00239 }
00240 else
00241 {
00242 return it->second;
00243 }
00244 }
00245
00246 class ThreadTimerTree
00247 {
00248 public:
00249 ThreadTimerTree()
00250 {
00251 current = new ThreadTimerNode;
00252 }
00253 ThreadTimerNode* push(const std::string& str)
00254 {
00255 ThreadTimerNode* deeper = current->find(str);
00256 ttd_stack.push(current);
00257 current = deeper;
00258 BL_ASSERT(current);
00259 return current;
00260 }
00261 ThreadTimerNode* pop()
00262 {
00263 ThreadTimerNode* t = current;
00264 BL_ASSERT(ttd_stack.size() > 0);
00265 current = ttd_stack.top();
00266 ttd_stack.pop();
00267 BL_ASSERT(t);
00268 return t;
00269 }
00270 ThreadTimerNode* head() const
00271 {
00272 return current;
00273 }
00274 void print(std::ostream& os) const;
00275 void mma_print(std::ostream& os) const;
00276 private:
00277 ThreadTimerTree(const ThreadTimerTree&);
00278 ThreadTimerTree& operator=(const ThreadTimerTree&);
00279 ThreadTimerNode* current;
00280 std::stack<ThreadTimerNode*> ttd_stack;
00281 };
00282
00283 namespace
00284 {
00285 const int spcmult = 2;
00286 const int name_field_width = 40;
00287 const int name_field_bumper = 2;
00288 const int name_field_ellipse = 3;
00289
00290 const int timer_field_width = 24;
00291 const int time_field_width = 14;
00292 const int count_field_width = 8;
00293
00294 void
00295 spacer(std::ostream& os, int n, char c = ' ')
00296 {
00297 for ( int i = 0; i < n; ++i )
00298 {
00299 os << c;
00300 }
00301 }
00302 void show_name_field(std::ostream&, int level, const std::string& str);
00303 void show_time_count(std::ostream&, int wd, double acc, int cnt);
00304 void show_time(std::ostream& os, double time, int scale = 1000);
00305 void show_count(std::ostream& os, int count);
00306 void aggregate_field_title(std::ostream& os);
00307
00308 }
00309
00310 std::ostream&
00311 operator<<(std::ostream& os, const timer_packet& tp)
00312 {
00313 show_count(os, tp.count); os << ' ';
00314 show_time(os, tp.time); os << ' ';
00315 show_time(os, tp.self); os << ' ';
00316 show_time(os, tp.max_time); os << ' ';
00317 show_time(os, tp.min_time); os << ' ';
00318 show_time(os, tp.avg_time); os << ' ';
00319 show_name_field(os, 0, tp.name);
00320 return os;
00321 }
00322
00323
00324 struct ttn_packet
00325 {
00326 std::string name;
00327 int count;
00328 double time;
00329 double self;
00330 double max_time;
00331 double min_time;
00332 double avg_time;
00333 double var_time;
00334 ThreadTimerNode* ttn;
00335 struct by_name
00336 {
00337 bool operator()(const ttn_packet& l, const ttn_packet& r)
00338 {
00339 return l.name < r.name;
00340 }
00341 };
00342 struct by_self
00343 {
00344 bool operator()(const ttn_packet& l, const ttn_packet& r)
00345 {
00346 return l.self > r.self;
00347 }
00348 };
00349 };
00350
00351 void
00352 ThreadTimerNode::print(std::ostream& os, const std::string& str, int level)
00353 {
00354 show_time_count(os, timer_field_width, wtimer->accum_time(), wtimer->count());
00355 show_time(os, self_time()); os << ' ';
00356 show_time(os, max_time); os << ' ';
00357 show_time(os, min_time); os << ' ';
00358 show_time(os, avg_time); os << ' ';
00359 show_time(os, std::sqrt(var_time), 1000000); os << " ";
00360 show_name_field(os, level, str);
00361 os << '\n';
00362 std::vector<ttn_packet> ttns;
00363 for ( const_iterator it = begin(); it != end(); ++it )
00364 {
00365 ttn_packet ttn;
00366 ttn.name = it->first;
00367 ttn.ttn = it->second;
00368 ttn.count = it->second->wtimer->count();
00369 ttn.time = it->second->wtimer->accum_time();
00370 ttn.self = it->second->self_time();
00371 ttn.max_time = it->second->max_time;
00372 ttn.min_time = it->second->min_time;
00373 ttn.avg_time = it->second->avg_time;
00374 ttn.var_time = it->second->var_time;
00375 ttns.push_back(ttn);
00376 }
00377 std::sort(ttns.begin(), ttns.end(), ttn_packet::by_self());
00378 for ( std::vector<ttn_packet>::const_iterator it = ttns.begin(); it != ttns.end(); ++it )
00379 {
00380 it->ttn->print(os, it->name, level+1);
00381 }
00382 }
00383
00384 void
00385 ThreadTimerTree::print(std::ostream& os) const
00386 {
00387 for ( std::map<std::string, ThreadTimerNode*>::const_iterator it = head()->begin(); it != head()->end(); ++it )
00388 {
00389 it->second->print(os, it->first, 0);
00390 }
00391 }
00392
00393 void
00394 ThreadTimerNode::mma_print(std::ostream& os, const std::string& str, int level)
00395 {
00396 os << "\n";
00397 spacer(os, 2*(1+level), ' ');
00398 os << "bl3TimerNode[" << "\"" << str << "\"" << ", "
00399 << wtimer->accum_time() << ", "
00400 << wtimer->count() << ", "
00401 << self_time() << ", "
00402 << max_time << ", "
00403 << min_time << ", "
00404 << avg_time << ", "
00405 << std::sqrt(var_time) << ", ";
00406 os << "{";
00407 for ( const_iterator it = begin(); it != end(); )
00408 {
00409 it->second->mma_print(os, it->first, level+1);
00410 if ( ++it != end() ) os << ", ";
00411 }
00412 os << "}";
00413 os << "]";
00414 }
00415
00416 void
00417 ThreadTimerTree::mma_print(std::ostream& os) const
00418 {
00419 os << "{";
00420 for ( std::map<std::string, ThreadTimerNode*>::const_iterator it = head()->begin(); it != head()->end(); )
00421 {
00422 it->second->mma_print(os, it->first, 0);
00423 if ( ++it != head()->end() ) os << ", ";
00424 }
00425 os << "}";
00426 }
00427
00428 void
00429 grovel(const ThreadTimerNode* nodes, const std::string& str, timer_packet& t)
00430 {
00431 for ( ThreadTimerNode::const_iterator it = nodes->begin(); it != nodes->end(); ++it )
00432 {
00433 if ( it->first == str )
00434 {
00435 t.time += it->second->accum_time();
00436 t.count += it->second->accum_count();
00437 t.self += it->second->self_time();
00438 t.max_time = std::max(it->second->max_timer(), t.max_time);
00439 t.min_time = std::min(it->second->min_timer(), t.min_time);
00440 t.avg_time = t.time/t.count;
00441 }
00442 grovel(it->second, str, t);
00443 }
00444 }
00445
00446 bool Profiler::profiling = true;
00447 int Profiler::Tag::next_itag = 0;
00448
00449 namespace
00450 {
00451 ThreadSpecificData<int> tt_i;
00452 Mutex tt_mutex;
00453 std::vector<ThreadTimerTree*> tt_data;
00454
00455 bool verbose = false;
00456 std::string filename("bl_prof");
00457 bool mma = false;
00458 }
00459
00460 Profiler::Tag::Tag(const std::string& tag_)
00461 : tag(tag_)
00462 {
00463 if ( is_profiling() )
00464 {
00465 Lock<Mutex> lock(profiling_mutex);
00466 if ( tagr.find(tag) != tagr.end() )
00467 {
00468 std::cerr << "name: " << tag_ << " already registered" << std::endl;
00469 BoxLib::Abort();
00470 }
00471 itag = next_itag++;
00472 tagr[tag] = itag;
00473 inv_tagr[itag] = tag;
00474 }
00475 }
00476
00477
00478 const std::string&
00479 Profiler::Tag::name() const
00480 {
00481 return tag;
00482 }
00483
00484 void
00485 Profiler::Initialize()
00486 {
00487 ParmParse pp("profiler");
00488 pp.query("filename", filename);
00489 pp.query("mma", mma);
00490 pp.query("profiling", profiling);
00491 pp.query("verbose", verbose);
00492 if ( verbose )
00493 {
00494 std::cout << "profiler.filename" << filename << std::endl;
00495 std::cout << "profiler.mma" << mma << std::endl;
00496 std::cout << "profiler.profiling" << profiling << std::endl;
00497 std::cout << "profiler.verbose" << verbose << std::endl;
00498 }
00499 }
00500
00501 std::string
00502 Profiler::clean_name(const std::string& str)
00503 {
00504 #ifdef __GNUC__
00505
00506
00507
00508 std::string result;
00509 unsigned int i = 0;
00510 unsigned cnt = 0;
00511 while ( i < str.length() )
00512 {
00513 if ( !isdigit(str[i]) ) break;
00514 cnt += (str[i] - '0') + cnt*10;
00515 i++;
00516 }
00517 for (; i < str.length(); ++i)
00518 {
00519 result += str[i];
00520 }
00521 return result;
00522 #else
00523 return str;
00524 #endif
00525 }
00526
00527 void
00528 Profiler::Finalize()
00529 {
00530 }
00531
00532 Profiler::Profiler(const Tag& tag_, bool hold)
00533 : tag(tag_), started(false)
00534 {
00535 int* a = tt_i.get();
00536 if ( a == 0 )
00537 {
00538 tt_i.set(a = new int);
00539 Lock<Mutex> lock(tt_mutex);
00540 *a = tt_data.size();
00541 tt_data.push_back(new ThreadTimerTree);
00542 }
00543 if ( !hold )
00544 {
00545 start();
00546 }
00547 }
00548
00549 Profiler::~Profiler()
00550 {
00551 if ( started ) stop();
00552 }
00553
00554 void
00555 Profiler::start()
00556 {
00557 BL_ASSERT( !started );
00558 if ( profiling ) tt_data[*tt_i.get()]->push(tag.name())->start();
00559 started = true;
00560 }
00561
00562 void
00563 Profiler::stop()
00564 {
00565 BL_ASSERT( started );
00566 if ( profiling ) tt_data[*tt_i.get()]->pop()->stop();
00567 started = false;
00568 }
00569
00570 namespace
00571 {
00572 void
00573 mma_dump()
00574 {
00575
00576 std::string mma_fname = filename + ".m";
00577 if ( ParallelDescriptor::IOProcessor() )
00578 {
00579 std::ofstream os(mma_fname.c_str());
00580 os << "{";
00581 }
00582 for ( int i = 0; i < ParallelDescriptor::NProcs(); ++i )
00583 {
00584 if ( i == ParallelDescriptor::MyProc() )
00585 {
00586 std::ofstream os(mma_fname.c_str(), std::ios::app);
00587 std::ios::fmtflags oldFlags = os.flags();
00588 os.setf(std::ios::fixed, std::ios::floatfield);
00589 int oprec = os.precision(8);
00590 os << "{";
00591 for ( std::vector<ThreadTimerTree*>::const_iterator it = tt_data.begin(); it != tt_data.end(); )
00592 {
00593 (*it)->mma_print(os);
00594 if ( ++it != tt_data.end() ) os << ", ";
00595 }
00596 os.precision(oprec);
00597 os.flags(oldFlags);
00598 os << "}\n";
00599 if ( i != ParallelDescriptor::NProcs()-1 ) os << ", ";
00600 }
00601 ParallelDescriptor::Barrier();
00602 }
00603 if ( ParallelDescriptor::IOProcessor() )
00604 {
00605 std::ofstream os(mma_fname.c_str(), std::ios::app);
00606 os << "}";
00607 }
00608 }
00609 }
00610
00611 void
00612 Profiler::glean()
00613 {
00614
00615 std::vector<timer_packet> tps;
00616 for ( std::map<std::string, int>::const_iterator jt = tagr.begin(); jt != tagr.end(); ++jt )
00617 {
00618 timer_packet t;
00619 t.name = jt->first;
00620 for ( std::vector<ThreadTimerTree*>::const_iterator it = tt_data.begin(); it != tt_data.end(); ++it)
00621 {
00622 grovel((*it)->head(), jt->first, t);
00623 }
00624 tps.push_back(t);
00625 }
00626
00627 std::vector< std::vector< timer_packet> > t_packets;
00628 if ( ParallelDescriptor::IOProcessor() )
00629 {
00630 t_packets.resize(ParallelDescriptor::NProcs());
00631 t_packets[0] = tps;
00632 }
00633
00634 std::vector<size_t> ntags
00635 = ParallelDescriptor::Gather(tagr.size(), ParallelDescriptor::IOProcessorNumber());
00636 for ( int i = 1; i < ParallelDescriptor::NProcs(); ++i )
00637 {
00638 if ( ParallelDescriptor::IOProcessor() )
00639 {
00640 std::vector<size_t> lngths(ntags[i]);
00641 ParallelDescriptor::Recv(lngths, i, 101);
00642 std::vector< std::string > strngs;
00643 for ( unsigned int j = 0; j < lngths.size(); ++j )
00644 {
00645 std::vector<char> a(lngths[j]);
00646 ParallelDescriptor::Recv(a, i, 102);
00647 strngs.push_back(std::string(a.begin(), a.end()));
00648 }
00649 t_packets[i].resize(ntags[i]);
00650 ParallelDescriptor::Recv(t_packets[i], i, 103);
00651 for ( unsigned int j = 0; j < ntags[i]; ++j )
00652 {
00653 t_packets[i][j].name = strngs[j];
00654 }
00655 }
00656 else if ( ParallelDescriptor::MyProc() == i )
00657 {
00658 std::vector<size_t> lngths;
00659 for ( std::vector<timer_packet>::const_iterator it = tps.begin(); it != tps.end(); ++it )
00660 {
00661 lngths.push_back(it->name.size());
00662 }
00663 ParallelDescriptor::Send(lngths, 0, 101);
00664 for ( std::vector<timer_packet >::const_iterator it = tps.begin(); it != tps.end(); ++it )
00665 {
00666 const char* name = it->name.c_str();
00667 ParallelDescriptor::Send(name, it->name.size(), 0, 102);
00668 }
00669 ParallelDescriptor::Send(tps, 0, 103);
00670 }
00671 }
00672
00673 if ( ParallelDescriptor::IOProcessor() )
00674 {
00675 std::vector< timer_packet > tp_total;
00676 for ( int i = 0; i < ParallelDescriptor::NProcs(); ++i )
00677 {
00678 std::copy(t_packets[i].begin(), t_packets[i].end(), std::back_inserter(tp_total));
00679 }
00680 std::sort(tp_total.begin(), tp_total.end(), timer_packet::by_name());
00681 std::vector<timer_packet> tp_summary;
00682 timer_packet t;
00683 for ( std::vector<timer_packet>::iterator it = tp_total.begin();
00684 it != tp_total.end(); ++it )
00685 {
00686 t += *it;
00687 std::vector<timer_packet>::const_iterator it1(it); ++it1;
00688 if ( it1 == tp_total.end() || it->name != it1->name )
00689 {
00690
00691 tp_summary.push_back(t);
00692 t = timer_packet();
00693 continue;
00694 }
00695 }
00696 std::sort(tp_summary.begin(), tp_summary.end());
00697
00698
00699 std::ofstream os(filename.c_str());
00700 if ( !os )
00701 {
00702 std::cerr << "filename = " << filename << std::endl;
00703 BoxLib::Error("failed to open prof file");
00704 }
00705
00706 os << "------------------------------------------------------------------------\n\n";
00707 os << "Profiling report\n\n";
00708 os << "------------------------------------------------------------------------\n\n";
00709 os << "Timer resolution is "; show_time(os, BoxLib::WallTimer::tick(), 1000000); os << " (us)\n";
00710 os << "Number of Processors: " << ParallelDescriptor::NProcs() << std::endl;
00711
00712 spacer(os, 2, '\n');
00713 spacer(os, 72, '-'); os << '\n';
00714 spacer(os, 72, '-'); os << '\n';
00715 os << "Aggregate report\n\n";
00716 spacer(os, 16, '-'); os << '\n';
00717 aggregate_field_title(os);
00718 for ( std::vector<timer_packet>::const_iterator it = tp_summary.begin();
00719 it != tp_summary.end();++it)
00720 os << *it << "\n";
00721
00722 spacer(os, 2, '\n');
00723 spacer(os, 72, '-'); os << '\n';
00724 spacer(os, 72, '-'); os << '\n';
00725 os << "Per-Processor Report" << '\n';
00726 spacer(os, 20, '-'); os << '\n';
00727 os << "Number of processes Reporting " << t_packets.size() << std::endl;
00728 for ( int i = 0; i < ParallelDescriptor::NProcs(); ++i )
00729 {
00730 std::vector< timer_packet > tps( t_packets[i] );
00731 std::sort(tps.begin(), tps.end());
00732 os << "\nProcessor :" << i << std::endl;
00733 spacer(os, 20, '-'); os << '\n';
00734 aggregate_field_title(os);
00735 for ( std::vector<timer_packet>::const_iterator it = tps.begin();
00736 it != tps.end();++it)
00737 os << *it << "\n";
00738 }
00739
00740 spacer(os, 2, '\n');
00741 spacer(os, 72, '-'); os << '\n';
00742 os << "Details Profiling report\n";
00743 spacer(os, 72, '-'); os << '\n';
00744 }
00745
00746 for ( int i = 0; i < ParallelDescriptor::NProcs(); ++i )
00747 {
00748 if ( i == ParallelDescriptor::MyProc() )
00749 {
00750 std::ofstream os(filename.c_str(), std::ios::app);
00751 os << "\nProcessor Number " << std::setw(4) << i << std::endl;
00752 os << "-----------------" "----\n";
00753 os << "\nNumber of threads = " << std::setw(4) << tt_data.size() << std::endl;
00754 os << "--------------------" "----\n";
00755 int cnt = 0;
00756 for ( std::vector<ThreadTimerTree*>::const_iterator it = tt_data.begin(); it != tt_data.end(); ++it)
00757 {
00758 os << "\n\n";
00759 os << "Thread " << std::setw(4) << cnt++ << std::endl;
00760 os << "-------" "----\n";
00761 std::ios::fmtflags fldadjust = os.setf(std::ios::right, std::ios::adjustfield);
00762 os << std::setw(timer_field_width) << "Time (ms)/ Count " << ' ';
00763 os << std::setw(time_field_width) << "Self (ms)" << ' ';
00764 os << std::setw(time_field_width) << "Max (ms)" << ' ';
00765 os << std::setw(time_field_width) << "Min (ms)" << ' ';
00766 os << std::setw(time_field_width) << "Avg (ms)" << ' ';
00767 os << std::setw(time_field_width) << "STD (us)" << ' ';
00768 os.setf(std::ios::left, std::ios::adjustfield);
00769 os << std::setw(name_field_width) << "Timer: ";
00770 os << std::endl << std::flush;
00771 os.setf(fldadjust, std::ios::adjustfield);
00772 spacer(os, name_field_width + timer_field_width + 5*(time_field_width+1), '-'); os << '\n';
00773 (*it)->print(os);
00774 os << '\n';
00775 aggregate_field_title(os);
00776 std::vector<timer_packet> tps;
00777 for ( std::map<std::string, int>::const_iterator jt = tagr.begin(); jt != tagr.end(); ++jt )
00778 {
00779 timer_packet t;
00780 t.name = jt->first;
00781 grovel((*it)->head(), jt->first, t);
00782 tps.push_back(t);
00783 }
00784 std::sort(tps.begin(), tps.end());
00785 for (std::vector<timer_packet>::const_iterator jt = tps.begin();
00786 jt != tps.end(); ++jt )
00787 {
00788 if ( jt->time != 0 )
00789 {
00790 os << *jt << std::endl;
00791 }
00792 }
00793 }
00794 os << std::endl;
00795 }
00796 ParallelDescriptor::Barrier();
00797 }
00798 if ( mma )
00799 {
00800 mma_dump();
00801 }
00802 }
00803
00804
00805 bool
00806 Profiler::is_profiling()
00807 {
00808 Lock<Mutex> lock(profiling_mutex);
00809 return profiling;
00810 }
00811
00812 void
00813 Profiler::on()
00814 {
00815 Lock<Mutex> lock(profiling_mutex);
00816 profiling = true;
00817 }
00818
00819 void
00820 Profiler::off()
00821 {
00822 Lock<Mutex> lock(profiling_mutex);
00823 profiling = false;
00824 }
00825
00826
00827 namespace
00828 {
00829
00830 void
00831 show_time_count(std::ostream& os, int wd, double time, int count)
00832 {
00833 std::ios::fmtflags oldFlags = os.flags();
00834 if ( wd >= 3 )
00835 {
00836 wd -= 3;
00837 }
00838 int fwd = 2*wd/3;
00839 int oprec = os.precision();
00840 int prec = fwd-1-5-3;
00841 if ( prec < 0 ) prec = oprec;
00842 #ifdef __GNUC__
00843 char buff[128];
00844 sprintf(buff, "[%*.*f/%*d]", 2*wd/3, prec, time*1000, 1*wd/3, count);
00845 os << buff;
00846 #else
00847 os << std::fixed;
00848 os << std::showpoint;
00849 os << std::setprecision(prec);
00850 os << std::setw(0)
00851 << "[" << std::setw(2*wd/3) << time*1000
00852 << "/" << std::setw(1*wd/3) << count << "]";
00853 os << std::setprecision(oprec);
00854 #endif
00855 os.flags(oldFlags);
00856 }
00857
00858 void
00859 show_time(std::ostream& os, double time, int scale)
00860 {
00861 int precred = 0;
00862 int tscale = scale;
00863 while ( tscale /= 10 )
00864 {
00865 precred++;
00866 }
00867 std::ios::fmtflags oldFlags = os.flags();
00868 int fwd = time_field_width;
00869 int oprec = os.precision();
00870 int prec = fwd-1-5-precred;
00871 if ( prec < 0 ) prec = oprec;
00872 #ifdef __GNUC__
00873 char buff[1024];
00874 sprintf(buff, "%*.*f", fwd, prec, time*scale);
00875 os << buff;
00876 #else
00877 os << std::fixed;
00878 os << std::showpoint;
00879 os << std::setprecision(prec) << std::setw(fwd) << time*scale;
00880 os << std::setprecision(oprec);
00881 #endif
00882 os.flags(oldFlags);
00883 }
00884
00885 void
00886 show_count(std::ostream& os, int count)
00887 {
00888 os << std::setw(count_field_width) << count;
00889 }
00890
00891 void
00892 show_name_field(std::ostream& os, int level, const std::string& str)
00893 {
00894 spacer(os, level*spcmult);
00895 os << str;
00896 if ( false )
00897 {
00898 int len = str.size();
00899 int wdth = name_field_width - level*spcmult - name_field_bumper;
00900 if ( len > wdth )
00901 {
00902 int plen = len - wdth + name_field_ellipse;
00903 spacer(os, name_field_ellipse, '.');
00904 os << std::setw(wdth-name_field_ellipse) << str.substr(plen);
00905 }
00906 else
00907 {
00908 os << std::setw(wdth) << str.substr(0, wdth);
00909 }
00910 spacer(os, name_field_bumper);
00911 }
00912 }
00913
00914 void
00915 aggregate_field_title(std::ostream& os)
00916 {
00917 std::ios::fmtflags fldadjust = os.setf(std::ios::right, std::ios::adjustfield);
00918 os << std::setw(count_field_width) << "Count" << ' ';
00919 os << std::setw(time_field_width) << "Total (ms)" << ' ';
00920 os << std::setw(time_field_width) << "Self (ms)" << ' ';
00921 os << std::setw(time_field_width) << "Max (ms)" << ' ';
00922 os << std::setw(time_field_width) << "Min (ms)" << ' ';
00923 os << std::setw(time_field_width) << "Avg (ms)" << ' ';
00924 os.setf(std::ios::left, std::ios::adjustfield);
00925 os << std::setw(name_field_width) << "Registered profilers:";
00926 os.setf(fldadjust, std::ios::adjustfield);
00927 os << '\n';
00928 spacer(os, name_field_width + count_field_width + 5*(time_field_width+1), '-'); os << '\n';
00929 }
00930
00931 }