00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <winstd.H>
00027
00028 #include <BoxArray.H>
00029 #include <DistributionMapping.H>
00030 #include <ParallelDescriptor.H>
00031 #include <ParmParse.H>
00032
00033 #include <iostream>
00034
00035 #if defined(BL_OLD_STL)
00036 #include <stdlib.h>
00037 #else
00038 #include <cstdlib>
00039 #endif
00040
00041 #include <list>
00042 #include <vector>
00043 #include <queue>
00044 #include <algorithm>
00045 #include <numeric>
00046
00047
00048
00049 DistributionMapping::Strategy
00050 DistributionMapping::m_Strategy = DistributionMapping::KNAPSACK;
00051
00052 DistributionMapping::PVMF
00053 DistributionMapping::m_BuildMap = &DistributionMapping::KnapSackProcessorMap;
00054
00055 const Array<int>&
00056 DistributionMapping::ProcessorMap () const
00057 {
00058 return m_procmap;
00059 }
00060
00061 int
00062 DistributionMapping::operator[] (int index) const
00063 {
00064 return m_procmap[index];
00065 }
00066
00067 DistributionMapping::Strategy
00068 DistributionMapping::strategy ()
00069 {
00070 return DistributionMapping::m_Strategy;
00071 }
00072
00073 int
00074 DistributionMapping::CacheSize ()
00075 {
00076 return m_Cache.size();
00077 }
00078
00079 void
00080 DistributionMapping::strategy (DistributionMapping::Strategy how)
00081 {
00082 DistributionMapping::m_Strategy = how;
00083
00084 switch (how)
00085 {
00086 case ROUNDROBIN:
00087 m_BuildMap = &DistributionMapping::RoundRobinProcessorMap;
00088 break;
00089 case KNAPSACK:
00090 m_BuildMap = &DistributionMapping::KnapSackProcessorMap;
00091 break;
00092 default:
00093 BoxLib::Error("Bad DistributionMapping::Strategy");
00094 }
00095 }
00096
00097
00098
00099
00100 bool DistributionMapping::m_Initialized = false;
00101
00102 bool
00103 DistributionMapping::operator== (const DistributionMapping& rhs) const
00104 {
00105 return m_procmap == rhs.m_procmap;
00106 }
00107
00108 bool
00109 DistributionMapping::operator!= (const DistributionMapping& rhs) const
00110 {
00111 return !operator==(rhs);
00112 }
00113
00114 void
00115 DistributionMapping::Initialize ()
00116 {
00117 DistributionMapping::m_Initialized = true;
00118
00119 ParmParse pp("DistributionMapping");
00120
00121 std::string theStrategy;
00122
00123 if (pp.query("strategy", theStrategy))
00124 {
00125 if (theStrategy == "ROUNDROBIN")
00126 {
00127 strategy(ROUNDROBIN);
00128 }
00129 else if (theStrategy == "KNAPSACK")
00130 {
00131 strategy(KNAPSACK);
00132 }
00133 else
00134 {
00135 std::string msg("Unknown strategy: ");
00136 msg += theStrategy;
00137 BoxLib::Warning(msg.c_str());
00138 }
00139 }
00140 }
00141
00142 void
00143 DistributionMapping::Finalize ()
00144 {}
00145
00146
00147
00148
00149 std::vector< Array<int> > DistributionMapping::m_Cache;
00150
00151 bool
00152 DistributionMapping::GetMap (const BoxArray& boxes)
00153 {
00154 const int N = boxes.size();
00155
00156 BL_ASSERT(m_procmap.size() == N + 1);
00157
00158
00159
00160 for (int i = m_Cache.size() - 1; i >= 0; i--)
00161 {
00162 if (m_Cache[i].size() == N + 1)
00163 {
00164 const Array<int>& cached_procmap = m_Cache[i];
00165
00166 for (int i = 0; i <= N; i++)
00167 m_procmap[i] = cached_procmap[i];
00168
00169 BL_ASSERT(m_procmap[N] == ParallelDescriptor::MyProc());
00170
00171 return true;
00172 }
00173 }
00174
00175 return false;
00176 }
00177
00178 DistributionMapping::DistributionMapping ()
00179 {}
00180
00181 DistributionMapping::DistributionMapping (const BoxArray& boxes, int nprocs)
00182 :
00183 m_procmap(boxes.size()+1)
00184 {
00185 define(boxes,nprocs);
00186 }
00187
00188 DistributionMapping::DistributionMapping (const BoxArray& boxes, int nprocs,
00189 int* procmapping)
00190 :
00191 m_procmap(boxes.size()+1)
00192 {
00193 for (int i=0;i<boxes.size();i++)
00194 m_procmap[i]=procmapping[i];
00195 m_procmap[boxes.size()]=ParallelDescriptor::MyProc();
00196 }
00197
00198 DistributionMapping::DistributionMapping (const DistributionMapping& d1,
00199 const DistributionMapping& d2)
00200 {
00201 const Array<int>& pmap_1 = d1.ProcessorMap();
00202 const Array<int>& pmap_2 = d2.ProcessorMap();
00203
00204 const int L1 = pmap_1.size() - 1;
00205 const int L2 = pmap_2.size() - 1;
00206
00207 m_procmap.resize(L1+L2+1);
00208
00209 for (int i = 0; i < L1; i++)
00210 m_procmap[i] = pmap_1[i];
00211
00212 for (int i = L1, j = 0; j < L2; i++, j++)
00213 m_procmap[i] = pmap_2[j];
00214
00215
00216
00217 m_procmap[m_procmap.size()-1] = ParallelDescriptor::MyProc();
00218 }
00219
00220 void
00221 DistributionMapping::define (const BoxArray& boxes, int nprocs)
00222 {
00223 if (!(m_procmap.size() == boxes.size()+1))
00224 m_procmap.resize(boxes.size()+1);
00225
00226 if (DistributionMapping::m_Strategy == ROUNDROBIN)
00227 {
00228
00229
00230
00231 (this->*m_BuildMap)(boxes,nprocs);
00232 }
00233 else
00234 {
00235 if (!GetMap(boxes))
00236 {
00237 (this->*m_BuildMap)(boxes,nprocs);
00238
00239 #if defined(BL_USE_MPI) && !defined(BL_NO_PROCMAP_CACHE)
00240
00241
00242
00243 DistributionMapping::m_Cache.push_back(m_procmap);
00244 #endif
00245 }
00246 }
00247 }
00248
00249 DistributionMapping::~DistributionMapping () {}
00250
00251 void
00252 DistributionMapping::FlushCache ()
00253 {
00254 DistributionMapping::m_Cache.clear();
00255 }
00256
00257 void
00258 DistributionMapping::AddToCache (const DistributionMapping& dm)
00259 {
00260 bool doit = true;
00261 const Array<int>& pmap = dm.ProcessorMap();
00262
00263 if (pmap.size() > 0)
00264 {
00265 BL_ASSERT(pmap[pmap.size()-1] == ParallelDescriptor::MyProc());
00266
00267 for (unsigned int i = 0; i < m_Cache.size() && doit; i++)
00268 {
00269 if (pmap.size() == m_Cache[i].size())
00270 {
00271 BL_ASSERT(pmap == m_Cache[i]);
00272
00273 doit = false;
00274 }
00275 }
00276
00277 if (doit)
00278 m_Cache.push_back(pmap);
00279 }
00280 }
00281
00282 void
00283 DistributionMapping::RoundRobinProcessorMap (int nboxes, int nprocs)
00284 {
00285 BL_ASSERT(nboxes > 0);
00286
00287 m_procmap.resize(nboxes+1);
00288
00289 for (int i = 0; i < nboxes; i++)
00290 m_procmap[i] = i % nprocs;
00291
00292
00293
00294 m_procmap[nboxes] = ParallelDescriptor::MyProc();
00295 }
00296
00297 void
00298 DistributionMapping::RoundRobinProcessorMap (const BoxArray& boxes, int nprocs)
00299 {
00300 BL_ASSERT(boxes.size() > 0);
00301 BL_ASSERT(m_procmap.size() == boxes.size()+1);
00302
00303 for (int i = 0; i < boxes.size(); i++)
00304 m_procmap[i] = i % nprocs;
00305
00306
00307
00308 m_procmap[boxes.size()] = ParallelDescriptor::MyProc();
00309 }
00310
00311 class WeightedBox
00312 {
00313 int m_boxid;
00314 long m_weight;
00315 public:
00316 WeightedBox () {}
00317 WeightedBox (int b, int w) : m_boxid(b), m_weight(w) {}
00318 long weight () const { return m_weight; }
00319 int boxid () const { return m_boxid; }
00320
00321 bool operator< (const WeightedBox& rhs) const
00322 {
00323 return weight() > rhs.weight();
00324 }
00325 };
00326
00327 class WeightedBoxList
00328 {
00329 std::list<WeightedBox> m_lb;
00330 long m_weight;
00331 public:
00332 WeightedBoxList() : m_weight(0) {}
00333 long weight () const
00334 {
00335 return m_weight;
00336 }
00337 void erase (std::list<WeightedBox>::iterator& it)
00338 {
00339 m_weight -= (*it).weight();
00340 m_lb.erase(it);
00341 }
00342 void push_back (const WeightedBox& bx)
00343 {
00344 m_weight += bx.weight();
00345 m_lb.push_back(bx);
00346 }
00347 std::list<WeightedBox>::const_iterator begin () const { return m_lb.begin(); }
00348 std::list<WeightedBox>::iterator begin () { return m_lb.begin(); }
00349 std::list<WeightedBox>::const_iterator end () const { return m_lb.end(); }
00350 std::list<WeightedBox>::iterator end () { return m_lb.end(); }
00351
00352 bool operator< (const WeightedBoxList& rhs) const
00353 {
00354 return weight() > rhs.weight();
00355 }
00356 };
00357
00358 static
00359 std::vector< std::list<int> >
00360 knapsack (const std::vector<long>& pts, int nprocs)
00361 {
00362
00363
00364
00365 static std::list<int> empty_list;
00366
00367 std::vector< std::list<int> > result(nprocs, empty_list);
00368
00369 std::vector<WeightedBox> lb;
00370 lb.reserve(pts.size());
00371 for (unsigned int i = 0; i < pts.size(); ++i)
00372 {
00373 lb.push_back(WeightedBox(i, pts[i]));
00374 }
00375 BL_ASSERT(lb.size() == pts.size());
00376 std::sort(lb.begin(), lb.end());
00377 BL_ASSERT(lb.size() == pts.size());
00378
00379
00380
00381 std::priority_queue<WeightedBoxList> wblq;
00382 for (int i = 0; i < nprocs; ++i)
00383 {
00384 wblq.push(WeightedBoxList());
00385 }
00386 BL_ASSERT(int(wblq.size()) == nprocs);
00387 for (unsigned int i = 0; i < pts.size(); ++i)
00388 {
00389 WeightedBoxList wbl = wblq.top();
00390 wblq.pop();
00391 wbl.push_back(lb[i]);
00392 wblq.push(wbl);
00393 }
00394 BL_ASSERT(int(wblq.size()) == nprocs);
00395 std::list<WeightedBoxList> wblqg;
00396 while (!wblq.empty())
00397 {
00398 wblqg.push_back(wblq.top());
00399 wblq.pop();
00400 }
00401 BL_ASSERT(int(wblqg.size()) == nprocs);
00402 wblqg.sort();
00403
00404
00405
00406 double max_weight = 0;
00407 double sum_weight = 0;
00408 std::list<WeightedBoxList>::iterator it = wblqg.begin();
00409 for ( ; it != wblqg.end(); ++it)
00410 {
00411 long wgt = (*it).weight();
00412 sum_weight += wgt;
00413 max_weight = (wgt > max_weight) ? wgt : max_weight;
00414 }
00415 top:
00416 std::list<WeightedBoxList>::iterator it_top = wblqg.begin();
00417 std::list<WeightedBoxList>::iterator it_chk = it_top;
00418 it_chk++;
00419 WeightedBoxList wbl_top = *it_top;
00420
00421
00422
00423 std::list<WeightedBox>::iterator it_wb = wbl_top.begin();
00424 for ( ; it_wb != wbl_top.end(); ++it_wb )
00425 {
00426
00427
00428
00429 for ( ; it_chk != wblqg.end(); ++it_chk)
00430 {
00431 WeightedBoxList wbl_chk = *it_chk;
00432 std::list<WeightedBox>::iterator it_owb = wbl_chk.begin();
00433 for ( ; it_owb != wbl_chk.end(); ++it_owb)
00434 {
00435
00436
00437
00438
00439
00440
00441
00442 double w_tb = (*it_top).weight() + (*it_owb).weight() - (*it_wb).weight();
00443 double w_ob = (*it_chk).weight() + (*it_wb).weight() - (*it_owb).weight();
00444
00445
00446
00447
00448 if (w_tb < (*it_top).weight() && w_ob < (*it_top).weight())
00449 {
00450
00451
00452
00453 WeightedBox wb = *it_wb;
00454 WeightedBox owb = *it_owb;
00455 wblqg.erase(it_top);
00456 wblqg.erase(it_chk);
00457 wbl_top.erase(it_wb);
00458 wbl_chk.erase(it_owb);
00459 wbl_top.push_back(owb);
00460 wbl_chk.push_back(wb);
00461 std::list<WeightedBoxList> tmp;
00462 tmp.push_back(wbl_top);
00463 tmp.push_back(wbl_chk);
00464 tmp.sort();
00465 wblqg.merge(tmp);
00466 max_weight = (*wblqg.begin()).weight();
00467
00468
00469
00470
00471
00472 goto top;
00473 }
00474 }
00475 }
00476 }
00477
00478
00479
00480 std::list<WeightedBoxList>::const_iterator cit = wblqg.begin();
00481 for (int i = 0; i < nprocs; ++i)
00482 {
00483 const WeightedBoxList& wbl = *cit;
00484 std::list<WeightedBox>::const_iterator it1 = wbl.begin();
00485 for ( ; it1 != wbl.end(); ++it1)
00486 {
00487 result[i].push_back((*it1).boxid());
00488 }
00489 ++cit;
00490 }
00491
00492 return result;
00493 }
00494
00495 void
00496 DistributionMapping::KnapSackProcessorMap (const std::vector<long>& pts,
00497 int nprocs)
00498 {
00499 BL_ASSERT(pts.size() > 0);
00500 m_procmap.resize(pts.size()+1);
00501
00502 if (int(pts.size()) <= nprocs || nprocs < 2)
00503 {
00504 RoundRobinProcessorMap(pts.size(),nprocs);
00505 }
00506 else
00507 {
00508 std::vector< std::list<int> > vec = knapsack(pts,nprocs);
00509
00510 BL_ASSERT(int(vec.size()) == nprocs);
00511
00512 std::list<int>::iterator lit;
00513
00514 for (unsigned int i = 0; i < vec.size(); i++)
00515 {
00516 for (lit = vec[i].begin(); lit != vec[i].end(); ++lit)
00517 {
00518 m_procmap[*lit] = i;
00519 }
00520 }
00521
00522
00523
00524 m_procmap[pts.size()] = ParallelDescriptor::MyProc();
00525 }
00526 }
00527
00528 void
00529 DistributionMapping::KnapSackProcessorMap (const BoxArray& boxes, int nprocs)
00530 {
00531 BL_ASSERT(boxes.size() > 0);
00532 BL_ASSERT(m_procmap.size() == boxes.size()+1);
00533
00534 if (boxes.size() <= nprocs || nprocs < 2)
00535 {
00536 RoundRobinProcessorMap(boxes,nprocs);
00537 }
00538 else
00539 {
00540 std::vector<long> pts(boxes.size());
00541
00542 for (unsigned int i = 0; i < pts.size(); i++)
00543 pts[i] = boxes[i].numPts();
00544
00545 std::vector< std::list<int> > vec = knapsack(pts,nprocs);
00546
00547 BL_ASSERT(int(vec.size()) == nprocs);
00548
00549 std::list<int>::iterator lit;
00550
00551 for (unsigned int i = 0; i < vec.size(); i++)
00552 {
00553 for (lit = vec[i].begin(); lit != vec[i].end(); ++lit)
00554 {
00555 m_procmap[*lit] = i;
00556 }
00557 }
00558
00559
00560
00561 m_procmap[boxes.size()] = ParallelDescriptor::MyProc();
00562 }
00563 }
00564
00565 void
00566 DistributionMapping::CacheStats (std::ostream& os)
00567 {
00568 os << "The DistributionMapping cache contains "
00569 << DistributionMapping::m_Cache.size()
00570 << " Processor Map(s):\n";
00571
00572 if (!DistributionMapping::m_Cache.empty())
00573 {
00574 for (unsigned int i = 0; i < m_Cache.size(); i++)
00575 {
00576 os << "\tMap #"
00577 << i
00578 << " is of length "
00579 << m_Cache[i].size()
00580 << '\n';
00581 }
00582 os << '\n';
00583 }
00584 }
00585
00586 std::ostream&
00587 operator<< (std::ostream& os,
00588 const DistributionMapping& pmap)
00589 {
00590 os << "(DistributionMapping" << '\n';
00591
00592
00593
00594 for (int i = 0; i < pmap.ProcessorMap().size() - 1; i++)
00595 {
00596 os << "m_procmap[" << i << "] = " << pmap.ProcessorMap()[i] << '\n';
00597 }
00598
00599 os << ')' << '\n';
00600
00601 if (os.fail())
00602 BoxLib::Error("operator<<(ostream &, DistributionMapping &) failed");
00603
00604 return os;
00605 }