Line data Source code
1 : !|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
2 :
3 : module ice_distribution
4 :
5 : ! This module provides data types and routines for distributing
6 : ! blocks across processors.
7 : !
8 : ! author: Phil Jones, LANL
9 : ! Oct. 2004: Adapted from POP by William H. Lipscomb, LANL
10 : ! Jan. 2008: Elizabeth Hunke updated to new POP infrastructure
11 :
12 : use ice_kinds_mod
13 : use ice_domain_size, only: max_blocks
14 : use ice_communicate, only: my_task, master_task, create_communicator
15 : use ice_blocks, only: nblocks_x, nblocks_y, nblocks_tot, debug_blocks
16 : use ice_exit, only: abort_ice
17 : use ice_fileunits, only: nu_diag
18 :
19 : implicit none
20 : private
21 : save
22 :
23 : type, public :: distrb ! distribution data type
24 : integer (int_kind) :: &
25 : nprocs ,&! number of processors in this dist ! LCOV_EXCL_LINE
26 : communicator ,&! communicator to use in this dist ! LCOV_EXCL_LINE
27 : numLocalBlocks ! number of blocks distributed to this
28 : ! local processor
29 :
30 : integer (int_kind), dimension(:), pointer :: &
31 : blockLocation ,&! processor location for all blocks ! LCOV_EXCL_LINE
32 : blockLocalID ,&! local block id for all blocks ! LCOV_EXCL_LINE
33 : blockGlobalID ! global block id for each local block
34 :
35 : integer (int_kind), dimension(:), pointer :: blockCnt
36 : integer (int_kind), dimension(:,:), pointer :: blockIndex
37 :
38 : end type
39 :
40 : public :: create_distribution, &
41 : ice_distributionGet, & ! LCOV_EXCL_LINE
42 : ice_distributionGetBlockLoc, & ! LCOV_EXCL_LINE
43 : ice_distributionGetBlockID, & ! LCOV_EXCL_LINE
44 : create_local_block_ids
45 :
46 : character (char_len), public :: &
47 : processor_shape ! 'square-pop' (approx) POP default config
48 : ! 'square-ice' like square-pop but better for ice
49 : ! 'slenderX1' (NPX x 1)
50 : ! 'slenderX2' (NPX x 2)
51 :
52 : !***********************************************************************
53 :
54 : contains
55 :
56 : !***********************************************************************
57 :
58 37 : function create_distribution(dist_type, nprocs, work_per_block)
59 :
60 : ! This routine determines the distribution of blocks across processors
61 : ! by call the appropriate subroutine based on distribution type
62 : ! requested. Currently three distributions are supported:
63 : ! 2-d Cartesian distribution (cartesian), a load-balanced
64 : ! distribution using a rake algorithm based on an input amount of work
65 : ! per block, and a space-filling-curve algorithm.
66 :
67 : character (*), intent(in) :: &
68 : dist_type ! method for distributing blocks
69 : ! either cartesian or rake
70 :
71 : integer (int_kind), intent(in) :: &
72 : nprocs ! number of processors in this distribution
73 :
74 : integer (int_kind), dimension(:), intent(in) :: &
75 : work_per_block ! amount of work per block
76 :
77 : type (distrb) :: &
78 : create_distribution ! resulting structure describing
79 : ! distribution of blocks
80 :
81 : character(len=*),parameter :: subname='(create_distribution)'
82 :
83 : !----------------------------------------------------------------------
84 : !
85 : ! select the appropriate distribution type
86 : !
87 : !----------------------------------------------------------------------
88 :
89 74 : select case (trim(dist_type))
90 :
91 : case('cartesian')
92 :
93 28 : create_distribution = create_distrb_cart(nprocs, work_per_block)
94 :
95 : case('rake')
96 :
97 0 : create_distribution = create_distrb_rake(nprocs, work_per_block)
98 :
99 : case('roundrobin')
100 :
101 9 : create_distribution = create_distrb_roundrobin(nprocs, work_per_block)
102 :
103 : case('spiralcenter')
104 :
105 0 : create_distribution = create_distrb_spiralcenter(nprocs, work_per_block)
106 :
107 : case('wghtfile')
108 :
109 0 : create_distribution = create_distrb_wghtfile(nprocs, work_per_block)
110 :
111 : case('sectrobin')
112 :
113 0 : create_distribution = create_distrb_sectrobin(nprocs, work_per_block)
114 :
115 : case('sectcart')
116 :
117 0 : create_distribution = create_distrb_sectcart(nprocs, work_per_block)
118 :
119 : case('spacecurve')
120 :
121 0 : create_distribution = create_distrb_spacecurve(nprocs, work_per_block)
122 :
123 : case default
124 :
125 74 : call abort_ice(subname//'ERROR: ice distribution: unknown distribution type')
126 :
127 : end select
128 :
129 : !-----------------------------------------------------------------------
130 :
131 74 : end function create_distribution
132 :
133 : !***********************************************************************
134 :
135 37 : subroutine create_local_block_ids(block_ids, distribution)
136 :
137 : ! This routine determines which blocks in an input distribution are
138 : ! located on the local processor and creates an array of block ids
139 : ! for all local blocks.
140 :
141 : type (distrb), intent(in) :: &
142 : distribution ! input distribution for which local
143 : ! blocks required
144 :
145 : integer (int_kind), dimension(:), pointer :: &
146 : block_ids ! array of block ids for every block
147 : ! that resides on the local processor
148 : !-----------------------------------------------------------------------
149 : !
150 : ! local variables
151 : !
152 : !-----------------------------------------------------------------------
153 :
154 : integer (int_kind) :: &
155 : n, bcount ! dummy counters
156 :
157 : character(len=*),parameter :: subname='(create_local_block_ids)'
158 :
159 : !-----------------------------------------------------------------------
160 : !
161 : ! first determine number of local blocks to allocate array
162 : !
163 : !-----------------------------------------------------------------------
164 :
165 37 : bcount = 0
166 1126 : do n=1,size(distribution%blockLocation)
167 1126 : if (distribution%blockLocation(n) == my_task+1) bcount = bcount + 1
168 : end do
169 :
170 :
171 37 : if (bcount > 0) allocate(block_ids(bcount))
172 :
173 : !-----------------------------------------------------------------------
174 : !
175 : ! now fill array with proper block ids
176 : !
177 : !-----------------------------------------------------------------------
178 :
179 37 : if (bcount > 0) then
180 1126 : do n=1,size(distribution%blockLocation)
181 1126 : if (distribution%blockLocation(n) == my_task+1) then
182 160 : block_ids(distribution%blockLocalID(n)) = n
183 : endif
184 : end do
185 : endif
186 :
187 37 : end subroutine create_local_block_ids
188 :
189 : !***********************************************************************
190 :
191 28 : subroutine proc_decomposition(nprocs, nprocs_x, nprocs_y)
192 :
193 : ! This subroutine attempts to find an optimal (nearly square)
194 : ! 2d processor decomposition for a given number of processors.
195 :
196 : integer (int_kind), intent(in) :: &
197 : nprocs ! total number or processors
198 :
199 : integer (int_kind), intent(out) :: &
200 : nprocs_x, nprocs_y ! number of procs in each dimension
201 :
202 : !----------------------------------------------------------------------
203 : !
204 : ! local variables
205 : !
206 : !----------------------------------------------------------------------
207 :
208 : integer (int_kind) :: &
209 : iguess, jguess ! guesses for nproc_x,y
210 :
211 : real (real_kind) :: &
212 8 : square ! square root of nprocs
213 :
214 : character(len=*),parameter :: subname='(proc_decomposition)'
215 :
216 : !----------------------------------------------------------------------
217 : !
218 : ! start with an initial guess
219 : !
220 : !----------------------------------------------------------------------
221 :
222 28 : square = sqrt(real(nprocs,kind=real_kind))
223 28 : nprocs_x = 0
224 28 : nprocs_y = 0
225 :
226 28 : if (processor_shape == 'square-pop') then ! make as square as possible
227 0 : iguess = nint(square)
228 0 : jguess = nprocs/iguess
229 28 : elseif (processor_shape == 'square-ice') then ! better for bipolar ice
230 0 : jguess = nint(square)
231 0 : iguess = nprocs/jguess
232 28 : elseif (processor_shape == 'slenderX1') then ! 1 proc in y direction
233 0 : jguess = 1
234 0 : iguess = nprocs/jguess
235 : else ! 2 processors in y direction
236 28 : jguess = min(2, nprocs)
237 28 : iguess = nprocs/jguess
238 : endif
239 :
240 : !----------------------------------------------------------------------
241 : !
242 : ! try various decompositions to find the best
243 : !
244 : !----------------------------------------------------------------------
245 :
246 0 : proc_loop: do
247 28 : if (processor_shape == 'square-pop') then
248 0 : jguess = nprocs/iguess
249 : else
250 28 : iguess = nprocs/jguess
251 : endif
252 :
253 28 : if (iguess*jguess == nprocs) then ! valid decomp
254 :
255 : !*** if the blocks can be evenly distributed, it is a
256 : !*** good decomposition
257 28 : if (mod(nblocks_x,iguess) == 0 .and. &
258 : mod(nblocks_y,jguess) == 0) then
259 28 : nprocs_x = iguess
260 28 : nprocs_y = jguess
261 28 : exit proc_loop
262 :
263 : !*** if the blocks can be evenly distributed in a
264 : !*** transposed direction, it is a good decomposition
265 0 : else if (mod(nblocks_x,jguess) == 0 .and. &
266 : mod(nblocks_y,iguess) == 0) then
267 0 : nprocs_x = jguess
268 0 : nprocs_y = iguess
269 0 : exit proc_loop
270 :
271 : !*** A valid decomposition, but keep searching for
272 : !*** a better one
273 : else
274 0 : if (nprocs_x == 0) then
275 0 : nprocs_x = iguess
276 0 : nprocs_y = jguess
277 : endif
278 0 : if (processor_shape == 'square-pop') then
279 0 : iguess = iguess - 1
280 0 : if (iguess == 0) then
281 0 : exit proc_loop
282 : else
283 0 : cycle proc_loop
284 : endif
285 : else
286 0 : jguess = jguess - 1
287 0 : if (jguess == 0) then
288 0 : exit proc_loop
289 : else
290 0 : cycle proc_loop
291 : endif
292 : endif
293 : endif
294 :
295 : else ! invalid decomp - keep trying
296 :
297 0 : if (processor_shape == 'square-pop') then
298 0 : iguess = iguess - 1
299 0 : if (iguess == 0) then
300 0 : exit proc_loop
301 : else
302 0 : cycle proc_loop
303 : endif
304 : else
305 0 : jguess = jguess - 1
306 0 : if (jguess == 0) then
307 0 : exit proc_loop
308 : else
309 0 : cycle proc_loop
310 : endif
311 : endif
312 : endif
313 :
314 : end do proc_loop
315 :
316 28 : if (nprocs_x == 0) then
317 0 : call abort_ice(subname//'ERROR: Unable to find 2d processor config')
318 : endif
319 :
320 28 : if (my_task == master_task) then
321 5 : write(nu_diag,'(a,a23,i4,a3,i4)') subname,' Processors (X x Y) = ', &
322 10 : nprocs_x,' x ',nprocs_y
323 : endif
324 :
325 : !----------------------------------------------------------------------
326 :
327 28 : end subroutine proc_decomposition
328 :
329 : !**********************************************************************
330 :
331 0 : subroutine ice_distributionDestroy(distribution)
332 :
333 : ! This routine destroys a defined distribution by deallocating
334 : ! all memory associated with the distribution.
335 :
336 : type (distrb), intent(inout) :: &
337 : distribution ! distribution to destroy
338 :
339 : !----------------------------------------------------------------------
340 : !
341 : ! local variables
342 : !
343 : !----------------------------------------------------------------------
344 :
345 : integer (int_kind) :: istat ! status flag for deallocate
346 :
347 : character(len=*),parameter :: subname='(ice_distributionDestroy)'
348 :
349 : !----------------------------------------------------------------------
350 : !
351 : ! reset scalars
352 : !
353 : !----------------------------------------------------------------------
354 :
355 0 : distribution%nprocs = 0
356 0 : distribution%communicator = 0
357 0 : distribution%numLocalBlocks = 0
358 :
359 : !----------------------------------------------------------------------
360 : !
361 : ! deallocate arrays
362 : !
363 : !----------------------------------------------------------------------
364 :
365 0 : deallocate(distribution%blockLocation, stat=istat)
366 0 : deallocate(distribution%blockLocalID , stat=istat)
367 0 : deallocate(distribution%blockGlobalID, stat=istat)
368 0 : deallocate(distribution%blockCnt , stat=istat)
369 0 : deallocate(distribution%blockindex , stat=istat)
370 :
371 :
372 : !-----------------------------------------------------------------------
373 :
374 0 : end subroutine ice_distributionDestroy
375 :
376 : !***********************************************************************
377 :
378 314665 : subroutine ice_distributionGet(distribution,&
379 : nprocs, communicator, numLocalBlocks, & ! LCOV_EXCL_LINE
380 314665 : blockLocation, blockLocalID, blockGlobalID)
381 :
382 : ! This routine extracts information from a distribution.
383 :
384 : type (distrb), intent(in) :: &
385 : distribution ! input distribution for which information
386 : ! is requested
387 :
388 : integer (int_kind), intent(out), optional :: &
389 : nprocs ,&! number of processors in this dist ! LCOV_EXCL_LINE
390 : communicator ,&! communicator to use in this dist ! LCOV_EXCL_LINE
391 : numLocalBlocks ! number of blocks distributed to this
392 : ! local processor
393 :
394 : integer (int_kind), dimension(:), optional :: &
395 : blockLocation ,&! processor location for all blocks ! LCOV_EXCL_LINE
396 : blockLocalID ,&! local block id for all blocks ! LCOV_EXCL_LINE
397 : blockGlobalID ! global block id for each local block
398 :
399 : character(len=*),parameter :: subname='(ice_distributionGet)'
400 :
401 : !-----------------------------------------------------------------------
402 : !
403 : ! depending on which optional arguments are present, extract the
404 : ! requested info
405 : !
406 : !-----------------------------------------------------------------------
407 :
408 314665 : if (present(nprocs)) nprocs = distribution%nprocs
409 314665 : if (present(communicator)) communicator = distribution%communicator
410 314665 : if (present(numLocalBlocks)) numLocalBlocks = distribution%numLocalBlocks
411 :
412 314665 : if (present(blockLocation)) then
413 0 : if (associated(distribution%blockLocation)) then
414 0 : blockLocation = distribution%blockLocation
415 : else
416 0 : call abort_ice(subname//'ERROR: blockLocation not allocated')
417 0 : return
418 : endif
419 : endif
420 :
421 314665 : if (present(blockLocalID)) then
422 0 : if (associated(distribution%blockLocalID)) then
423 0 : blockLocalID = distribution%blockLocalID
424 : else
425 0 : call abort_ice(subname//'ERROR: blockLocalID not allocated')
426 0 : return
427 : endif
428 : endif
429 :
430 314665 : if (present(blockGlobalID)) then
431 37 : if (associated(distribution%blockGlobalID)) then
432 197 : blockGlobalID = distribution%blockGlobalID
433 : else
434 0 : call abort_ice(subname//'ERROR: blockGlobalID not allocated')
435 0 : return
436 : endif
437 : endif
438 :
439 : !-----------------------------------------------------------------------
440 :
441 : end subroutine ice_distributionGet
442 :
443 : !***********************************************************************
444 :
445 16334 : subroutine ice_distributionGetBlockLoc(distribution, blockID, &
446 : processor, localID)
447 :
448 : ! Given a distribution of blocks and a global block ID, return
449 : ! the processor and local index for the block. A zero for both
450 : ! is returned in the case that the block has been eliminated from
451 : ! the distribution (i.e. has no active points).
452 :
453 : type (distrb), intent(in) :: &
454 : distribution ! input distribution for which information
455 : ! is requested
456 :
457 : integer (int_kind), intent(in) :: &
458 : blockID ! global block id for which location requested
459 :
460 : integer (int_kind), intent(out) :: &
461 : processor, &! processor on which block resides ! LCOV_EXCL_LINE
462 : localID ! local index for this block on this proc
463 :
464 : character(len=*),parameter :: subname='(ice_distributionGetBlockLoc)'
465 :
466 : !-----------------------------------------------------------------------
467 : !
468 : ! check for valid blockID
469 : !
470 : !-----------------------------------------------------------------------
471 :
472 16334 : if (blockID < 0 .or. blockID > nblocks_tot) then
473 0 : call abort_ice(subname//'ERROR: invalid block id')
474 0 : return
475 : endif
476 :
477 : !-----------------------------------------------------------------------
478 : !
479 : ! extract the location from the distribution data structure
480 : !
481 : !-----------------------------------------------------------------------
482 :
483 16334 : processor = distribution%blockLocation(blockID)
484 16334 : localID = distribution%blockLocalID (blockID)
485 :
486 : !-----------------------------------------------------------------------
487 :
488 : end subroutine ice_distributionGetBlockLoc
489 :
490 : !***********************************************************************
491 :
492 1242816 : subroutine ice_distributionGetBlockID(distribution, localID, &
493 : blockID)
494 :
495 : ! Given a distribution of blocks and a local block index, return
496 : ! the global block id for the block.
497 :
498 : type (distrb), intent(in) :: &
499 : distribution ! input distribution for which information
500 : ! is requested
501 :
502 : integer (int_kind), intent(in) :: &
503 : localID ! local index for this block on this proc
504 :
505 : integer (int_kind), intent(out) :: &
506 : blockID ! global block id for this local block
507 :
508 : character(len=*),parameter :: subname='(ice_distributionGetBlockID)'
509 :
510 : !-----------------------------------------------------------------------
511 : !
512 : ! check for valid localID
513 : !
514 : !-----------------------------------------------------------------------
515 :
516 1242816 : if (localID < 0 .or. localID > distribution%numLocalBlocks) then
517 0 : call abort_ice(subname//'ERROR: invalid local id')
518 0 : return
519 : endif
520 :
521 : !-----------------------------------------------------------------------
522 : !
523 : ! extract the global ID from the distribution data structure
524 : !
525 : !-----------------------------------------------------------------------
526 :
527 1242816 : blockID = distribution%blockGlobalID (localID)
528 :
529 : !-----------------------------------------------------------------------
530 :
531 : end subroutine ice_distributionGetBlockID
532 :
533 : !***********************************************************************
534 :
535 28 : function create_distrb_cart(nprocs, workPerBlock) result(newDistrb)
536 :
537 : ! This function creates a distribution of blocks across processors
538 : ! using a 2-d Cartesian distribution.
539 :
540 : integer (int_kind), intent(in) :: &
541 : nprocs ! number of processors in this distribution
542 :
543 : integer (int_kind), dimension(:), intent(in) :: &
544 : workPerBlock ! amount of work per block
545 :
546 : type (distrb) :: &
547 : newDistrb ! resulting structure describing Cartesian
548 : ! distribution of blocks
549 :
550 : !----------------------------------------------------------------------
551 : !
552 : ! local variables
553 : !
554 : !----------------------------------------------------------------------
555 :
556 : integer (int_kind) :: &
557 : i, j, &! dummy loop indices ! LCOV_EXCL_LINE
558 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
559 : iblock, jblock, &! ! LCOV_EXCL_LINE
560 : is, ie, js, je, &! start, end block indices for each proc ! LCOV_EXCL_LINE
561 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
562 : globalID, &! global block ID ! LCOV_EXCL_LINE
563 : localID, &! block location on this processor ! LCOV_EXCL_LINE
564 : nprocsX, &! num of procs in x for global domain ! LCOV_EXCL_LINE
565 : nprocsY, &! num of procs in y for global domain ! LCOV_EXCL_LINE
566 : numBlocksXPerProc, &! num of blocks per processor in x ! LCOV_EXCL_LINE
567 : numBlocksYPerProc, &! num of blocks per processor in y ! LCOV_EXCL_LINE
568 : numBlocksPerProc ! required number of blocks per processor
569 :
570 : character(len=char_len) :: &
571 : numBlocksPerProc_str ! required number of blocks per processor (as string)
572 :
573 : character(len=*),parameter :: subname='(create_distrb_cart)'
574 :
575 : !----------------------------------------------------------------------
576 : !
577 : ! create communicator for this distribution
578 : !
579 : !----------------------------------------------------------------------
580 :
581 28 : call create_communicator(newDistrb%communicator, nprocs)
582 :
583 : !----------------------------------------------------------------------
584 : !
585 : ! try to find best processor arrangement
586 : !
587 : !----------------------------------------------------------------------
588 :
589 28 : newDistrb%nprocs = nprocs
590 :
591 28 : call proc_decomposition(nprocs, nprocsX, nprocsY)
592 :
593 :
594 : !----------------------------------------------------------------------
595 : !
596 : ! allocate space for decomposition
597 : !
598 : !----------------------------------------------------------------------
599 :
600 : allocate (newDistrb%blockLocation(nblocks_tot), &
601 28 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
602 :
603 28 : if (istat > 0) then
604 : call abort_ice( &
605 0 : 'create_distrb_cart: error allocating blockLocation or blockLocalID')
606 0 : return
607 : endif
608 :
609 28 : allocate (newDistrb%blockCnt(nprocs))
610 204 : newDistrb%blockCnt(:) = 0
611 :
612 28 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
613 556 : newDistrb%blockIndex(:,:) = 0
614 :
615 : !----------------------------------------------------------------------
616 : !
617 : ! distribute blocks linearly across processors in each direction
618 : !
619 : !----------------------------------------------------------------------
620 :
621 28 : numBlocksXPerProc = (nblocks_x-1)/nprocsX + 1
622 28 : numBlocksYPerProc = (nblocks_y-1)/nprocsY + 1
623 :
624 : ! Check if max_blocks is too small
625 28 : numBlocksPerProc = numBlocksXPerProc * numBlocksYPerProc
626 28 : if (numBlocksPerProc > max_blocks) then
627 0 : write(numBlocksPerProc_str, '(i2)') numBlocksPerProc
628 0 : call abort_ice(subname//'ERROR: max_blocks too small (need at least '//trim(numBlocksPerProc_str)//')')
629 0 : return
630 : endif
631 :
632 84 : do j=1,nprocsY
633 260 : do i=1,nprocsX
634 176 : processor = (j-1)*nprocsX + i ! number the processors
635 : ! left to right, bot to top
636 :
637 176 : is = (i-1)*numBlocksXPerProc + 1 ! starting block in i
638 176 : ie = i *numBlocksXPerProc ! ending block in i
639 176 : if (ie > nblocks_x) ie = nblocks_x
640 176 : js = (j-1)*numBlocksYPerProc + 1 ! starting block in j
641 176 : je = j *numBlocksYPerProc ! ending block in j
642 176 : if (je > nblocks_y) je = nblocks_y
643 :
644 176 : localID = 0 ! initialize counter for local index
645 528 : do jblock = js,je
646 976 : do iblock = is,ie
647 448 : globalID = (jblock - 1)*nblocks_x + iblock
648 800 : if (workPerBlock(globalID) /= 0) then
649 448 : localID = localID + 1
650 448 : newDistrb%blockLocation(globalID) = processor
651 448 : newDistrb%blockLocalID (globalID) = localID
652 448 : newDistrb%blockCnt(processor) = newDistrb%blockCnt(processor) + 1
653 448 : newDistrb%blockIndex(processor,localID) = globalID
654 : else ! no work - eliminate block from distribution
655 0 : newDistrb%blockLocation(globalID) = 0
656 0 : newDistrb%blockLocalID (globalID) = 0
657 : endif
658 : end do
659 : end do
660 :
661 : ! if this is the local processor, set number of local blocks
662 232 : if (my_task == processor - 1) then
663 28 : newDistrb%numLocalBlocks = localID
664 : endif
665 :
666 : end do
667 : end do
668 :
669 : !----------------------------------------------------------------------
670 : !
671 : ! now store the local info
672 : !
673 : !----------------------------------------------------------------------
674 :
675 28 : if (newDistrb%numLocalBlocks > 0) then
676 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
677 28 : stat=istat)
678 28 : if (istat > 0) then
679 : call abort_ice( &
680 0 : 'create_distrb_cart: error allocating blockGlobalID')
681 0 : return
682 : endif
683 :
684 84 : do j=1,nprocsY
685 260 : do i=1,nprocsX
686 176 : processor = (j-1)*nprocsX + i
687 :
688 232 : if (processor == my_task + 1) then
689 28 : is = (i-1)*numBlocksXPerProc + 1 ! starting block in i
690 28 : ie = i *numBlocksXPerProc ! ending block in i
691 28 : if (ie > nblocks_x) ie = nblocks_x
692 28 : js = (j-1)*numBlocksYPerProc + 1 ! starting block in j
693 28 : je = j *numBlocksYPerProc ! ending block in j
694 28 : if (je > nblocks_y) je = nblocks_y
695 :
696 28 : localID = 0 ! initialize counter for local index
697 84 : do jblock = js,je
698 164 : do iblock = is,ie
699 80 : globalID = (jblock - 1)*nblocks_x + iblock
700 136 : if (workPerBlock(globalID) /= 0) then
701 80 : localID = localID + 1
702 80 : newDistrb%blockGlobalID (localID) = globalID
703 : endif
704 : end do
705 : end do
706 :
707 : endif
708 :
709 : end do
710 : end do
711 :
712 : else
713 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
714 0 : stat=istat)
715 0 : if (istat > 0) then
716 : call abort_ice( &
717 0 : 'create_distrb_cart: error allocating blockGlobalID')
718 0 : return
719 : endif
720 : endif
721 :
722 : !----------------------------------------------------------------------
723 :
724 56 : end function create_distrb_cart
725 :
726 : !**********************************************************************
727 :
728 0 : function create_distrb_rake(nprocs, workPerBlock) result(newDistrb)
729 :
730 : ! This function distributes blocks across processors in a
731 : ! load-balanced manner based on the amount of work per block.
732 : ! A rake algorithm is used in which the blocks are first distributed
733 : ! in a Cartesian distribution and then a rake is applied in each
734 : ! Cartesian direction.
735 :
736 : integer (int_kind), intent(in) :: &
737 : nprocs ! number of processors in this distribution
738 :
739 : integer (int_kind), dimension(:), intent(in) :: &
740 : workPerBlock ! amount of work per block
741 :
742 : type (distrb) :: &
743 : newDistrb ! resulting structure describing
744 : ! load-balanced distribution of blocks
745 :
746 : !----------------------------------------------------------------------
747 : !
748 : ! local variables
749 : !
750 : !----------------------------------------------------------------------
751 :
752 : integer (int_kind) :: &
753 : i,j,n ,&! dummy loop indices ! LCOV_EXCL_LINE
754 : pid ,&! dummy for processor id ! LCOV_EXCL_LINE
755 : istat ,&! status flag for allocates ! LCOV_EXCL_LINE
756 : localBlock ,&! local block position on processor ! LCOV_EXCL_LINE
757 : numOcnBlocks ,&! number of ocean blocks ! LCOV_EXCL_LINE
758 : maxWork ,&! max amount of work in any block ! LCOV_EXCL_LINE
759 : nprocsX ,&! num of procs in x for global domain ! LCOV_EXCL_LINE
760 : nprocsY ! num of procs in y for global domain
761 :
762 : integer (int_kind), dimension(:), allocatable :: &
763 : priority ,&! priority for moving blocks ! LCOV_EXCL_LINE
764 : workTmp ,&! work per row or column for rake algrthm ! LCOV_EXCL_LINE
765 0 : procTmp ! temp processor id for rake algrthm
766 :
767 : type (distrb) :: dist ! temp hold distribution
768 :
769 : character(len=*),parameter :: subname='(create_distrb_rake)'
770 :
771 : !----------------------------------------------------------------------
772 : !
773 : ! first set up as Cartesian distribution
774 : !
775 : !----------------------------------------------------------------------
776 :
777 0 : dist = create_distrb_cart(nprocs, workPerBlock)
778 :
779 : !----------------------------------------------------------------------
780 : !
781 : ! if the number of blocks is close to the number of processors,
782 : ! only do a 1-d rake on the entire distribution
783 : !
784 : !----------------------------------------------------------------------
785 :
786 0 : numOcnBlocks = count(workPerBlock /= 0)
787 0 : maxWork = maxval(workPerBlock)
788 :
789 0 : if (numOcnBlocks <= 2*nprocs) then
790 0 : if (my_task == master_task) &
791 0 : write(nu_diag,*) subname,' 1d rake on entire distribution'
792 :
793 0 : allocate(priority(nblocks_tot), stat=istat)
794 0 : if (istat > 0) then
795 : call abort_ice( &
796 0 : 'create_distrb_rake: error allocating priority')
797 0 : return
798 : endif
799 :
800 : !*** initialize priority array
801 :
802 0 : do j=1,nblocks_y
803 0 : do i=1,nblocks_x
804 0 : n=(j-1)*nblocks_x + i
805 0 : if (workPerBlock(n) > 0) then
806 0 : priority(n) = maxWork + n - workPerBlock(n)
807 : else
808 0 : priority(n) = 0
809 : endif
810 : end do
811 : end do
812 :
813 0 : allocate(workTmp(nprocs), procTmp(nprocs), stat=istat)
814 0 : if (istat > 0) then
815 : call abort_ice( &
816 0 : 'create_distrb_rake: error allocating procTmp')
817 0 : return
818 : endif
819 :
820 0 : workTmp(:) = 0
821 0 : do i=1,nprocs
822 0 : procTmp(i) = i
823 0 : do n=1,nblocks_tot
824 0 : if (dist%blockLocation(n) == i) then
825 0 : workTmp(i) = workTmp(i) + workPerBlock(n)
826 : endif
827 : end do
828 : end do
829 :
830 0 : call ice_distributionRake (workTmp, procTmp, workPerBlock, &
831 0 : priority, dist)
832 :
833 0 : deallocate(workTmp, procTmp, stat=istat)
834 0 : if (istat > 0) then
835 : call abort_ice( &
836 0 : 'create_distrb_rake: error deallocating procTmp')
837 0 : return
838 : endif
839 :
840 : !----------------------------------------------------------------------
841 : !
842 : ! otherwise re-distribute blocks using a rake in each direction
843 : !
844 : !----------------------------------------------------------------------
845 :
846 : else
847 0 : if (my_task == master_task) &
848 0 : write(nu_diag,*) subname,' rake in each direction'
849 :
850 0 : call proc_decomposition(dist%nprocs, nprocsX, nprocsY)
851 :
852 : !----------------------------------------------------------------------
853 : !
854 : ! load-balance using a rake algorithm in the x-direction first
855 : !
856 : !----------------------------------------------------------------------
857 :
858 0 : allocate(priority(nblocks_tot), stat=istat)
859 0 : if (istat > 0) then
860 : call abort_ice( &
861 0 : 'create_distrb_rake: error allocating priority')
862 0 : return
863 : endif
864 :
865 : !*** set highest priority such that eastern-most blocks
866 : !*** and blocks with the least amount of work are
867 : !*** moved first
868 :
869 0 : do j=1,nblocks_y
870 0 : do i=1,nblocks_x
871 0 : n=(j-1)*nblocks_x + i
872 0 : if (workPerBlock(n) > 0) then
873 0 : priority(n) = (maxWork + 1)*(nblocks_x + i) - &
874 0 : workPerBlock(n)
875 : else
876 0 : priority(n) = 0
877 : endif
878 : end do
879 : end do
880 :
881 0 : allocate(workTmp(nprocsX), procTmp(nprocsX), stat=istat)
882 0 : if (istat > 0) then
883 : call abort_ice( &
884 0 : 'create_distrb_rake: error allocating procTmp')
885 0 : return
886 : endif
887 :
888 0 : do j=1,nprocsY
889 :
890 0 : workTmp(:) = 0
891 0 : do i=1,nprocsX
892 0 : pid = (j-1)*nprocsX + i
893 0 : procTmp(i) = pid
894 0 : do n=1,nblocks_tot
895 0 : if (dist%blockLocation(n) == pid) then
896 0 : workTmp(i) = workTmp(i) + workPerBlock(n)
897 : endif
898 : end do
899 : end do
900 :
901 0 : call ice_distributionRake (workTmp, procTmp, workPerBlock, &
902 0 : priority, dist)
903 : end do
904 :
905 0 : deallocate(workTmp, procTmp, stat=istat)
906 0 : if (istat > 0) then
907 : call abort_ice( &
908 0 : 'create_distrb_rake: error deallocating procTmp')
909 0 : return
910 : endif
911 :
912 : !----------------------------------------------------------------------
913 : !
914 : ! use a rake algorithm in the y-direction now
915 : !
916 : !----------------------------------------------------------------------
917 :
918 : !*** set highest priority for northern-most blocks
919 :
920 0 : do j=1,nblocks_y
921 0 : do i=1,nblocks_x
922 0 : n=(j-1)*nblocks_x + i
923 0 : if (workPerBlock(n) > 0) then
924 0 : priority(n) = (maxWork + 1)*(nblocks_y + j) - &
925 0 : workPerBlock(n)
926 : else
927 0 : priority(n) = 0
928 : endif
929 : end do
930 : end do
931 :
932 0 : allocate(workTmp(nprocsY), procTmp(nprocsY), stat=istat)
933 0 : if (istat > 0) then
934 : call abort_ice( &
935 0 : 'create_distrb_rake: error allocating procTmp')
936 0 : return
937 : endif
938 :
939 0 : do i=1,nprocsX
940 :
941 0 : workTmp(:) = 0
942 0 : do j=1,nprocsY
943 0 : pid = (j-1)*nprocsX + i
944 0 : procTmp(j) = pid
945 0 : do n=1,nblocks_tot
946 0 : if (dist%blockLocation(n) == pid) then
947 0 : workTmp(j) = workTmp(j) + workPerBlock(n)
948 : endif
949 : end do
950 : end do
951 :
952 0 : call ice_distributionRake (workTmp, procTmp, workPerBlock, &
953 0 : priority, dist)
954 :
955 : end do
956 :
957 0 : deallocate(workTmp, procTmp, priority, stat=istat)
958 0 : if (istat > 0) then
959 : call abort_ice( &
960 0 : 'create_distrb_rake: error deallocating procTmp')
961 0 : return
962 : endif
963 :
964 : endif ! 1d or 2d rake
965 :
966 : !----------------------------------------------------------------------
967 : !
968 : ! create new distribution with info extracted from the temporary
969 : ! distribution
970 : !
971 : !----------------------------------------------------------------------
972 :
973 0 : newDistrb%nprocs = nprocs
974 0 : newDistrb%communicator = dist%communicator
975 :
976 : allocate(newDistrb%blockLocation(nblocks_tot), &
977 0 : newDistrb%blockLocalID(nblocks_tot), stat=istat)
978 0 : if (istat > 0) then
979 : call abort_ice( &
980 0 : 'create_distrb_rake: error allocating blockLocation or blockLocalID')
981 0 : return
982 : endif
983 :
984 0 : allocate (newDistrb%blockCnt(nprocs))
985 0 : newDistrb%blockCnt(:) = 0
986 :
987 0 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
988 0 : newDistrb%blockIndex(:,:) = 0
989 :
990 0 : allocate(procTmp(nprocs), stat=istat)
991 0 : if (istat > 0) then
992 : call abort_ice( &
993 0 : 'create_distrb_rake: error allocating procTmp')
994 0 : return
995 : endif
996 :
997 0 : procTmp = 0
998 0 : do n=1,nblocks_tot
999 0 : pid = dist%blockLocation(n) ! processor id
1000 0 : newDistrb%blockLocation(n) = pid
1001 :
1002 0 : if (pid > 0) then
1003 0 : procTmp(pid) = procTmp(pid) + 1
1004 0 : if (procTmp(pid) > max_blocks) then
1005 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1006 0 : return
1007 : endif
1008 0 : newDistrb%blockLocalID (n) = procTmp(pid)
1009 0 : newDistrb%blockIndex(pid,procTmp(pid)) = n
1010 : else
1011 0 : newDistrb%blockLocalID (n) = 0
1012 : endif
1013 : end do
1014 :
1015 0 : newDistrb%blockCnt(:) = procTmp(:)
1016 0 : newDistrb%numLocalBlocks = procTmp(my_task+1)
1017 :
1018 0 : if (minval(procTmp) < 1) then
1019 0 : call abort_ice(subname//'ERROR: processors left with no blocks')
1020 0 : return
1021 : endif
1022 :
1023 0 : deallocate(procTmp, stat=istat)
1024 :
1025 0 : if (istat > 0) then
1026 0 : call abort_ice(subname//'ERROR: allocating last procTmp')
1027 0 : return
1028 : endif
1029 :
1030 : allocate(newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
1031 0 : stat=istat)
1032 :
1033 0 : if (istat > 0) then
1034 0 : call abort_ice(subname//'ERROR: allocating blockGlobalID')
1035 0 : return
1036 : endif
1037 :
1038 0 : localBlock = 0
1039 0 : do n=1,nblocks_tot
1040 0 : if (newDistrb%blockLocation(n) == my_task+1) then
1041 0 : localBlock = localBlock + 1
1042 0 : newDistrb%blockGlobalID(localBlock) = n
1043 : endif
1044 : end do
1045 :
1046 : !----------------------------------------------------------------------
1047 :
1048 0 : call ice_distributionDestroy(dist)
1049 :
1050 : !----------------------------------------------------------------------
1051 :
1052 0 : end function create_distrb_rake
1053 :
1054 : !***********************************************************************
1055 :
1056 9 : function create_distrb_roundrobin(nprocs, workPerBlock) result(newDistrb)
1057 :
1058 : ! This function creates a distribution of blocks across processors
1059 : ! using a simple roundrobin algorithm. Mean for prescribed ice or
1060 : ! standalone CAM mode.
1061 :
1062 : integer (int_kind), intent(in) :: &
1063 : nprocs ! number of processors in this distribution
1064 :
1065 : integer (int_kind), dimension(:), intent(in) :: &
1066 : workPerBlock ! amount of work per block
1067 :
1068 : type (distrb) :: &
1069 : newDistrb ! resulting structure describing Cartesian
1070 : ! distribution of blocks
1071 :
1072 : !----------------------------------------------------------------------
1073 : !
1074 : ! local variables
1075 : !
1076 : !----------------------------------------------------------------------
1077 :
1078 : integer (int_kind) :: &
1079 : i, j, &! dummy loop indices ! LCOV_EXCL_LINE
1080 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
1081 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
1082 : globalID, &! global block ID ! LCOV_EXCL_LINE
1083 : localID ! block location on this processor
1084 :
1085 : integer (int_kind), dimension(:), allocatable :: &
1086 9 : proc_tmp ! temp processor id
1087 :
1088 : character(len=*),parameter :: subname='(create_distrb_roundrobin)'
1089 :
1090 : !----------------------------------------------------------------------
1091 : !
1092 : ! create communicator for this distribution
1093 : !
1094 : !----------------------------------------------------------------------
1095 :
1096 9 : call create_communicator(newDistrb%communicator, nprocs)
1097 :
1098 : !----------------------------------------------------------------------
1099 : !
1100 : ! try to find best processor arrangement
1101 : !
1102 : !----------------------------------------------------------------------
1103 :
1104 9 : newDistrb%nprocs = nprocs
1105 :
1106 : !----------------------------------------------------------------------
1107 : !
1108 : ! allocate space for decomposition
1109 : !
1110 : !----------------------------------------------------------------------
1111 :
1112 : allocate (newDistrb%blockLocation(nblocks_tot), &
1113 9 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
1114 9 : if (istat > 0) then
1115 : call abort_ice( &
1116 0 : 'create_distrb_roundrobin: error allocating blockLocation or blockLocalID')
1117 0 : return
1118 : endif
1119 :
1120 9 : allocate (newDistrb%blockCnt(nprocs))
1121 :
1122 : !----------------------------------------------------------------------
1123 : !
1124 : ! distribute blocks across processors, one block per proc until used
1125 : !
1126 : !----------------------------------------------------------------------
1127 :
1128 9 : allocate(proc_tmp(nprocs))
1129 9 : processor = 0
1130 9 : globalID = 0
1131 74 : proc_tmp = 0
1132 :
1133 9 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
1134 731 : newDistrb%blockIndex(:,:) = 0
1135 :
1136 42 : do j=1,nblocks_y
1137 683 : do i=1,nblocks_x
1138 :
1139 641 : globalID = globalID + 1
1140 :
1141 674 : if (workPerBlock(globalID) /= 0) then
1142 633 : processor = mod(processor,nprocs) + 1
1143 633 : proc_tmp(processor) = proc_tmp(processor) + 1
1144 633 : localID = proc_tmp(processor)
1145 633 : if (localID > max_blocks) then
1146 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1147 0 : return
1148 : endif
1149 633 : newDistrb%blockLocation(globalID) = processor
1150 633 : newDistrb%blockLocalID (globalID) = localID
1151 633 : newDistrb%blockIndex(processor,localID) = globalID
1152 : else ! no work - eliminate block from distribution
1153 8 : newDistrb%blockLocation(globalID) = 0
1154 8 : newDistrb%blockLocalID (globalID) = 0
1155 : endif
1156 :
1157 : end do
1158 : end do
1159 :
1160 9 : newDistrb%numLocalBlocks = proc_tmp(my_task+1)
1161 74 : newDistrb%blockCnt(:) = proc_tmp(:)
1162 9 : deallocate(proc_tmp)
1163 :
1164 : ! write(nu_diag,*) 'my_task,newDistrb%numLocalBlocks',&
1165 : ! my_task,newDistrb%numLocalBlocks
1166 :
1167 : !----------------------------------------------------------------------
1168 : !
1169 : ! now store the local info
1170 : !
1171 : !----------------------------------------------------------------------
1172 :
1173 9 : globalID = 0
1174 :
1175 9 : if (newDistrb%numLocalBlocks > 0) then
1176 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
1177 9 : stat=istat)
1178 9 : if (istat > 0) then
1179 : call abort_ice( &
1180 0 : 'create_distrb_roundrobin: error allocating numLocalBlocks')
1181 0 : return
1182 : endif
1183 :
1184 9 : processor = my_task + 1
1185 89 : do localID = 1,newDistrb%numLocalBlocks
1186 0 : newDistrb%blockGlobalID (localID) = newDistrb%blockIndex(processor,&
1187 89 : localID)
1188 : enddo
1189 : endif
1190 :
1191 : !----------------------------------------------------------------------
1192 :
1193 9 : end function create_distrb_roundrobin
1194 :
1195 : !***********************************************************************
1196 :
1197 0 : function create_distrb_spiralcenter(nprocs, workPerBlock) result(newDistrb)
1198 :
1199 : ! This function creates a distribution of blocks across processors
1200 : ! using a simple spiralcenter algorithm. Mean for prescribed ice or
1201 : ! standalone CAM mode.
1202 :
1203 : integer (int_kind), intent(in) :: &
1204 : nprocs ! number of processors in this distribution
1205 :
1206 : integer (int_kind), dimension(:), intent(in) :: &
1207 : workPerBlock ! amount of work per block
1208 :
1209 : type (distrb) :: &
1210 : newDistrb ! resulting structure describing Cartesian
1211 : ! distribution of blocks
1212 :
1213 : !----------------------------------------------------------------------
1214 : !
1215 : ! local variables
1216 : !
1217 : !----------------------------------------------------------------------
1218 :
1219 : integer (int_kind) :: &
1220 : n, i, j, ic, jc, id, jd, cnt, &! dummy loop indices ! LCOV_EXCL_LINE
1221 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
1222 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
1223 : nblocklist, &! number of blocks in blocklist ! LCOV_EXCL_LINE
1224 : globalID, &! global block ID ! LCOV_EXCL_LINE
1225 : localID ! block location on this processor
1226 :
1227 : integer (int_kind), dimension(:), allocatable :: &
1228 : proc_tmp, &! temp processor id ! LCOV_EXCL_LINE
1229 0 : blocklist ! temp block ordered list
1230 : integer (int_kind), dimension(:,:), allocatable :: &
1231 0 : blockchk ! temp block check array
1232 :
1233 : character(len=*),parameter :: subname='(create_distrb_spiralcenter)'
1234 :
1235 : !----------------------------------------------------------------------
1236 : !
1237 : ! create communicator for this distribution
1238 : !
1239 : !----------------------------------------------------------------------
1240 :
1241 0 : call create_communicator(newDistrb%communicator, nprocs)
1242 :
1243 : !----------------------------------------------------------------------
1244 : !
1245 : ! try to find best processor arrangement
1246 : !
1247 : !----------------------------------------------------------------------
1248 :
1249 0 : newDistrb%nprocs = nprocs
1250 :
1251 : !----------------------------------------------------------------------
1252 : !
1253 : ! allocate space for decomposition
1254 : !
1255 : !----------------------------------------------------------------------
1256 :
1257 : allocate (newDistrb%blockLocation(nblocks_tot), &
1258 0 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
1259 :
1260 0 : allocate (newDistrb%blockCnt(nprocs))
1261 :
1262 : !----------------------------------------------------------------------
1263 : !
1264 : ! create list of blocks starting from center in spiral
1265 : ! pattern is start in center, right 1, up 1, left 2, down 2,
1266 : ! right 3, up 3, left 4, down 4, right 5, up 5, etc.
1267 : ! until all blocks have been accounted for just once.
1268 : ! cnt tracks the up, left, down, right counts and is the emergency
1269 : ! stop
1270 : !
1271 : !----------------------------------------------------------------------
1272 :
1273 0 : allocate(proc_tmp(nprocs))
1274 0 : allocate(blocklist(nblocks_tot))
1275 0 : allocate(blockchk(nblocks_x,nblocks_y))
1276 0 : nblocklist = 0
1277 0 : blocklist = 0
1278 0 : blockchk = 0
1279 0 : processor = 0
1280 0 : globalID = 0
1281 0 : proc_tmp = 0
1282 :
1283 0 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
1284 0 : newDistrb%blockIndex(:,:) = 0
1285 :
1286 0 : jc = nblocks_y/2
1287 0 : ic = nblocks_x/2
1288 :
1289 : ! center block
1290 0 : cnt = 0
1291 0 : j = jc
1292 0 : i = ic
1293 0 : globalID = (j-1)*nblocks_x + i
1294 0 : nblocklist = nblocklist + 1
1295 0 : blocklist(nblocklist) = globalID
1296 0 : blockchk(i,j) = 1
1297 :
1298 0 : do while (minval(blocklist) < 1 .and. cnt < max(nblocks_x,nblocks_y) )
1299 :
1300 0 : cnt = cnt + 1
1301 :
1302 : ! right, j held constant
1303 0 : ic = i
1304 0 : do id = ic+1,ic+cnt
1305 0 : i = max(min(id,nblocks_x),1)
1306 0 : if (blockchk(i,j) == 0) then
1307 0 : globalID = (j-1)*nblocks_x + i
1308 0 : nblocklist = nblocklist + 1
1309 0 : blocklist(nblocklist) = globalID
1310 0 : blockchk(i,j) = 1
1311 : endif
1312 : enddo
1313 :
1314 : ! up, i held constant
1315 0 : jc = j
1316 0 : do jd = jc+1,jc+cnt
1317 0 : j = max(min(jd,nblocks_y),1)
1318 0 : if (blockchk(i,j) == 0) then
1319 0 : globalID = (j-1)*nblocks_x + i
1320 0 : nblocklist = nblocklist + 1
1321 0 : blocklist(nblocklist) = globalID
1322 0 : blockchk(i,j) = 1
1323 : endif
1324 : enddo
1325 :
1326 0 : cnt = cnt + 1
1327 :
1328 : ! left, j held constant
1329 0 : ic = i
1330 0 : do id = ic-1,ic-cnt,-1
1331 0 : i = max(min(id,nblocks_x),1)
1332 0 : if (blockchk(i,j) == 0) then
1333 0 : globalID = (j-1)*nblocks_x + i
1334 0 : nblocklist = nblocklist + 1
1335 0 : blocklist(nblocklist) = globalID
1336 0 : blockchk(i,j) = 1
1337 : endif
1338 : enddo
1339 :
1340 : ! down, i held constant
1341 0 : jc = j
1342 0 : do jd = jc-1,jc-cnt,-1
1343 0 : j = max(min(jd,nblocks_y),1)
1344 0 : if (blockchk(i,j) == 0) then
1345 0 : globalID = (j-1)*nblocks_x + i
1346 0 : nblocklist = nblocklist + 1
1347 0 : blocklist(nblocklist) = globalID
1348 0 : blockchk(i,j) = 1
1349 : endif
1350 : enddo
1351 :
1352 : enddo
1353 :
1354 : if (nblocklist /= nblocks_x*nblocks_y .or. &
1355 0 : maxval(blockchk) /= 1 .or. minval(blockchk) /= 1) then
1356 0 : call abort_ice(subname//'ERROR: blockchk invalid')
1357 0 : return
1358 : endif
1359 0 : deallocate(blockchk)
1360 :
1361 : !----------------------------------------------------------------------
1362 : !
1363 : ! now distribute the blocks in the blocklist roundrobin
1364 : !
1365 : !----------------------------------------------------------------------
1366 :
1367 0 : do n = 1,nblocklist
1368 :
1369 0 : globalID = blocklist(n)
1370 :
1371 0 : if (workPerBlock(globalID) /= 0) then
1372 0 : processor = mod(processor,nprocs) + 1
1373 0 : proc_tmp(processor) = proc_tmp(processor) + 1
1374 0 : localID = proc_tmp(processor)
1375 0 : if (localID > max_blocks) then
1376 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1377 0 : return
1378 : endif
1379 0 : newDistrb%blockLocation(globalID) = processor
1380 0 : newDistrb%blockLocalID (globalID) = localID
1381 0 : newDistrb%blockIndex(processor,localID) = globalID
1382 : else ! no work - eliminate block from distribution
1383 0 : newDistrb%blockLocation(globalID) = 0
1384 0 : newDistrb%blockLocalID (globalID) = 0
1385 : endif
1386 :
1387 : end do
1388 :
1389 0 : newDistrb%numLocalBlocks = proc_tmp(my_task+1)
1390 0 : newDistrb%blockCnt(:) = proc_tmp(:)
1391 0 : deallocate(proc_tmp)
1392 0 : deallocate(blocklist)
1393 :
1394 : ! write(nu_diag,*) 'my_task,newDistrb%numLocalBlocks',&
1395 : ! my_task,newDistrb%numLocalBlocks
1396 :
1397 : !----------------------------------------------------------------------
1398 : !
1399 : ! now store the local info
1400 : !
1401 : !----------------------------------------------------------------------
1402 :
1403 0 : globalID = 0
1404 :
1405 0 : if (newDistrb%numLocalBlocks > 0) then
1406 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
1407 0 : stat=istat)
1408 :
1409 0 : processor = my_task + 1
1410 0 : do localID = 1,newDistrb%numLocalBlocks
1411 0 : newDistrb%blockGlobalID (localID) = newDistrb%blockIndex(processor,&
1412 0 : localID)
1413 : enddo
1414 : endif
1415 :
1416 : !----------------------------------------------------------------------
1417 :
1418 0 : end function create_distrb_spiralcenter
1419 :
1420 : !***********************************************************************
1421 :
1422 0 : function create_distrb_wghtfile(nprocs, workPerBlock) result(newDistrb)
1423 :
1424 : ! This function creates a distribution of blocks across processors
1425 : ! using a simple wghtfile algorithm. Meant for prescribed ice or
1426 : ! standalone CAM mode.
1427 :
1428 : integer (int_kind), intent(in) :: &
1429 : nprocs ! number of processors in this distribution
1430 :
1431 : integer (int_kind), dimension(:), intent(in) :: &
1432 : workPerBlock ! amount of work per block
1433 :
1434 : type (distrb) :: &
1435 : newDistrb ! resulting structure describing Cartesian
1436 : ! distribution of blocks
1437 :
1438 : !----------------------------------------------------------------------
1439 : !
1440 : ! local variables
1441 : !
1442 : !----------------------------------------------------------------------
1443 :
1444 : integer (int_kind) :: &
1445 : i, j, n, &! dummy loop indices ! LCOV_EXCL_LINE
1446 : cnt, &! counter ! LCOV_EXCL_LINE
1447 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
1448 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
1449 : globalID, &! global block ID ! LCOV_EXCL_LINE
1450 : localID ! block location on this processor
1451 :
1452 : integer (int_kind), dimension(:), allocatable :: &
1453 0 : proc_tmp ! temp processor id
1454 :
1455 : logical (log_kind) :: up ! direction of pe counting
1456 :
1457 : character(len=*),parameter :: subname='(create_distrb_wghtfile)'
1458 :
1459 : !----------------------------------------------------------------------
1460 : !
1461 : ! create communicator for this distribution
1462 : !
1463 : !----------------------------------------------------------------------
1464 :
1465 0 : call create_communicator(newDistrb%communicator, nprocs)
1466 :
1467 : !----------------------------------------------------------------------
1468 : !
1469 : ! try to find best processor arrangement
1470 : !
1471 : !----------------------------------------------------------------------
1472 :
1473 0 : newDistrb%nprocs = nprocs
1474 :
1475 : !----------------------------------------------------------------------
1476 : !
1477 : ! allocate space for decomposition
1478 : !
1479 : !----------------------------------------------------------------------
1480 :
1481 : allocate (newDistrb%blockLocation(nblocks_tot), &
1482 0 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
1483 :
1484 0 : allocate (newDistrb%blockCnt(nprocs))
1485 :
1486 : !----------------------------------------------------------------------
1487 : !
1488 : ! distribute blocks across processors, one block per proc until used
1489 : ! work from most expensive workPerBlock to least and go up/down/up/down
1490 : ! in terms of the pe index to try to get better load balance.
1491 : !
1492 : !----------------------------------------------------------------------
1493 :
1494 0 : allocate(proc_tmp(nprocs))
1495 0 : processor = 0
1496 0 : proc_tmp = 0
1497 0 : up = .true.
1498 :
1499 0 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
1500 0 : newDistrb%blockIndex(:,:) = 0
1501 :
1502 0 : if (my_task == master_task) &
1503 0 : write(nu_diag,*) subname,' workPerBlock = ',minval(workPerBlock),maxval(workPerBlock)
1504 0 : if (minval(workPerBlock) < 0 .or. maxval(workPerBlock) > 12) then
1505 0 : write(nu_diag,*) subname,' workPerBlock = ',minval(workPerBlock),maxval(workPerBlock)
1506 0 : call abort_ice(subname//'ERROR: workPerBlock incorrect')
1507 0 : return
1508 : endif
1509 :
1510 : ! do not distribution blocks with work=0
1511 0 : do n=maxval(workPerBlock),1,-1
1512 0 : cnt = 0
1513 0 : do j=1,nblocks_y
1514 0 : do i=1,nblocks_x
1515 :
1516 0 : if (mod(j,2) == 1) then
1517 0 : globalID = (j-1)*nblocks_x + i
1518 : else
1519 0 : globalID = (j-1)*nblocks_x + nblocks_x - i + 1
1520 : endif
1521 :
1522 0 : if (workPerBlock(globalID) == 0) then ! no work - eliminate block from distribution
1523 0 : newDistrb%blockLocation(globalID) = 0
1524 0 : newDistrb%blockLocalID (globalID) = 0
1525 0 : elseif (workPerBlock(globalID) == n) then
1526 0 : cnt = cnt + 1
1527 : ! processor = mod(processor,nprocs) + 1
1528 0 : if (up) then
1529 0 : processor = processor + 1
1530 : else
1531 0 : processor = processor - 1
1532 : endif
1533 0 : if (processor > nprocs) then
1534 0 : up = .false.
1535 0 : processor = nprocs
1536 0 : elseif (processor < 1) then
1537 0 : up = .true.
1538 0 : processor = 1
1539 : endif
1540 0 : proc_tmp(processor) = proc_tmp(processor) + 1
1541 0 : localID = proc_tmp(processor)
1542 0 : if (localID > max_blocks) then
1543 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1544 0 : return
1545 : endif
1546 0 : newDistrb%blockLocation(globalID) = processor
1547 0 : newDistrb%blockLocalID (globalID) = localID
1548 0 : newDistrb%blockIndex(processor,localID) = globalID
1549 : endif
1550 :
1551 : end do
1552 : end do
1553 : ! write(nu_diag,*) 'create_distrb_wghtfile n cnt = ',n,cnt
1554 : end do
1555 :
1556 0 : newDistrb%numLocalBlocks = proc_tmp(my_task+1)
1557 0 : newDistrb%blockCnt(:) = proc_tmp(:)
1558 0 : deallocate(proc_tmp)
1559 :
1560 : ! write(nu_diag,*) 'my_task,newDistrb%numLocalBlocks',&
1561 : ! my_task,newDistrb%numLocalBlocks
1562 :
1563 : !----------------------------------------------------------------------
1564 : !
1565 : ! now store the local info
1566 : !
1567 : !----------------------------------------------------------------------
1568 :
1569 0 : globalID = 0
1570 :
1571 0 : if (newDistrb%numLocalBlocks > 0) then
1572 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
1573 0 : stat=istat)
1574 :
1575 0 : processor = my_task + 1
1576 0 : do localID = 1,newDistrb%numLocalBlocks
1577 0 : newDistrb%blockGlobalID (localID) = newDistrb%blockIndex(processor,&
1578 0 : localID)
1579 : enddo
1580 : endif
1581 :
1582 : !----------------------------------------------------------------------
1583 :
1584 0 : end function create_distrb_wghtfile
1585 :
1586 : !***********************************************************************
1587 :
1588 0 : function create_distrb_sectrobin(nprocs, workPerBlock) result(newDistrb)
1589 :
1590 : ! This function creates a distribution of blocks across processors
1591 : ! using a simple sectrobin algorithm. Mean for prescribed ice or
1592 : ! standalone CAM mode.
1593 :
1594 : integer (int_kind), intent(in) :: &
1595 : nprocs ! number of processors in this distribution
1596 :
1597 : integer (int_kind), dimension(:), intent(in) :: &
1598 : workPerBlock ! amount of work per block
1599 :
1600 : type (distrb) :: &
1601 : newDistrb ! resulting structure describing Cartesian
1602 : ! distribution of blocks
1603 :
1604 : !----------------------------------------------------------------------
1605 : !
1606 : ! local variables
1607 : !
1608 : !----------------------------------------------------------------------
1609 :
1610 : integer (int_kind) :: &
1611 : i, j, &! dummy loop indices ! LCOV_EXCL_LINE
1612 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
1613 : mblocks, &! estimate of max blocks per pe ! LCOV_EXCL_LINE
1614 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
1615 : globalID, &! global block ID ! LCOV_EXCL_LINE
1616 : localID ! block location on this processor
1617 :
1618 : integer (int_kind), dimension(:), allocatable :: &
1619 0 : proc_tmp ! temp processor id
1620 :
1621 : logical (log_kind), dimension(:), allocatable :: &
1622 0 : bfree ! map of assigned blocks
1623 :
1624 : integer (int_kind) :: cnt, blktogether, i2
1625 : integer (int_kind) :: totblocks, nchunks
1626 : logical (log_kind) :: keepgoing
1627 :
1628 : character(len=*),parameter :: subname='(create_distrb_sectrobin)'
1629 :
1630 : !----------------------------------------------------------------------
1631 : !
1632 : ! create communicator for this distribution
1633 : !
1634 : !----------------------------------------------------------------------
1635 :
1636 0 : call create_communicator(newDistrb%communicator, nprocs)
1637 :
1638 : !----------------------------------------------------------------------
1639 : !
1640 : ! try to find best processor arrangement
1641 : !
1642 : !----------------------------------------------------------------------
1643 :
1644 0 : newDistrb%nprocs = nprocs
1645 :
1646 : !----------------------------------------------------------------------
1647 : !
1648 : ! allocate space for decomposition
1649 : !
1650 : !----------------------------------------------------------------------
1651 :
1652 : allocate (newDistrb%blockLocation(nblocks_tot), &
1653 0 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
1654 0 : if (istat > 0) then
1655 : call abort_ice( &
1656 0 : 'create_distrb_sectrobin: error allocating blockLocation or blockLocalID')
1657 0 : return
1658 : endif
1659 :
1660 0 : allocate (newDistrb%blockCnt(nprocs))
1661 :
1662 : !----------------------------------------------------------------------
1663 : !
1664 : ! distribute groups of blocks across processors, one per proc until used
1665 : !
1666 : !----------------------------------------------------------------------
1667 :
1668 0 : allocate(proc_tmp(nprocs))
1669 0 : processor = 0
1670 0 : globalID = 0
1671 0 : proc_tmp = 0
1672 :
1673 0 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
1674 0 : newDistrb%blockIndex(:,:) = 0
1675 :
1676 0 : allocate(bfree(nblocks_x*nblocks_y))
1677 0 : bfree=.true.
1678 :
1679 0 : totblocks = 0
1680 0 : do j=1,nblocks_y
1681 0 : do i=1,nblocks_x
1682 0 : globalID = (j-1)*nblocks_x + i
1683 0 : if (workPerBlock(globalID) /= 0) then
1684 0 : totblocks=totblocks+1
1685 : else ! no work - eliminate block from distribution
1686 0 : bfree(globalID) = .false.
1687 0 : newDistrb%blockLocation(globalID) = 0
1688 0 : newDistrb%blockLocalID (globalID) = 0
1689 : endif
1690 : enddo
1691 : enddo
1692 :
1693 0 : mblocks = totblocks/nprocs
1694 0 : if (mod(totblocks,nprocs) > 0) mblocks=mblocks+1
1695 :
1696 0 : blktogether = max(1,nint(float(totblocks)/float(6*nprocs)))
1697 :
1698 : ! write(nu_diag,*) 'ice_distrb_sectrobin totblocks = ',totblocks,nblocks_y*nblocks_x
1699 :
1700 : !------------------------------
1701 : ! southern group of blocks
1702 : ! weave back and forth in i vs j
1703 : ! go south to north, low - high pes
1704 : !------------------------------
1705 :
1706 0 : processor=1
1707 0 : cnt = 0
1708 0 : keepgoing = .true.
1709 0 : do j=1,nblocks_y
1710 0 : do i=1,nblocks_x
1711 0 : if (mod(j,2) == 0) then
1712 0 : i2 = nblocks_x - i + 1
1713 : else
1714 0 : i2 = i
1715 : endif
1716 0 : globalID = (j-1)*nblocks_x + i2
1717 0 : if (cnt >= blktogether) then
1718 0 : processor = mod(processor,nprocs) + 1
1719 0 : cnt = 0
1720 0 : if (processor == 1) keepgoing = .false.
1721 : endif
1722 : ! write(nu_diag,'(a,6i7,l2)') 'tcx ',i,j,globalID,cnt,blktogether,processor,keepgoing
1723 :
1724 0 : if (keepgoing) then
1725 0 : if (bfree(globalID)) then
1726 0 : if (workPerBlock(globalID) /= 0) then
1727 0 : proc_tmp(processor) = proc_tmp(processor) + 1
1728 0 : localID = proc_tmp(processor)
1729 0 : if (localID > max_blocks) then
1730 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1731 0 : return
1732 : endif
1733 0 : newDistrb%blockLocation(globalID) = processor
1734 0 : newDistrb%blockLocalID (globalID) = localID
1735 0 : newDistrb%blockIndex(processor,localID) = globalID
1736 0 : cnt = cnt + 1
1737 0 : totblocks = totblocks-1
1738 0 : bfree(globalID) = .false.
1739 :
1740 : else ! no work - eliminate block from distribution
1741 0 : bfree(globalID) = .false.
1742 0 : newDistrb%blockLocation(globalID) = 0
1743 0 : newDistrb%blockLocalID (globalID) = 0
1744 : endif
1745 : endif ! bfree
1746 : endif
1747 : end do
1748 : end do
1749 :
1750 : ! write(nu_diag,*) 'ice_distrb_sectrobin totblocks left after southern = ',totblocks
1751 :
1752 : !------------------------------
1753 : ! northern group of blocks
1754 : ! weave back and forth in i vs j
1755 : ! go north to south, high - low pes
1756 : !------------------------------
1757 :
1758 0 : processor=nprocs
1759 0 : cnt = 0
1760 0 : keepgoing = .true.
1761 0 : do j=nblocks_y,1,-1
1762 0 : do i=1,nblocks_x
1763 0 : if (mod(j,2) == 1) then
1764 0 : i2 = nblocks_x - i + 1
1765 : else
1766 0 : i2 = i
1767 : endif
1768 0 : globalID = (j-1)*nblocks_x + i2
1769 0 : if (cnt >= blktogether) then
1770 0 : processor = mod(processor+nprocs-2,nprocs) + 1
1771 0 : cnt = 0
1772 0 : if (processor == nprocs) keepgoing = .false.
1773 : endif
1774 :
1775 0 : if (keepgoing) then
1776 0 : if (bfree(globalID)) then
1777 0 : if (workPerBlock(globalID) /= 0) then
1778 0 : proc_tmp(processor) = proc_tmp(processor) + 1
1779 0 : localID = proc_tmp(processor)
1780 0 : if (localID > max_blocks) then
1781 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1782 0 : return
1783 : endif
1784 0 : newDistrb%blockLocation(globalID) = processor
1785 0 : newDistrb%blockLocalID (globalID) = localID
1786 0 : newDistrb%blockIndex(processor,localID) = globalID
1787 0 : cnt = cnt + 1
1788 0 : totblocks = totblocks - 1
1789 0 : bfree(globalID) = .false.
1790 :
1791 : else ! no work - eliminate block from distribution
1792 0 : bfree(globalID) = .false.
1793 0 : newDistrb%blockLocation(globalID) = 0
1794 0 : newDistrb%blockLocalID (globalID) = 0
1795 : endif
1796 : endif ! bfree
1797 : endif
1798 : end do
1799 : end do
1800 :
1801 : ! write(nu_diag,*) 'ice_distrb_sectrobin totblocks left after northern = ',totblocks
1802 :
1803 : !------------------------------
1804 : ! central group of blocks
1805 : ! weave back and forth in i vs j
1806 : ! go north to south, low - high / low - high pes
1807 : !------------------------------
1808 :
1809 0 : nchunks = 2*nprocs
1810 0 : blktogether = max(1,nint(float(totblocks)/float(nchunks)))
1811 0 : processor=1
1812 0 : cnt = 0
1813 0 : do j=nblocks_y,1,-1
1814 0 : do i=1,nblocks_x
1815 0 : if (mod(j,2) == 1) then
1816 0 : i2 = nblocks_x - i + 1
1817 : else
1818 0 : i2 = i
1819 : endif
1820 0 : globalID = (j-1)*nblocks_x + i2
1821 0 : if (totblocks > 0) then
1822 0 : do while (proc_tmp(processor) >= mblocks .or. cnt >= blktogether)
1823 0 : nchunks = nchunks - 1
1824 0 : if (nchunks == 0) then
1825 0 : blktogether = 1
1826 : else
1827 0 : blktogether = max(1,nint(float(totblocks)/float(nchunks)))
1828 : endif
1829 0 : cnt = 0
1830 0 : processor = mod(processor,nprocs) + 1
1831 : enddo
1832 : endif
1833 :
1834 : ! write(nu_diag,*) 'ice_distrb_sectrobin central ',i,j,totblocks,cnt,nchunks,blktogether,processor
1835 :
1836 0 : if (bfree(globalID)) then
1837 0 : if (workPerBlock(globalID) /= 0) then
1838 0 : proc_tmp(processor) = proc_tmp(processor) + 1
1839 0 : localID = proc_tmp(processor)
1840 0 : if (localID > max_blocks) then
1841 0 : call abort_ice(subname//'ERROR: max_blocks too small')
1842 0 : return
1843 : endif
1844 0 : newDistrb%blockLocation(globalID) = processor
1845 0 : newDistrb%blockLocalID (globalID) = localID
1846 0 : newDistrb%blockIndex(processor,localID) = globalID
1847 0 : cnt = cnt + 1
1848 0 : totblocks = totblocks-1
1849 0 : bfree(globalID) = .false.
1850 :
1851 : else ! no work - eliminate block from distribution
1852 0 : bfree(globalID) = .false.
1853 0 : newDistrb%blockLocation(globalID) = 0
1854 0 : newDistrb%blockLocalID (globalID) = 0
1855 : endif
1856 : endif ! bfree
1857 : end do
1858 : end do
1859 :
1860 0 : newDistrb%numLocalBlocks = proc_tmp(my_task+1)
1861 0 : newDistrb%blockCnt(:) = proc_tmp(:)
1862 0 : deallocate(proc_tmp)
1863 0 : deallocate(bfree)
1864 :
1865 : !----------------------------------------------------------------------
1866 : !
1867 : ! now store the local info
1868 : !
1869 : !----------------------------------------------------------------------
1870 :
1871 0 : globalID = 0
1872 :
1873 0 : if (newDistrb%numLocalBlocks > 0) then
1874 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
1875 0 : stat=istat)
1876 0 : if (istat > 0) then
1877 : call abort_ice( &
1878 0 : 'create_distrb_sectrobin: error allocating numLocalBlocks')
1879 0 : return
1880 : endif
1881 :
1882 0 : processor = my_task + 1
1883 0 : do localID = 1,newDistrb%numLocalBlocks
1884 0 : newDistrb%blockGlobalID (localID) = newDistrb%blockIndex(processor,&
1885 0 : localID)
1886 : enddo
1887 : endif
1888 :
1889 : !----------------------------------------------------------------------
1890 :
1891 0 : end function create_distrb_sectrobin
1892 :
1893 : !***********************************************************************
1894 :
1895 0 : function create_distrb_sectcart(nprocs, workPerBlock) result(newDistrb)
1896 :
1897 : ! This function creates a distribution of blocks across processors
1898 : ! using a simple sectcart algorithm. Mean for prescribed ice or
1899 : ! standalone CAM mode.
1900 :
1901 : integer (int_kind), intent(in) :: &
1902 : nprocs ! number of processors in this distribution
1903 :
1904 : integer (int_kind), dimension(:), intent(in) :: &
1905 : workPerBlock ! amount of work per block
1906 :
1907 : type (distrb) :: &
1908 : newDistrb ! resulting structure describing Cartesian
1909 : ! distribution of blocks
1910 :
1911 : !----------------------------------------------------------------------
1912 : !
1913 : ! local variables
1914 : !
1915 : !----------------------------------------------------------------------
1916 :
1917 : integer (int_kind) :: &
1918 : i, j, i2, j2, &! dummy loop indices ! LCOV_EXCL_LINE
1919 : istat, &! status flag for allocation ! LCOV_EXCL_LINE
1920 : processor, &! processor position in cartesian decomp ! LCOV_EXCL_LINE
1921 : globalID, &! global block ID ! LCOV_EXCL_LINE
1922 : localID, &! block location on this processor ! LCOV_EXCL_LINE
1923 : blktogether, &! number of blocks together ! LCOV_EXCL_LINE
1924 : cnt ! counter
1925 :
1926 : integer (int_kind), dimension(:), allocatable :: &
1927 0 : proc_tmp ! temp processor id
1928 :
1929 : integer (int_kind) :: n
1930 :
1931 : character(len=*),parameter :: subname='(create_distrb_sectcart)'
1932 :
1933 : !----------------------------------------------------------------------
1934 : !
1935 : ! create communicator for this distribution
1936 : !
1937 : !----------------------------------------------------------------------
1938 :
1939 0 : call create_communicator(newDistrb%communicator, nprocs)
1940 :
1941 : !----------------------------------------------------------------------
1942 : !
1943 : ! try to find best processor arrangement
1944 : !
1945 : !----------------------------------------------------------------------
1946 :
1947 0 : newDistrb%nprocs = nprocs
1948 :
1949 : !----------------------------------------------------------------------
1950 : !
1951 : ! allocate space for decomposition
1952 : !
1953 : !----------------------------------------------------------------------
1954 :
1955 : allocate (newDistrb%blockLocation(nblocks_tot), &
1956 0 : newDistrb%blockLocalID (nblocks_tot), stat=istat)
1957 0 : if (istat > 0) then
1958 : call abort_ice( &
1959 0 : 'create_distrb_sectcart: error allocating blockLocation or blockLocalID')
1960 0 : return
1961 : endif
1962 :
1963 0 : allocate (newDistrb%blockCnt(nprocs))
1964 : !----------------------------------------------------------------------
1965 : !
1966 : ! distribute blocks linearly across processors in quadrants
1967 : !
1968 : !----------------------------------------------------------------------
1969 :
1970 0 : allocate(proc_tmp(nprocs))
1971 0 : proc_tmp = 0
1972 :
1973 0 : allocate(newDistrb%blockIndex(nprocs,max_blocks))
1974 0 : newDistrb%blockIndex(:,:) = 0
1975 :
1976 0 : blktogether = max(1,nint(float(nblocks_x*nblocks_y)/float(4*nprocs)))
1977 :
1978 : ! --- two phases, reset processor and cnt for each phase
1979 : ! --- phase 1 is south to north, east to west on the left half of the domain
1980 : ! --- phase 2 is north to south, east to west on the right half of the domain
1981 :
1982 0 : if (mod(nblocks_x,2) /= 0) then
1983 0 : call abort_ice(subname//'ERROR: nblocks_x not divisible by 2')
1984 0 : return
1985 : endif
1986 :
1987 0 : do n=1,2
1988 0 : processor = 1
1989 0 : cnt = 0
1990 0 : do j2=1,nblocks_y
1991 0 : do i2=1,nblocks_x/2
1992 :
1993 0 : if (n == 1) then
1994 0 : i = i2
1995 0 : j = j2
1996 : else
1997 0 : i = nblocks_x/2 + i2
1998 0 : j = nblocks_y - j2 + 1
1999 : endif
2000 :
2001 0 : globalID = (j-1)*nblocks_x + i
2002 0 : if (cnt >= blktogether) then
2003 0 : processor = mod(processor,nprocs) + 1
2004 0 : cnt = 0
2005 : endif
2006 0 : cnt = cnt + 1
2007 :
2008 0 : if (workPerBlock(globalID) /= 0) then
2009 0 : proc_tmp(processor) = proc_tmp(processor) + 1
2010 0 : localID = proc_tmp(processor)
2011 0 : if (localID > max_blocks) then
2012 0 : call abort_ice(subname//'ERROR: max_blocks too small')
2013 0 : return
2014 : endif
2015 0 : newDistrb%blockLocation(globalID) = processor
2016 0 : newDistrb%blockLocalID (globalID) = localID
2017 0 : newDistrb%blockIndex(processor,localID) = globalID
2018 : else ! no work - eliminate block from distribution
2019 0 : newDistrb%blockLocation(globalID) = 0
2020 0 : newDistrb%blockLocalID (globalID) = 0
2021 : endif
2022 :
2023 : end do
2024 : end do
2025 : end do
2026 :
2027 0 : newDistrb%numLocalBlocks = proc_tmp(my_task+1)
2028 0 : newDistrb%blockCnt(:) = proc_tmp(:)
2029 0 : deallocate(proc_tmp)
2030 :
2031 : ! write(nu_diag,*) 'my_task,newDistrb%numLocalBlocks',&
2032 : ! my_task,newDistrb%numLocalBlocks
2033 :
2034 : !----------------------------------------------------------------------
2035 : !
2036 : ! now store the local info
2037 : !
2038 : !----------------------------------------------------------------------
2039 :
2040 0 : globalID = 0
2041 :
2042 0 : if (newDistrb%numLocalBlocks > 0) then
2043 : allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), &
2044 0 : stat=istat)
2045 0 : if (istat > 0) then
2046 : call abort_ice( &
2047 0 : 'create_distrb_sectcart: error allocating numLocalBlocks')
2048 0 : return
2049 : endif
2050 :
2051 0 : processor = my_task + 1
2052 0 : do localID = 1,newDistrb%numLocalBlocks
2053 0 : newDistrb%blockGlobalID (localID) = newDistrb%blockIndex(processor,&
2054 0 : localID)
2055 : enddo
2056 : endif
2057 :
2058 : !----------------------------------------------------------------------
2059 :
2060 0 : end function create_distrb_sectcart
2061 :
2062 : !**********************************************************************
2063 :
2064 0 : function create_distrb_spacecurve(nprocs,work_per_block)
2065 :
2066 : ! This function distributes blocks across processors in a
2067 : ! load-balanced manner using space-filling curves
2068 : ! added by J. Dennis 3/10/06
2069 :
2070 : use ice_spacecurve
2071 :
2072 : integer (int_kind), intent(in) :: &
2073 : nprocs ! number of processors in this distribution
2074 :
2075 : integer (int_kind), dimension(:), intent(in) :: &
2076 : work_per_block ! amount of work per block
2077 :
2078 : type (distrb) :: &
2079 : create_distrb_spacecurve ! resulting structure describing
2080 : ! load-balanced distribution of blocks
2081 :
2082 : !----------------------------------------------------------------------
2083 : !
2084 : ! local variables
2085 : !
2086 : !----------------------------------------------------------------------
2087 :
2088 : integer (int_kind) :: &
2089 : i,j,n ,&! dummy loop indices ! LCOV_EXCL_LINE
2090 : pid ,&! dummy for processor id ! LCOV_EXCL_LINE
2091 : localID ! local block position on processor
2092 :
2093 : integer (int_kind), dimension(:),allocatable :: &
2094 0 : idxT_i,idxT_j ! Temporary indices for SFC
2095 :
2096 : integer (int_kind), dimension(:,:),allocatable :: &
2097 : Mesh ,&! !arrays to hold Space-filling curve ! LCOV_EXCL_LINE
2098 : Mesh2 ,&! ! LCOV_EXCL_LINE
2099 0 : Mesh3 !
2100 :
2101 : integer (int_kind) :: &
2102 : nblocksL,nblocks, &! Number of blocks local and total ! LCOV_EXCL_LINE
2103 : ii,extra,tmp1, &! loop tempories used for ! LCOV_EXCL_LINE
2104 : s1,ig ! partitioning curve
2105 :
2106 : type (factor_t) :: xdim,ydim
2107 :
2108 : integer (int_kind) :: it,jj,i2,j2
2109 : integer (int_kind) :: curveSize,sb_x,sb_y,itmp,numfac
2110 : integer (int_kind) :: subNum, sfcNum
2111 : logical :: foundx
2112 :
2113 : integer (int_kind), dimension(:), allocatable :: &
2114 0 : proc_tmp ! temp processor id for rake algrthm
2115 :
2116 : type (distrb) :: dist ! temp hold distribution
2117 :
2118 : character(len=*),parameter :: subname='(create_distrb_spacecurve)'
2119 :
2120 : !------------------------------------------------------
2121 : ! Space filling curves only work if:
2122 : !
2123 : ! nblocks_x = nblocks_y
2124 : ! nblocks_x = 2^m 3^n 5^p where m,n,p are integers
2125 : !------------------------------------------------------
2126 :
2127 0 : if((.not. IsFactorable(nblocks_y)) .or. (.not. IsFactorable(nblocks_x))) then
2128 0 : create_distrb_spacecurve = create_distrb_cart(nprocs, work_per_block)
2129 0 : return
2130 : endif
2131 :
2132 : !-----------------------------------------------
2133 : ! Factor the numbers of blocks in each dimension
2134 : !-----------------------------------------------
2135 :
2136 0 : xdim = Factor(nblocks_x)
2137 0 : ydim = Factor(nblocks_y)
2138 0 : numfac = xdim%numfact
2139 :
2140 : !---------------------------------------------
2141 : ! Match the common factors to create SFC curve
2142 : !---------------------------------------------
2143 :
2144 0 : curveSize=1
2145 0 : do it=1,numfac
2146 0 : call MatchFactor(xdim,ydim,itmp,foundX)
2147 0 : curveSize = itmp*curveSize
2148 : enddo
2149 :
2150 : !--------------------------------------
2151 : ! determine the size of the sub-blocks
2152 : ! within the space-filling curve
2153 : !--------------------------------------
2154 :
2155 0 : sb_x = ProdFactor(xdim)
2156 0 : sb_y = ProdFactor(ydim)
2157 :
2158 0 : call create_communicator(dist%communicator, nprocs)
2159 :
2160 0 : dist%nprocs = nprocs
2161 :
2162 : !----------------------------------------------------------------------
2163 : !
2164 : ! allocate space for decomposition
2165 : !
2166 : !----------------------------------------------------------------------
2167 :
2168 : allocate (dist%blockLocation(nblocks_tot), &
2169 0 : dist%blockLocalID (nblocks_tot))
2170 :
2171 0 : dist%blockLocation=0
2172 0 : dist%blockLocalID =0
2173 :
2174 0 : allocate (dist%blockCnt(nprocs))
2175 0 : dist%blockCnt(:) = 0
2176 :
2177 0 : allocate(dist%blockIndex(nprocs,max_blocks))
2178 0 : dist%blockIndex(:,:) = 0
2179 :
2180 : !----------------------------------------------------------------------
2181 : ! Create the array to hold the SFC and indices into it
2182 : !----------------------------------------------------------------------
2183 :
2184 0 : allocate(Mesh(curveSize,curveSize))
2185 0 : allocate(Mesh2(nblocks_x,nblocks_y))
2186 0 : allocate(Mesh3(nblocks_x,nblocks_y))
2187 0 : allocate(idxT_i(nblocks_tot),idxT_j(nblocks_tot))
2188 :
2189 0 : Mesh = 0
2190 0 : Mesh2 = 0
2191 0 : Mesh3 = 0
2192 :
2193 : !----------------------------------------------------------------------
2194 : ! Generate the space-filling curve
2195 : !----------------------------------------------------------------------
2196 :
2197 0 : call GenSpaceCurve(Mesh)
2198 0 : Mesh = Mesh + 1 ! make it 1-based indexing
2199 : ! if (debug_blocks) then
2200 : ! if (my_task == master_task) call PrintCurve(Mesh)
2201 : ! endif
2202 :
2203 : !-----------------------------------------------
2204 : ! Reindex the SFC to address internal sub-blocks
2205 : !-----------------------------------------------
2206 :
2207 0 : do j=1,curveSize
2208 0 : do i=1,curveSize
2209 0 : sfcNum = (Mesh(i,j) - 1)*(sb_x*sb_y) + 1
2210 0 : do jj=1,sb_y
2211 0 : do ii=1,sb_x
2212 0 : subNum = (jj-1)*sb_x + (ii-1)
2213 0 : i2 = (i-1)*sb_x + ii
2214 0 : j2 = (j-1)*sb_y + jj
2215 0 : Mesh2(i2,j2) = sfcNum + subNum
2216 : enddo
2217 : enddo
2218 : enddo
2219 : enddo
2220 :
2221 : !------------------------------------------------
2222 : ! create a linear array of i,j coordinates of SFC
2223 : !------------------------------------------------
2224 :
2225 0 : idxT_i=0;idxT_j=0
2226 0 : do j=1,nblocks_y
2227 0 : do i=1,nblocks_x
2228 0 : n = (j-1)*nblocks_x + i
2229 0 : ig = Mesh2(i,j)
2230 0 : if(work_per_block(n) /= 0) then
2231 0 : idxT_i(ig)=i;idxT_j(ig)=j
2232 : endif
2233 : enddo
2234 : enddo
2235 :
2236 : !-----------------------------
2237 : ! Compress out the land blocks
2238 : !-----------------------------
2239 :
2240 0 : ii=0
2241 0 : do i=1,nblocks_tot
2242 0 : if(IdxT_i(i) .gt. 0) then
2243 0 : ii=ii+1
2244 0 : Mesh3(idxT_i(i),idxT_j(i)) = ii
2245 : endif
2246 : enddo
2247 0 : nblocks=ii
2248 0 : if (debug_blocks) then
2249 0 : if (my_task == master_task) call PrintCurve(Mesh3)
2250 : endif
2251 :
2252 : !----------------------------------------------------
2253 : ! Compute the partitioning of the space-filling curve
2254 : !----------------------------------------------------
2255 :
2256 0 : nblocksL = nblocks/nprocs
2257 : ! every cpu gets nblocksL blocks, but the first 'extra' get nblocksL+1
2258 0 : extra = mod(nblocks,nprocs)
2259 0 : s1 = extra*(nblocksL+1)
2260 : ! split curve into two curves:
2261 : ! 1 ... s1 s2 ... nblocks
2262 : !
2263 : ! s1 = extra*(nblocksL+1) (count be 0)
2264 : ! s2 = s1+1
2265 : !
2266 : ! First region gets nblocksL+1 blocks per partition
2267 : ! Second region gets nblocksL blocks per partition
2268 : ! if(debug_blocks) write(nu_diag,*) 'nprocs,extra,nblocks,nblocksL,s1: ', &
2269 : ! nprocs,extra,nblocks,nblocksL,s1
2270 :
2271 : !-----------------------------------------------------------
2272 : ! Use the SFC to partition the blocks across processors
2273 : !-----------------------------------------------------------
2274 :
2275 0 : do j=1,nblocks_y
2276 0 : do i=1,nblocks_x
2277 0 : n = (j-1)*nblocks_x + i
2278 0 : ii = Mesh3(i,j)
2279 0 : if(ii>0) then
2280 0 : if(ii<=s1) then
2281 : ! ------------------------------------
2282 : ! If on the first region of curve
2283 : ! all processes get nblocksL+1 blocks
2284 : ! ------------------------------------
2285 0 : ii=ii-1
2286 0 : tmp1 = ii/(nblocksL+1)
2287 0 : dist%blockLocation(n) = tmp1+1
2288 : else
2289 : ! ------------------------------------
2290 : ! If on the second region of curve
2291 : ! all processes get nblocksL blocks
2292 : ! ------------------------------------
2293 0 : ii=ii-s1-1
2294 0 : tmp1 = ii/nblocksL
2295 0 : dist%blockLocation(n) = extra + tmp1 + 1
2296 : endif
2297 : endif
2298 : enddo
2299 : enddo
2300 :
2301 : !----------------------------------------------------------------------
2302 : ! Reset the dist data structure
2303 : !----------------------------------------------------------------------
2304 :
2305 0 : allocate(proc_tmp(nprocs))
2306 0 : proc_tmp = 0
2307 :
2308 0 : do n=1,nblocks_tot
2309 0 : pid = dist%blockLocation(n)
2310 : !!!dist%blockLocation(n) = pid
2311 :
2312 0 : if(pid>0) then
2313 0 : proc_tmp(pid) = proc_tmp(pid) + 1
2314 0 : if (proc_tmp(pid) > max_blocks) then
2315 0 : call abort_ice(subname//'ERROR: max_blocks too small')
2316 0 : return
2317 : endif
2318 0 : dist%blockLocalID(n) = proc_tmp(pid)
2319 0 : dist%blockIndex(pid,proc_tmp(pid)) = n
2320 : else
2321 0 : dist%blockLocalID(n) = 0
2322 : endif
2323 : enddo
2324 :
2325 0 : dist%numLocalBlocks = proc_tmp(my_task+1)
2326 0 : dist%blockCnt(:) = proc_tmp(:)
2327 :
2328 0 : if (dist%numLocalBlocks > 0) then
2329 0 : allocate (dist%blockGlobalID(dist%numLocalBlocks))
2330 0 : dist%blockGlobalID = 0
2331 : endif
2332 0 : localID = 0
2333 0 : do n=1,nblocks_tot
2334 0 : if (dist%blockLocation(n) == my_task+1) then
2335 0 : localID = localID + 1
2336 0 : dist%blockGlobalID(localID) = n
2337 : endif
2338 : enddo
2339 :
2340 : ! if (debug_blocks) then
2341 : ! if (my_task == master_task) write(nu_diag,*) 'dist%blockLocation:= ',dist%blockLocation
2342 : ! write(nu_diag,*) 'IAM: ',my_task,' SpaceCurve: Number of blocks {total,local} :=', &
2343 : ! nblocks_tot,nblocks,proc_tmp(my_task+1)
2344 : ! endif
2345 : !---------------------------------
2346 : ! Deallocate temporary arrays
2347 : !---------------------------------
2348 0 : deallocate(proc_tmp)
2349 0 : deallocate(Mesh,Mesh2,Mesh3)
2350 0 : deallocate(idxT_i,idxT_j)
2351 :
2352 0 : create_distrb_spacecurve = dist ! return the result
2353 :
2354 : !----------------------------------------------------------------------
2355 :
2356 0 : end function create_distrb_spacecurve
2357 :
2358 : !**********************************************************************
2359 :
2360 0 : subroutine ice_distributionRake (procWork, procID, blockWork, &
2361 0 : priority, distribution)
2362 :
2363 : ! This subroutine performs a rake algorithm to distribute the work
2364 : ! along a vector of processors. In the rake algorithm, a work
2365 : ! threshold is first set. Then, moving from left to right, work
2366 : ! above that threshold is raked to the next processor in line.
2367 : ! The process continues until the end of the vector is reached
2368 : ! and then the threshold is reduced by one for a second rake pass.
2369 : ! In this implementation, a priority for moving blocks is defined
2370 : ! such that the rake algorithm chooses the highest priority
2371 : ! block to be moved to the next processor. This can be used
2372 : ! for example to always choose the eastern-most block or to
2373 : ! ensure a block does not stray too far from its neighbors.
2374 :
2375 : integer (int_kind), intent(in), dimension(:) :: &
2376 : blockWork ,&! amount of work per block ! LCOV_EXCL_LINE
2377 : procID ! global processor number
2378 :
2379 : integer (int_kind), intent(inout), dimension(:) :: &
2380 : procWork ,&! amount of work per processor ! LCOV_EXCL_LINE
2381 : priority ! priority for moving a given block
2382 :
2383 : type (distrb), intent(inout) :: &
2384 : distribution ! distribution to change
2385 :
2386 : !----------------------------------------------------------------------
2387 : !
2388 : ! local variables
2389 : !
2390 : !----------------------------------------------------------------------
2391 :
2392 : integer (int_kind) :: &
2393 : i, n, &! dummy loop indices ! LCOV_EXCL_LINE
2394 : np1, &! n+1 corrected for cyclical wrap ! LCOV_EXCL_LINE
2395 : iproc, inext, &! processor ids for current and next ! LCOV_EXCL_LINE
2396 : nprocs, numBlocks, &! number of blocks, processors ! LCOV_EXCL_LINE
2397 : lastPriority, &! priority for most recent block ! LCOV_EXCL_LINE
2398 : minPriority, &! minimum priority ! LCOV_EXCL_LINE
2399 : lastLoc, &! location for most recent block ! LCOV_EXCL_LINE
2400 : meanWork, maxWork, &! mean,max work per processor ! LCOV_EXCL_LINE
2401 : diffWork, &! work differences ! LCOV_EXCL_LINE
2402 : numTransfers ! counter for number of block transfers
2403 :
2404 : character(len=*),parameter :: subname='(ice_distributionRake)'
2405 :
2406 : !----------------------------------------------------------------------
2407 : !
2408 : ! initialization
2409 : !
2410 : !----------------------------------------------------------------------
2411 :
2412 0 : nprocs = size(procWork)
2413 0 : numBlocks = size(blockWork)
2414 :
2415 : !*** compute mean,max work per processor
2416 :
2417 0 : meanWork = sum(procWork)/nprocs + 1
2418 0 : maxWork = maxval(procWork)
2419 : ! residual = mod(meanWork,nprocs)
2420 :
2421 0 : minPriority = 1000000
2422 0 : do n=1,nprocs
2423 0 : iproc = procID(n)
2424 0 : do i=1,numBlocks
2425 0 : if (distribution%blockLocation(i) == iproc) then
2426 0 : minPriority = min(minPriority,priority(i))
2427 : endif
2428 : end do
2429 : end do
2430 :
2431 : !----------------------------------------------------------------------
2432 : !
2433 : ! do two sets of transfers
2434 : !
2435 : !----------------------------------------------------------------------
2436 :
2437 0 : transferLoop: do
2438 :
2439 : !----------------------------------------------------------------------
2440 : !
2441 : ! do rake across the processors
2442 : !
2443 : !----------------------------------------------------------------------
2444 :
2445 0 : numTransfers = 0
2446 0 : do n=1,nprocs
2447 0 : if (n < nprocs) then
2448 0 : np1 = n+1
2449 : else
2450 0 : np1 = 1
2451 : endif
2452 0 : iproc = procID(n)
2453 0 : inext = procID(np1)
2454 :
2455 0 : if (procWork(n) > meanWork) then !*** pass work to next
2456 :
2457 0 : diffWork = procWork(n) - meanWork
2458 :
2459 0 : rake1: do while (diffWork > 1)
2460 :
2461 : !*** attempt to find a block with the required
2462 : !*** amount of work and with the highest priority
2463 : !*** for moving (eg boundary blocks first)
2464 :
2465 0 : lastPriority = 0
2466 0 : lastLoc = 0
2467 :
2468 0 : do i=1,numBlocks
2469 0 : if (distribution%blockLocation(i) == iproc) then
2470 0 : if (priority(i) > lastPriority ) then
2471 0 : lastPriority = priority(i)
2472 0 : lastLoc = i
2473 : endif
2474 : endif
2475 : end do
2476 0 : if (lastLoc == 0) exit rake1 ! could not shift work
2477 :
2478 0 : numTransfers = numTransfers + 1
2479 0 : distribution%blockLocation(lastLoc) = inext
2480 0 : if (np1 == 1) priority(lastLoc) = minPriority
2481 0 : diffWork = diffWork - blockWork(lastLoc)
2482 :
2483 0 : procWork(n ) = procWork(n )-blockWork(lastLoc)
2484 0 : procWork(np1) = procWork(np1)+blockWork(lastLoc)
2485 : end do rake1
2486 : endif
2487 :
2488 : end do
2489 :
2490 : !----------------------------------------------------------------------
2491 : !
2492 : ! increment meanWork by one and repeat
2493 : !
2494 : !----------------------------------------------------------------------
2495 :
2496 0 : meanWork = meanWork + 1
2497 0 : if (numTransfers == 0 .or. meanWork > maxWork) exit transferLoop
2498 :
2499 : end do transferLoop
2500 :
2501 : !----------------------------------------------------------------------
2502 :
2503 0 : end subroutine ice_distributionRake
2504 :
2505 : !***********************************************************************
2506 :
2507 0 : end module ice_distribution
2508 :
2509 : !|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|