butterflypack: Support fj Fortran (#17514)

* butterflypack: Support fj Fortran

* butterflypack: split patch

* butteflypack: add ieee_support_nan
This commit is contained in:
ketsubouchi 2020-10-19 13:16:19 +09:00 committed by GitHub
parent 008b77a2a7
commit 05ffbbc0be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1137 additions and 0 deletions

View File

@ -0,0 +1,269 @@
diff -u -r a/EXAMPLE/EMCURV_Driver.f90 b/EXAMPLE/EMCURV_Driver.f90
--- a/EXAMPLE/EMCURV_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMCURV_Driver.f90 2020-07-13 10:55:08.000000000 +0900
@@ -71,6 +71,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/EMCURV_Eigen_Driver.f90 b/EXAMPLE/EMCURV_Eigen_Driver.f90
--- a/EXAMPLE/EMCURV_Eigen_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMCURV_Eigen_Driver.f90 2020-07-13 10:55:29.000000000 +0900
@@ -84,6 +84,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/EMCURV_Module.f90 b/EXAMPLE/EMCURV_Module.f90
--- a/EXAMPLE/EMCURV_Module.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMCURV_Module.f90 2020-07-13 16:32:29.000000000 +0900
@@ -23,6 +23,7 @@
module EMCURV_MODULE
use BPACK_DEFS
use MISC_Utilities
+use service_routines,only:secnds
implicit none
!**** define your application-related variables here
diff -u -r a/EXAMPLE/EMSURF_Driver.f90 b/EXAMPLE/EMSURF_Driver.f90
--- a/EXAMPLE/EMSURF_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMSURF_Driver.f90 2020-07-13 10:55:49.000000000 +0900
@@ -64,6 +64,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/EMSURF_Eigen_Driver.f90 b/EXAMPLE/EMSURF_Eigen_Driver.f90
--- a/EXAMPLE/EMSURF_Eigen_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMSURF_Eigen_Driver.f90 2020-07-13 10:56:08.000000000 +0900
@@ -82,6 +82,8 @@
integer nargs,flag
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/EMSURF_Module.f90 b/EXAMPLE/EMSURF_Module.f90
--- a/EXAMPLE/EMSURF_Module.f90 2020-07-16 17:39:15.000000000 +0900
+++ b/EXAMPLE/EMSURF_Module.f90 2020-07-13 16:34:10.000000000 +0900
@@ -23,6 +23,7 @@
module EMSURF_MODULE
use BPACK_DEFS
use MISC_Utilities
+use service_routines,only:secnds
implicit none
!**** define your application-related variables here
diff -u -r a/EXAMPLE/FrontalDist_Driver.f90 b/EXAMPLE/FrontalDist_Driver.f90
--- a/EXAMPLE/FrontalDist_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/FrontalDist_Driver.f90 2020-07-13 10:56:28.000000000 +0900
@@ -279,6 +279,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/Frontal_Driver.f90 b/EXAMPLE/Frontal_Driver.f90
--- a/EXAMPLE/Frontal_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/Frontal_Driver.f90 2020-07-13 10:56:40.000000000 +0900
@@ -272,6 +272,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/FULLMAT_Driver.f90 b/EXAMPLE/FULLMAT_Driver.f90
--- a/EXAMPLE/FULLMAT_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/FULLMAT_Driver.f90 2020-07-13 10:56:53.000000000 +0900
@@ -176,6 +176,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
!**** nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/EXAMPLE/FULLMATKERREG_Driver.f90 b/EXAMPLE/FULLMATKERREG_Driver.f90
--- a/EXAMPLE/FULLMATKERREG_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/FULLMATKERREG_Driver.f90 2020-07-13 16:35:24.000000000 +0900
@@ -22,6 +22,7 @@
module APPLICATION_MODULE
use BPACK_DEFS
+use service_routines,only:secnds
implicit none
!**** define your application-related variables here
diff -u -r a/EXAMPLE/KERREG_Driver.f90 b/EXAMPLE/KERREG_Driver.f90
--- a/EXAMPLE/KERREG_Driver.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/KERREG_Driver.f90 2020-07-13 16:39:05.000000000 +0900
@@ -22,6 +22,7 @@
module APPLICATION_MODULE
use BPACK_DEFS
+use service_routines,only:secnds
implicit none
!**** define your application-related variables here
@@ -125,6 +126,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
allocate(groupmembers(nmpi))
diff -u -r a/EXAMPLE/SMAT_Driver.f90 b/EXAMPLE/SMAT_Driver.f90
--- a/EXAMPLE/SMAT_Driver.f90 2020-07-13 09:36:53.000000000 +0900
+++ b/EXAMPLE/SMAT_Driver.f90 2020-07-13 10:24:12.000000000 +0900
@@ -268,6 +268,8 @@
integer nargs,flag
integer v_major,v_minor,v_bugfix
+ integer, external :: iargc
+
! nmpi and groupmembers should be provided by the user
call MPI_Init(ierr)
call MPI_Comm_size(MPI_Comm_World,nmpi,ierr)
diff -u -r a/SRC/BPACK_constr.f90 b/SRC/BPACK_constr.f90
--- a/SRC/BPACK_constr.f90 2020-07-17 14:25:38.000000000 +0900
+++ b/SRC/BPACK_constr.f90 2020-07-13 09:25:41.000000000 +0900
@@ -1043,9 +1043,11 @@
curr=>lstr%head
curc=>lstc%head
do nn=1,Ninter
- select type(ptrr=>curr%item)
+ ptrr=>curr%item
+ select type(ptrr)
type is(iarray)
- select type(ptrc=>curc%item)
+ ptrc=>curc%item
+ select type(ptrc)
type is(iarray)
call Hmat_MapIntersec2Block_Loc(blocks_o,option,stats,msh,ptree,inters,nn,ptrr,ptrc,lstblk)
end select
@@ -1097,9 +1099,11 @@
curc=>blocks%lstc%head
allocate(blocks%inters(blocks%lstr%num_nods))
do nn=1,blocks%lstr%num_nods ! loop all lists of list of rows and columns
- select type(ptrr=>curr%item)
+ ptrr=>curr%item
+ select type(ptrr)
type is (iarray)
- select type(ptrc=>curc%item)
+ ptrc=>curc%item
+ select type(ptrc)
type is (iarray)
blocks%inters(nn)%nr=ptrr%num_nods
allocate(blocks%inters(nn)%rows(ptrr%num_nods))
@@ -1323,9 +1327,11 @@
curr=>lstr%head
curc=>lstc%head
do nn=1,Ninter
- select type(ptrr=>curr%item)
+ ptrr=>curr%item
+ select type(ptrr)
type is(iarray)
- select type(ptrc=>curc%item)
+ ptrc=>curc%item
+ select type(ptrc)
type is(iarray)
select case(option%format)
case(HODLR)
@@ -1383,9 +1389,11 @@
curc=>blocks%lstc%head
allocate(blocks%inters(blocks%lstr%num_nods))
do nn=1,blocks%lstr%num_nods ! loop all lists of list of rows and columns
- select type(ptrr=>curr%item)
+ ptrr=>curr%item
+ select type(ptrr)
type is (iarray)
- select type(ptrc=>curc%item)
+ ptrc=>curc%item
+ select type(ptrc)
type is (iarray)
blocks%inters(nn)%nr=ptrr%num_nods
allocate(blocks%inters(nn)%rows(ptrr%num_nods))
diff -u -r a/SRC/BPACK_defs.f90 b/SRC/BPACK_defs.f90
--- a/SRC/BPACK_defs.f90 2020-07-17 14:24:58.000000000 +0900
+++ b/SRC/BPACK_defs.f90 2020-07-10 17:25:39.000000000 +0900
@@ -245,7 +245,7 @@
! integer data_type ! the block data_type, need better documentation later
! integer nested_num ! depreciated
integer,allocatable :: ipiv(:) ! permutation of the LU of the dense diagonal blocks
- integer blockinfo_MPI(MPI_Header)
+ integer blockinfo_MPI(MPI_Header)
integer length_Butterfly_index_MPI ! length of the index message, the first INDEX_Header integers are 1. decpreciated 2. rankmax 3. level_butterfly. 4. num_blocks
integer length_Butterfly_data_MPI ! length of the value message
DT,allocatable :: fullmat_MPI(:) ! massage for the dense blocks
@@ -379,7 +379,7 @@
integer forwardN15flag ! 1 use N^1.5 algorithm. 0: use NlogN pseudo skeleton algorithm
real(kind=8) tol_comp ! matrix construction tolerance
integer::Nmin_leaf ! leaf sizes of HODLR tree
- integer nogeo
+ integer nogeo
integer xyzsort ! clustering methods given geometrical points: CKD: cartesian kd tree SKD: spherical kd tree (only for 3D points) TM: (2 mins no recursive)
integer::RecLR_leaf ! bottom level operations in a recursive merge-based LR compression: SVD, RRQR, ACA, BACA
real(kind=8):: near_para ! parameters used to determine whether two groups are nearfield or farfield pair
diff -u -r a/SRC/BPACK_utilities.f90 b/SRC/BPACK_utilities.f90
--- a/SRC/BPACK_utilities.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_utilities.f90 2020-07-13 09:06:56.000000000 +0900
@@ -478,6 +478,8 @@
integer nargs,flag
character(len=1024) :: strings,strings1
+ integer, external :: iargc
+
nargs = iargc()
flag=1
do while(flag==1)
diff -u -r a/SRC/MISC_linkedlist.f90 b/SRC/MISC_linkedlist.f90
--- a/SRC/MISC_linkedlist.f90 2020-07-10 12:47:03.000000000 +0900
+++ b/SRC/MISC_linkedlist.f90 2020-07-10 15:43:40.000000000 +0900
@@ -283,8 +283,8 @@
! equal to the given complex value.
!
subroutine nod_assign_nod_to_nod( LHS, RHS )
-type(nod), intent(inout) :: LHS
-type(nod), intent(in) :: RHS
+type(nod), intent(inout), target :: LHS
+type(nod), intent(in), target :: RHS
type(nod),pointer:: cur
class(*),pointer :: ptrl,ptrr,ptr
integer ii
@@ -300,9 +300,11 @@
if(allocated(RHS%item))then
allocate(LHS%item, source=RHS%item)
- select TYPE(ptrr=>RHS%item)
+ ptrr=>RHS%item
+ select TYPE(ptrr)
type is (list)
- select TYPE(ptrl=>LHS%item)
+ ptrl=>LHS%item
+ select TYPE(ptrl)
type is (list)
ptrl%num_nods=0
ptrl%head=>null()

View File

@ -0,0 +1,362 @@
diff -u -r -N a/SRC/Bplus_factor.f90 b/SRC/Bplus_factor.f90
--- a/SRC/Bplus_factor.f90 2020-07-16 17:19:24.000000000 +0900
+++ b/SRC/Bplus_factor.f90 2020-07-22 17:29:47.000000000 +0900
@@ -18,6 +18,7 @@
module Bplus_factor
use Bplus_compress
use Bplus_randomizedop
+use ieee_arithmetic
contains
@@ -1007,7 +1008,7 @@
exit
endif
- if(isnan(error_inout))then
+ if(ieee_support_nan() .and. ieee_is_nan(error_inout))then
converged=0
exit
endif
diff -u -r -N a/SRC/Bplus_randomized.f90 b/SRC/Bplus_randomized.f90
--- a/SRC/Bplus_randomized.f90 2020-07-16 17:22:35.000000000 +0900
+++ b/SRC/Bplus_randomized.f90 2020-07-22 17:31:04.000000000 +0900
@@ -20,6 +20,7 @@
use MISC_Utilities
use BPACK_Utilities
+use ieee_arithmetic
contains
@@ -1615,7 +1616,7 @@
Vout(1:mm,1:num_vect_sub) = Vin(1:mm,1:num_vect_sub)- Vout(1:mm,1:num_vect_sub)
Vout(1+mm:N,1:num_vect_sub) = Vin(1+mm:N,1:num_vect_sub)
- if(isnan(fnorm(Vout,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vout,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD11N'
stop
end if
@@ -1628,7 +1629,7 @@
&Vout(1+mm:mm+nn,1:num_vect_sub),Vin(1+mm:mm+nn,1:num_vect_sub),ctemp1,ctemp2,ptree,stats)
Vin = Vout + Vin
- if(isnan(fnorm(Vin,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vin,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD22N'
stop
end if
@@ -1640,7 +1641,7 @@
&Vbuff, Vout(1+mm:mm+nn,1:num_vect_sub),ctemp1,ctemp2,ptree,stats)
Vout(1+mm:mm+nn,1:num_vect_sub) = Vout(1+mm:mm+nn,1:num_vect_sub) + Vbuff
- if(isnan(fnorm(Vout,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vout,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD33N'
stop
end if
@@ -1660,7 +1661,7 @@
Vout(1:mm,1:num_vect_sub) = Vin(1:mm,1:num_vect_sub) - Vout(1:mm,1:num_vect_sub)
Vout(1+mm:N,1:num_vect_sub) = Vin(1+mm:N,1:num_vect_sub)
- if(isnan(fnorm(Vout,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vout,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD11T'
stop
end if
@@ -1671,7 +1672,7 @@
&Vout(1+mm:mm+nn,1:num_vect_sub),Vin(1+mm:mm+nn,1:num_vect_sub),ctemp1,ctemp2,ptree,stats)
Vin = Vout + Vin
- if(isnan(fnorm(Vin,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vin,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD22T'
stop
end if
@@ -1682,7 +1683,7 @@
&Vbuff,Vout(1+mm:N,1:num_vect_sub),ctemp1,ctemp2,ptree,stats)
Vout(1+mm:N,1:num_vect_sub) = Vout(1+mm:N,1:num_vect_sub)+Vbuff
- if(isnan(fnorm(Vout,N,num_vect_sub)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Vout,N,num_vect_sub)))then
write(*,*)fnorm(Vin,N,num_vect_sub),fnorm(Vout,N,num_vect_sub),'ABCD33T'
stop
end if
diff -u -r -N a/SRC/Bplus_utilities.f90 b/SRC/Bplus_utilities.f90
--- a/SRC/Bplus_utilities.f90 2020-07-16 17:26:34.000000000 +0900
+++ b/SRC/Bplus_utilities.f90 2020-07-22 17:44:12.000000000 +0900
@@ -17,6 +17,7 @@
#include "ButterflyPACK_config.fi"
module Bplus_Utilities
use MISC_Utilities
+use ieee_arithmetic
contains
@@ -1144,7 +1145,7 @@
stop
end if
-BF_checkNAN = isnan(temp)
+BF_checkNAN = ieee_support_nan() .and. ieee_is_nan(temp)
end function BF_checkNAN
@@ -1416,7 +1417,7 @@
- if(isnan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank,dimension_n)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank,dimension_n)))then
write(*,*)'NAN in L 1'
end if
@@ -1431,7 +1432,7 @@
call gemmf90(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j)%matrix,rank, block_i%ButterflyV%blocks(2*index_j)%matrix,dimension_n,&
block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank, 'N','T',rank,dimension_n,nn,cone,czero)
- if(isnan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,dimension_n)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,dimension_n)))then
write(*,*)'NAN in L 2'
end if
@@ -1443,7 +1444,7 @@
allocate(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix(rank,nn))
block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix = block_i%ButterflyKerl(level)%blocks(index_i,2*index_j-1)%matrix
- if(isnan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank,nn)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank,nn)))then
write(*,*)'NAN in L 3'
end if
@@ -1452,7 +1453,7 @@
allocate(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix(rank,nn))
block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix = block_i%ButterflyKerl(level)%blocks(index_i,2*index_j)%matrix
- if(isnan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,nn)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,nn)))then
write(*,*)'NAN in L 4'
end if
@@ -1469,7 +1470,7 @@
allocate(block_o%ButterflyU%blocks(index_i+index_i_start)%matrix(mm,rank))
block_o%ButterflyU%blocks(index_i+index_i_start)%matrix = block_i%ButterflyU%blocks(index_i)%matrix
if(present(memory))memory = memory + SIZEOF(block_o%ButterflyU%blocks(index_i+index_i_start)%matrix)/1024.0d3
- if(isnan(fnorm(block_o%ButterflyU%blocks(index_i+index_i_start)%matrix,mm,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyU%blocks(index_i+index_i_start)%matrix,mm,rank)))then
write(*,*)'NAN in L 5'
end if
endif
@@ -1501,7 +1502,7 @@
! write(*,*)'good 1.1'
- if(isnan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,dimension_m,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,dimension_m,rank)))then
write(*,*)'NAN in R 1'
end if
@@ -1517,7 +1518,7 @@
block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,'N','N',dimension_m,rank,mm,cone,czero)
! write(*,*)'good 2'
- if(isnan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,rank)))then
write(*,*)'NAN in R 2'
end if
else
@@ -1528,7 +1529,7 @@
allocate(block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix(mm,rank))
block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix = block_i%ButterflyKerl(level)%blocks(2*index_i-1,index_j)%matrix
- if(isnan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,mm,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,mm,rank)))then
write(*,*)'NAN in R 3'
end if
@@ -1539,7 +1540,7 @@
allocate(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix(mm,rank))
block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix = block_i%ButterflyKerl(level)%blocks(2*index_i,index_j)%matrix
! write(*,*)'good 4'
- if(isnan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,mm,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,mm,rank)))then
write(*,*)'NAN in R 4'
end if
end if
@@ -1556,7 +1557,7 @@
block_o%ButterflyV%blocks(index_j+index_j_start)%matrix = block_i%ButterflyV%blocks(index_j)%matrix
if(present(memory))memory = memory + SIZEOF(block_o%ButterflyV%blocks(index_j+index_j_start)%matrix)/1024.0d3
- if(isnan(fnorm(block_o%ButterflyV%blocks(index_j+index_j_start)%matrix,nn,rank)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(block_o%ButterflyV%blocks(index_j+index_j_start)%matrix,nn,rank)))then
write(*,*)'NAN in R 5'
end if
endif
@@ -5129,7 +5130,7 @@
if (chara=='N') then
- if(isnan(sum(abs(random1(:,1))**2)))then
+ if(ieee_support_nan() .and. ieee_is_nan(sum(abs(random1(:,1))**2)))then
write(*,*)'NAN in 1 BF_block_MVP_dat'
stop
end if
@@ -5470,7 +5471,7 @@
enddo
!$omp end parallel do
- if(isnan(sum(abs(random2(:,1))**2)))then
+ if(ieee_support_nan() .and. ieee_is_nan(sum(abs(random2(:,1))**2)))then
write(*,*)'NAN in 2 BF_block_MVP_dat',blocks%row_group,blocks%col_group,blocks%level,blocks%level_butterfly
stop
end if
@@ -5904,7 +5905,7 @@
if (chara=='N') then
- if(isnan(sum(abs(VectIn(:,1))**2)))then
+ if(ieee_support_nan() .and. ieee_is_nan(sum(abs(VectIn(:,1))**2)))then
write(*,*)'NAN in 1 BF_block_MVP_partial'
stop
end if
@@ -7995,10 +7996,10 @@
allocate(Singular(mn_min))
call copymatT(blocks%ButterflyV%blocks(j)%matrix,matrixtemp,dimension_n,rank)
- call assert(.not. isnan(fnorm(matrixtemp,rank,dimension_n)),'matrixtemp NAN at 3')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(fnorm(matrixtemp,rank,dimension_n))),'matrixtemp NAN at 3')
call gesvd_robust(matrixtemp,Singular,UU,VV,rank,dimension_n,mn_min)
- call assert(.not. isnan(sum(Singular)),'Singular NAN at 3')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(sum(Singular))),'Singular NAN at 3')
do ii=1,mn_min
UU(:,ii) = UU(:,ii)*Singular(ii)
@@ -8060,9 +8061,9 @@
matrixtemp(1:rank,1:nn1) = blocks%ButterflyKerl(level)%blocks(i,j)%matrix
! call copymatN(blocks%ButterflyKerl(level)%blocks(i,j+1)%matrix,matrixtemp(1:rank,1+nn1:nn2+nn1),rank,nn2)
matrixtemp(1:rank,1+nn1:nn2+nn1) = blocks%ButterflyKerl(level)%blocks(i,j+1)%matrix
- call assert(.not. isnan(fnorm(matrixtemp,rank,nn1+nn2)),'matrixtemp NAN at 4')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(fnorm(matrixtemp,rank,nn1+nn2))),'matrixtemp NAN at 4')
call gesvd_robust(matrixtemp,Singular,UU,VV,rank,nn1+nn2,mn_min)
- call assert(.not. isnan(sum(Singular)),'Singular NAN at 4')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(sum(Singular))),'Singular NAN at 4')
do ii=1,mn_min
UU(:,ii) = UU(:,ii)*Singular(ii)
@@ -8174,10 +8175,10 @@
! call copymatN(blocks%ButterflyU%blocks(i)%matrix,matrixtemp,dimension_m,rank)
matrixtemp = blocks%ButterflyU%blocks(i)%matrix
- call assert(.not. isnan(fnorm(matrixtemp,dimension_m,rank)),'matrixtemp NAN at 1')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(fnorm(matrixtemp,dimension_m,rank))),'matrixtemp NAN at 1')
call gesvd_robust(matrixtemp,Singular,UU,VV,dimension_m,rank,mn_min)
- call assert(.not. isnan(sum(Singular)),'Singular NAN at 1')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(sum(Singular))),'Singular NAN at 1')
do ii=1,mn_min
VV(ii,:) = VV(ii,:)*Singular(ii)
@@ -8239,7 +8240,7 @@
matrixtemp(1:mm1,1:rank) = blocks%ButterflyKerl(level)%blocks(i,j)%matrix
! call copymatN(blocks%ButterflyKerl(level)%blocks(i+1,j)%matrix,matrixtemp(1+mm1:mm2+mm1,1:rank),mm2,rank)
matrixtemp(1+mm1:mm2+mm1,1:rank) = blocks%ButterflyKerl(level)%blocks(i+1,j)%matrix
- call assert(.not. isnan(fnorm(matrixtemp,mm1+mm2,rank)),'matrixtemp NAN at 2')
+ call assert(.not. (ieee_support_nan() .and. ieee_is_nan(fnorm(matrixtemp,mm1+mm2,rank))),'matrixtemp NAN at 2')
call gesvd_robust(matrixtemp,Singular,UU,VV,mm1+mm2,rank,mn_min)
! if(isnan(sum(Singular)).and. mm1+mm2<rank)then
@@ -8247,7 +8248,7 @@
! end if
! call assert(.not. isnan(sum(Singular)),'Singular NAN at 2')
- if(isnan(sum(Singular)))then
+ if(ieee_support_nan() .and. ieee_is_nan(sum(Singular)))then
write(*,*)'Singular NAN at 2',mm1+mm2,rank
do ii=1,mm1+mm2
do jj=1,rank
diff -u -r -N a/SRC/MISC_utilities.f90 b/SRC/MISC_utilities.f90
--- a/SRC/MISC_utilities.f90 2020-07-16 17:29:34.000000000 +0900
+++ b/SRC/MISC_utilities.f90 2020-07-22 17:45:29.000000000 +0900
@@ -33,7 +33,7 @@
use omp_lib
use MISC_DenseLA
use BPACK_linkedlist
-
+use ieee_arithmetic
integer, parameter :: int64 = selected_int_kind(18)
@@ -131,7 +131,7 @@
isnanMat = .false.
do ii =1,m
do jj =1,n
- isnanMat = isnanMat .or. isnan(abs(A(ii,jj)))
+ isnanMat = isnanMat .or. (ieee_support_nan() .and. ieee_is_nan(abs(A(ii,jj))))
end do
end do
end function isnanMat
@@ -545,7 +545,7 @@
allocate(Singular(mn))
-if(isnan(fnorm(mat,M,N)))then
+if(ieee_support_nan() .and. ieee_is_nan(fnorm(mat,M,N)))then
write(*,*)'input matrix NAN in GetRank'
stop
end if
@@ -557,7 +557,7 @@
rank = 1
deallocate(UU,VV,Singular)
else
- if(isnan(sum(Singular)))then
+ if(ieee_support_nan() .and. ieee_is_nan(sum(Singular)))then
deallocate(UU,VV,Singular)
write(*,*)'gesvd_robust wrong in GetRank, switching to QR'
@@ -578,7 +578,7 @@
! RRQR
jpvt = 0
call geqp3f90(Atmp,jpvt,tau,flop)
- if(isnan(fnorm(Atmp,mnl,mn)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(Atmp,mnl,mn)))then
write(*,*)'Q or R has NAN in GetRank'
stop
end if
@@ -718,7 +718,7 @@
deallocate(jpiv)
deallocate(JPERM)
- if(isnan(fnorm(mat2D,myArows,myAcols)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(mat2D,myArows,myAcols)))then
write(*,*)'Q or R has NAN in PComputeRange'
stop
end if
@@ -753,7 +753,7 @@
if(present(Flops))Flops=0d0
mn=min(M,N)
- if(isnan(fnorm(mat,M,N)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(mat,M,N)))then
write(*,*)'input matrix NAN in ComputeRange'
stop
end if
@@ -781,7 +781,7 @@
if(present(Flops))Flops = Flops + flop
rank=mn
endif
- if(isnan(fnorm(mat,M,N)))then
+ if(ieee_support_nan() .and. ieee_is_nan(fnorm(mat,M,N)))then
write(*,*)'Q or R has NAN in ComputeRange'
stop
end if
@@ -1586,7 +1586,7 @@
do ii = 1,k
- if(isnan(abs(sum(x(:,ii)))))then
+ if(ieee_support_nan() .and. ieee_is_nan(abs(sum(x(:,ii)))))then
! do jj =1,rank
! write(*,*)jj,'hh',A_tmp_rank(jj,:)
! end do

View File

@ -0,0 +1,502 @@
diff -u -r a/EXAMPLE/EMSURF_Module.f90 b/EXAMPLE/EMSURF_Module.f90
--- a/EXAMPLE/EMSURF_Module.f90 2020-07-13 09:36:52.000000000 +0900
+++ b/EXAMPLE/EMSURF_Module.f90 2020-07-16 17:39:15.000000000 +0900
@@ -1414,7 +1414,8 @@
quant%maxedgelength = 0
do edge=1,Maxedge
quant%maxedgelength = max(quant%maxedgelength,sqrt(sum(abs(quant%xyz(:,quant%info_unk(1,edge))-quant%xyz(:,quant%info_unk(2,edge)))**2)))
- if(sqrt(sum(abs(quant%xyz(:,quant%info_unk(1,edge))-quant%xyz(:,quant%info_unk(2,edge)))**2))>0.9d0)write(*,*)edge,sqrt(sum(abs(quant%xyz(:,quant%info_unk(1,edge))-quant%xyz(:,quant%info_unk(2,edge)))**2)),quant%info_unk(1,edge),quant%info_unk(2,edge),quant%xyz(:,quant%info_unk(1,edge)),quant%xyz(:,quant%info_unk(2,edge))
+ if(sqrt(sum(abs(quant%xyz(:,quant%info_unk(1,edge))-quant%xyz(:,quant%info_unk(2,edge)))**2))>0.9d0)write(*,*)edge,&
+ sqrt(sum(abs(quant%xyz(:,quant%info_unk(1,edge))-quant%xyz(:,quant%info_unk(2,edge)))**2)),quant%info_unk(1,edge),quant%info_unk(2,edge),quant%xyz(:,quant%info_unk(1,edge)),quant%xyz(:,quant%info_unk(2,edge))
end do
diff -u -r a/SRC/BPACK_constr.f90 b/SRC/BPACK_constr.f90
--- a/SRC/BPACK_constr.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_constr.f90 2020-07-17 14:25:38.000000000 +0900
@@ -536,13 +536,13 @@
level_butterfly=ho_bf1%levels(level_c)%BP(ii)%LL(1)%matrices_block(1)%level_butterfly
t1=OMP_GET_WTIME()
- call BF_randomized(ho_bf1%levels(level_c)%BP(ii)%LL(1)%matrices_block(1)%pgno,level_butterfly,rank0_outter,rankrate_outter,ho_bf1%levels(level_c)%BP(ii)%LL(1)%matrices_block(1),ho_bf1%levels(level_c)%BP(ii),Bplus_block_MVP_Exact_dat,error,'Exact',option,stats,ptree,msh)
+ call BF_randomized(ho_bf1%levels(level_c)%BP(ii)%LL(1)%matrices_block(1)%pgno,&
+ level_butterfly,rank0_outter,rankrate_outter,ho_bf1%levels(level_c)%BP(ii)%LL(1)%matrices_block(1),ho_bf1%levels(level_c)%BP(ii),Bplus_block_MVP_Exact_dat,error,'Exact',option,stats,ptree,msh)
t2=OMP_GET_WTIME()
tim_tmp = tim_tmp + t2 - t1
- ! call Bplus_randomized_constr(level_butterfly,ho_bf1%levels(level_c)%BP(ii),ho_bf1%levels(level_c)%BP(ii),rank0_inner,rankrate_inner,Bplus_block_MVP_Exact_dat,rank0_outter,rankrate_outter,Bplus_block_MVP_Outter_Exact_dat,error,'Exact',option,stats,ptree,msh)
if(ptree%MyID==Main_ID .and. option%verbosity>=0)write (*,*)'time_tmp',time_tmp,'randomized_bf time,', tim_tmp,'stats%Time_random,',stats%Time_random
diff -u -r a/SRC/BPACK_defs.f90 b/SRC/BPACK_defs.f90
--- a/SRC/BPACK_defs.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_defs.f90 2020-07-17 14:24:58.000000000 +0900
@@ -245,7 +245,7 @@
! integer data_type ! the block data_type, need better documentation later
! integer nested_num ! depreciated
integer,allocatable :: ipiv(:) ! permutation of the LU of the dense diagonal blocks
- integer blockinfo_MPI(MPI_Header) ! high-level data extracted from the index message: 1. level 2. row_group 3. col_group 4. nested_num(depreciated) 5. style 6. prestyle(depreciated) 7. data_type(depreciated) 8. level_butterfly 9. length_Butterfly_index_MPI 10. length_Butterfly_data_MPI 11. memory (depreciated)
+ integer blockinfo_MPI(MPI_Header)
integer length_Butterfly_index_MPI ! length of the index message, the first INDEX_Header integers are 1. decpreciated 2. rankmax 3. level_butterfly. 4. num_blocks
integer length_Butterfly_data_MPI ! length of the value message
DT,allocatable :: fullmat_MPI(:) ! massage for the dense blocks
@@ -379,7 +379,7 @@
integer forwardN15flag ! 1 use N^1.5 algorithm. 0: use NlogN pseudo skeleton algorithm
real(kind=8) tol_comp ! matrix construction tolerance
integer::Nmin_leaf ! leaf sizes of HODLR tree
- integer nogeo ! 1: no geometrical information available to hodlr, use NATUTAL or TM_GRAM clustering 0: geometrical points are available for TM or CKD clustering 2: no geometrical information available, but a user-defined distance function and compressibility function is provided. 3: no geometrical information available, but an array of knn*N indicating the knn neighbours of each element is provided
+ integer nogeo
integer xyzsort ! clustering methods given geometrical points: CKD: cartesian kd tree SKD: spherical kd tree (only for 3D points) TM: (2 mins no recursive)
integer::RecLR_leaf ! bottom level operations in a recursive merge-based LR compression: SVD, RRQR, ACA, BACA
real(kind=8):: near_para ! parameters used to determine whether two groups are nearfield or farfield pair
diff -u -r a/SRC/BPACK_factor.f90 b/SRC/BPACK_factor.f90
--- a/SRC/BPACK_factor.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_factor.f90 2020-07-16 17:41:25.000000000 +0900
@@ -271,7 +271,8 @@
endif
if(stats%Add_random_CNT(level)+stats%Mul_random_CNT(level)+stats%XLUM_random_CNT(level)/=0)then
- write (*,'(A7,I5,A17,I5,A7,I8,Es10.2,A7,I8,Es10.2,A12,I8,Es10.2)') " level:",level,'level_butterfly:',level_butterfly,'add:',stats%Add_random_CNT(level),stats%Add_random_Time(level),'mul:',stats%Mul_random_CNT(level),stats%Mul_random_Time(level),'XLUM:',stats%XLUM_random_CNT(level),stats%XLUM_random_time(level)
+ write (*,'(A7,I5,A17,I5,A7,I8,Es10.2,A7,I8,Es10.2,A12,I8,Es10.2)') " level:",level,'level_butterfly:',level_butterfly,&
+ 'add:',stats%Add_random_CNT(level),stats%Add_random_Time(level),'mul:',stats%Mul_random_CNT(level),stats%Mul_random_Time(level),'XLUM:',stats%XLUM_random_CNT(level),stats%XLUM_random_time(level)
endif
enddo
! write(*,*)'max inverse butterfly rank:', butterflyrank_inverse
diff -u -r a/SRC/BPACK_randomized.f90 b/SRC/BPACK_randomized.f90
--- a/SRC/BPACK_randomized.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_randomized.f90 2020-07-16 17:43:00.000000000 +0900
@@ -997,7 +997,8 @@
call Full_block_MVP_dat(block_rand(bb_inv-Bidxs+1),'N',idx_end_loc-idx_start_loc+1,num_vect,&
&RandomVectors_InOutput(1)%vector(idx_start_loc:idx_end_loc,1:num_vect),RandomVectors_InOutput(2)%vector(idx_start_loc:idx_end_loc,1:num_vect),cone,czero)
else
- call BF_block_MVP_twoforward_dat(ho_bf1,level_c,bb_inv,block_rand,'N',idx_end_loc-idx_start_loc+1,num_vect,RandomVectors_InOutput(1)%vector(idx_start_loc:idx_end_loc,1:num_vect),RandomVectors_InOutput(2)%vector(idx_start_loc:idx_end_loc,1:num_vect),cone,czero,ptree,stats)
+ call BF_block_MVP_twoforward_dat(ho_bf1,level_c,bb_inv,block_rand,'N',idx_end_loc-idx_start_loc+1,&
+ num_vect,RandomVectors_InOutput(1)%vector(idx_start_loc:idx_end_loc,1:num_vect),RandomVectors_InOutput(2)%vector(idx_start_loc:idx_end_loc,1:num_vect),cone,czero,ptree,stats)
endif
end do
@@ -1324,13 +1325,13 @@
!!!!!***** this subroutine is part of the randomized HODLR_BF.
-! The difference between this subroutine and BF_Resolving_Butterfly_LL_dat is that this subroutine requires redistribution of RandVectIn and RandVectOut to match the data layout of block_rand(bb_inv*2-1-Bidxs+1) and block_rand(bb_inv*2-Bidxs+1). Therefore this subroutine reconstructs two neighbouring butterflies together.
subroutine BF_Resolving_Butterfly_LL_dat_twoforward(ho_bf1,level_c,num_vect_sub,nth_s,nth_e,Ng,level,Bidxs,bb_inv,block_rand,RandVectIn,RandVectOut,option,ptree,msh,stats)
use BPACK_DEFS
implicit none
integer level,level_c, ii, bb,bb_inv
DT :: RandVectIn(:,:),RandVectOut(:,:)
- DT,pointer :: mat1(:,:),mat2(:,:),mat(:,:),matQ1(:,:),matQ2(:,:),matQ(:,:),matQ2D(:,:),matQcA_trans1(:,:),matQcA_trans2(:,:),matQcA_trans(:,:),matQcA_trans2D(:,:),matQUt2D(:,:),UU(:,:),VV(:,:),mattemp(:,:),matOut1(:,:),matOut2(:,:),matOut(:,:),matIn1(:,:),matIn2(:,:),matIn(:,:)
+ DT,pointer :: mat1(:,:),mat2(:,:),mat(:,:),matQ1(:,:),matQ2(:,:),matQ(:,:),matQ2D(:,:),matQcA_trans1(:,:),matQcA_trans2(:,:),&
+ matQcA_trans(:,:),matQcA_trans2D(:,:),matQUt2D(:,:),UU(:,:),VV(:,:),mattemp(:,:),matOut1(:,:),matOut2(:,:),matOut(:,:),matIn1(:,:),matIn2(:,:),matIn(:,:)
type(matrixblock),pointer::block_o,block_inv,block_schur,block_off1,block_off2,block_off
integer groupn,groupm,mm(2),nn(2),ierr,nin1,nout1,nin2,nout2,offM(2),offN(2),offout1,offout2,rank
type(hobf)::ho_bf1
@@ -1419,13 +1420,13 @@
!!!!!***** this subroutine is part of the randomized HODLR_BF.
-! The difference between this subroutine and BF_Resolving_Butterfly_RR_dat is that this subroutine requires redistribution of RandVectIn and RandVectOut to match the data layout of block_rand(bb_inv*2-1-Bidxs+1) and block_rand(bb_inv*2-Bidxs+1). Therefore this subroutine reconstructs two neighbouring butterflies together.
subroutine BF_Resolving_Butterfly_RR_dat_twoforward(ho_bf1,level_c,num_vect_sub,nth_s,nth_e,Ng,level,Bidxs,bb_inv,block_rand,RandVectIn,RandVectOut,option,ptree,msh,stats)
use BPACK_DEFS
implicit none
integer level,level_c, ii, bb,bb_inv
DT :: RandVectIn(:,:),RandVectOut(:,:)
- DT,pointer :: mat1(:,:),mat2(:,:),mat(:,:),matQ1(:,:),matQ2(:,:),matQ(:,:),matQ2D(:,:),matQcA_trans1(:,:),matQcA_trans2(:,:),matQcA_trans(:,:),matQcA_trans2D(:,:),matQUt2D(:,:),UU(:,:),VV(:,:),mattemp(:,:),matOut1(:,:),matOut2(:,:),matOut(:,:),matIn1(:,:),matIn2(:,:),matIn(:,:)
+ DT,pointer :: mat1(:,:),mat2(:,:),mat(:,:),matQ1(:,:),matQ2(:,:),matQ(:,:),matQ2D(:,:),matQcA_trans1(:,:),matQcA_trans2(:,:),&
+ matQcA_trans(:,:),matQcA_trans2D(:,:),matQUt2D(:,:),UU(:,:),VV(:,:),mattemp(:,:),matOut1(:,:),matOut2(:,:),matOut(:,:),matIn1(:,:),matIn2(:,:),matIn(:,:)
type(matrixblock),pointer::block_o,block_inv,block_schur,block_off1,block_off2,block_off
integer groupn,groupm,mm(2),nn(2),ierr,nin1,nout1,nin2,nout2,offM(2),offN(2),offout1,offout2,rank
type(hobf)::ho_bf1
diff -u -r a/SRC/BPACK_structure.f90 b/SRC/BPACK_structure.f90
--- a/SRC/BPACK_structure.f90 2020-07-10 16:23:52.000000000 +0900
+++ b/SRC/BPACK_structure.f90 2020-07-17 14:22:48.000000000 +0900
@@ -1612,7 +1612,6 @@
! do ll=1,ho_bf1%levels(level_c)%BP(ii)%Lplus
! ! write(*,*)ho_bf1%levels(level_c)%BP(ii)%LL(ll)%Nbound,'ddd'
! do bb = 1,ho_bf1%levels(level_c)%BP(ii)%LL(ll)%Nbound
- ! write(177,'(I3,I7,I3,I3,'//TRIM(strings)//'Es16.7)')level_c,ii,ll,ho_bf1%levels(level_c)%BP(ii)%LL(ll)%matrices_block(bb)%level,msh%basis_group(ho_bf1%levels(level_c)%BP(ii)%LL(ll)%matrices_block(bb)%row_group)%center(1:dimn),msh%basis_group(ho_bf1%levels(level_c)%BP(ii)%LL(ll)%matrices_block(bb)%col_group)%center(1:dimn)
! end do
! end do
! ! end if
diff -u -r a/SRC/Bplus_compress.f90 b/SRC/Bplus_compress.f90
--- a/SRC/Bplus_compress.f90 2020-07-10 15:50:51.000000000 +0900
+++ b/SRC/Bplus_compress.f90 2020-07-16 17:47:42.000000000 +0900
@@ -3399,7 +3399,8 @@
DT,allocatable:: UU(:,:), VV(:,:),matU(:,:),matV(:,:),matU1(:,:),matV1(:,:),matU2(:,:),matV2(:,:),tmp(:,:),matU1D(:,:),matV1D(:,:),Vin(:,:),Vout1(:,:),Vout2(:,:),Vinter(:,:),Fullmat(:,:),QQ1(:,:),matU2D(:,:),matV2D(:,:)
real(kind=8),allocatable::Singular(:)
integer nsproc1,nsproc2,nprow,npcol,nprow1D,npcol1D,myrow,mycol,nprow1,npcol1,myrow1,mycol1,nprow2,npcol2,myrow2,mycol2,myArows,myAcols,M1,N1,M2,N2,rank1,rank2,ierr
- integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
+ integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),&
+ descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
integer dims(6),dims_tmp(6) ! M1,N1,rank1,M2,N2,rank2
DT:: TEMP(1)
integer LWORK,mnmax,mnmin,rank_new
@@ -3820,7 +3821,8 @@
DT,allocatable:: UU(:,:), VV(:,:),matU(:,:),matV(:,:),matU1(:,:),matV1(:,:),matU2(:,:),matV2(:,:),tmp(:,:),matU1D(:,:),matV1D(:,:),Vin(:,:),Vout1(:,:),Vout2(:,:),Vinter(:,:),Fullmat(:,:),QQ1(:,:),matU2D(:,:),matV2D(:,:)
real(kind=8),allocatable::Singular(:)
integer nsproc1,nsproc2,nprow,npcol,nprow1D,npcol1D,myrow,mycol,nprow1,npcol1,myrow1,mycol1,nprow2,npcol2,myrow2,mycol2,myArows,myAcols,M1,N1,M2,N2,rank1,rank2,ierr
- integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
+ integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),&
+ descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
integer dims(6),dims_tmp(6) ! M1,N1,rank1,M2,N2,rank2
DT:: TEMP(1)
integer LWORK,mnmax,mnmin,rank_new
diff -u -r a/SRC/Bplus_factor.f90 b/SRC/Bplus_factor.f90
--- a/SRC/Bplus_factor.f90 2020-07-10 15:50:51.000000000 +0900
+++ b/SRC/Bplus_factor.f90 2020-07-16 17:48:41.000000000 +0900
@@ -309,7 +309,6 @@
allocate(matU(block_o%M_loc,rank))
matU = block_o%ButterflyU%blocks(1)%matrix
- ! write(*,*)fnorm(block_o%ButterflyV%blocks(1)%matrix,size(block_o%ButterflyV%blocks(1)%matrix,1),size(block_o%ButterflyV%blocks(1)%matrix,2)),fnorm(block_o%ButterflyU%blocks(1)%matrix,size(block_o%ButterflyU%blocks(1)%matrix,1),size(block_o%ButterflyU%blocks(1)%matrix,2)),ptree%MyID,'re',shape(block_o%ButterflyV%blocks(1)%matrix),shape(block_o%ButterflyU%blocks(1)%matrix),shape(matrixtemp),isnanMat(block_o%ButterflyV%blocks(1)%matrix,size(block_o%ButterflyV%blocks(1)%matrix,1),size(block_o%ButterflyV%blocks(1)%matrix,2)),isnanMat(block_o%ButterflyU%blocks(1)%matrix,size(block_o%ButterflyU%blocks(1)%matrix,1),size(block_o%ButterflyU%blocks(1)%matrix,2))
call gemmf90(block_o%ButterflyV%blocks(1)%matrix,block_o%M_loc,block_o%ButterflyU%blocks(1)%matrix,block_o%M_loc,matrixtemp,rank,'T','N',rank,rank,block_o%M_loc,cone,czero,flop=flop)
stats%Flop_Factor = stats%Flop_Factor + flop
@@ -1360,10 +1359,12 @@
type(grid),pointer::gd
type(grid),pointer::gdc1,gdc2
integer:: cridx,info
- DT,allocatable:: UU(:,:),UU1(:,:),UU2(:,:), VV(:,:),VV1(:,:),VV2(:,:),SS1(:,:),TT1(:,:),matU(:,:),matV(:,:),matU1(:,:),matV1(:,:),matU2(:,:),matV2(:,:),tmp(:,:),matU1D(:,:),matV1D(:,:),Vin(:,:),Vout1(:,:),Vout2(:,:),Vinter(:,:),Fullmat(:,:),QQ1(:,:),matU2D(:,:),matV2D(:,:)
+ DT,allocatable:: UU(:,:),UU1(:,:),UU2(:,:),VV(:,:),VV1(:,:),VV2(:,:),SS1(:,:),TT1(:,:),matU(:,:),matV(:,:),matU1(:,:),matV1(:,:),matU2(:,:),matV2(:,:),tmp(:,:),matU1D(:,:),matV1D(:,:),&
+ Vin(:,:),Vout1(:,:),Vout2(:,:),Vinter(:,:),Fullmat(:,:),QQ1(:,:),matU2D(:,:),matV2D(:,:)
real(kind=8),allocatable::Singular(:)
integer nsproc1,nsproc2,nprow,npcol,nprow1D,npcol1D,myrow,mycol,nprow1,npcol1,myrow1,mycol1,nprow2,npcol2,myrow2,mycol2,myArows,myAcols,M1,N1,M2,N2,rank1,rank2,ierr
- integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
+ integer::descsmatU(9),descsmatV(9),descsmatU1(9),descsmatV1(9),descsmatU2(9),descsmatV2(9),descUU(9),descVV(9),descsmatU1c(9),descsmatU2c(9),descsmatV1c(9),descsmatV2c(9),&
+ descButterflyV(9),descButterflyU(9),descButterU1D(9),descButterV1D(9),descVin(9),descVout(9),descVinter(9),descFull(9)
integer dims(6),dims_tmp(6) ! M1,N1,rank1,M2,N2,rank2
DT:: TEMP(1)
integer LWORK,mnmax,mnmin,rank_new
@@ -2363,7 +2364,8 @@
rank_new_max = max(rank_new_max,Bplus%LL(ll)%rankmax)
end do
- if(option%verbosity>=1 .and. ptree%myid==ptree%pgrp(Bplus%LL(1)%matrices_block(1)%pgno)%head)write(*,'(A10,I5,A6,I3,A8,I3,A11,Es14.7)')'Mult No. ',rowblock,' rank:',rank_new_max,' L_butt:',Bplus%LL(1)%matrices_block(1)%level_butterfly,' error:',error_inout
+ if(option%verbosity>=1 .and. ptree%myid==ptree%pgrp(Bplus%LL(1)%matrices_block(1)%pgno)%head)&
+ write(*,'(A10,I5,A6,I3,A8,I3,A11,Es14.7)')'Mult No. ',rowblock,' rank:',rank_new_max,' L_butt:',Bplus%LL(1)%matrices_block(1)%level_butterfly,' error:',error_inout
endif
diff -u -r a/SRC/Bplus_randomized.f90 b/SRC/Bplus_randomized.f90
--- a/SRC/Bplus_randomized.f90 2020-07-10 15:50:51.000000000 +0900
+++ b/SRC/Bplus_randomized.f90 2020-07-16 17:49:25.000000000 +0900
@@ -821,7 +821,8 @@
call MPI_ALLREDUCE(MPI_IN_PLACE, error_inout, 1,MPI_double_precision, MPI_MAX, ptree%pgrp(pgno_large)%Comm,ierr)
call MPI_ALLREDUCE(MPI_IN_PLACE, block_rand(1)%rankmax, 1,MPI_integer, MPI_MAX, ptree%pgrp(pgno_large)%Comm,ierr)
- if(ptree%MyID==ptree%pgrp(blocks_o%pgno)%head .and. option%verbosity>=2)write(*,'(A38,A6,I3,A8,I2,A8,I3,A7,Es14.7,A9,I5,A8,I5)')' '//TRIM(strings)//' ',' rank:',block_rand(1)%rankmax,' Ntrial:',tt,' L_butt:',block_rand(1)%level_butterfly,' error:',error_inout,' #sample:',rank_pre_max,' #nproc:',ptree%pgrp(block_rand(1)%pgno)%nproc
+ if(ptree%MyID==ptree%pgrp(blocks_o%pgno)%head .and. option%verbosity>=2)write(*,'(A38,A6,I3,A8,I2,A8,I3,A7,Es14.7,A9,I5,A8,I5)')' '//TRIM(strings)//' ','&
+ rank:',block_rand(1)%rankmax,' Ntrial:',tt,' L_butt:',block_rand(1)%level_butterfly,' error:',error_inout,' #sample:',rank_pre_max,' #nproc:',ptree%pgrp(block_rand(1)%pgno)%nproc
!!!!*** terminate if 1. error small enough or 2. rank smaller than num_vec
if(error_inout>option%tol_rand .and. block_rand(1)%rankmax==rank_pre_max)then
@@ -4379,7 +4380,8 @@
allocate(Bplus_randomized%LL(ll)%matrices_block(Bplus_randomized%LL(ll)%Nbound))
do bb =1,Bplus_randomized%LL(ll)%Nbound
- call BF_Init_randomized(Bplus%LL(ll)%matrices_block(bb)%level_butterfly,Bplus%LL(ll)%rankmax,Bplus%LL(ll)%matrices_block(bb)%row_group,Bplus%LL(ll)%matrices_block(bb)%col_group,Bplus%LL(ll)%matrices_block(bb),Bplus_randomized%LL(ll)%matrices_block(bb),msh,ptree,option,1)
+ call BF_Init_randomized(Bplus%LL(ll)%matrices_block(bb)%level_butterfly,Bplus%LL(ll)%rankmax,Bplus%LL(ll)%matrices_block(bb)%row_group,&
+ Bplus%LL(ll)%matrices_block(bb)%col_group,Bplus%LL(ll)%matrices_block(bb),Bplus_randomized%LL(ll)%matrices_block(bb),msh,ptree,option,1)
end do
diff -u -r a/SRC/Bplus_utilities.f90 b/SRC/Bplus_utilities.f90
--- a/SRC/Bplus_utilities.f90 2020-07-10 15:50:51.000000000 +0900
+++ b/SRC/Bplus_utilities.f90 2020-07-16 17:57:34.000000000 +0900
@@ -1411,7 +1411,8 @@
allocate(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix(rank,dimension_n))
! call gemmNT_omp(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j-1)%matrix, block_i%ButterflyV%blocks(2*index_j-1)%matrix, &
! &block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank,dimension_n,nn)
- call gemmf90(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j-1)%matrix,rank, block_i%ButterflyV%blocks(2*index_j-1)%matrix,dimension_n, block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank, 'N','T',rank,dimension_n,nn,cone,czero)
+ call gemmf90(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j-1)%matrix,rank, block_i%ButterflyV%blocks(2*index_j-1)%matrix,dimension_n, &
+ block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j-1)%matrix,rank, 'N','T',rank,dimension_n,nn,cone,czero)
@@ -1427,7 +1428,8 @@
allocate(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix(rank,dimension_n))
! call gemmNT_omp(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j)%matrix, block_i%ButterflyV%blocks(2*index_j)%matrix, &
! &block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,dimension_n,nn)
- call gemmf90(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j)%matrix,rank, block_i%ButterflyV%blocks(2*index_j)%matrix,dimension_n, block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank, 'N','T',rank,dimension_n,nn,cone,czero)
+ call gemmf90(block_i%ButterflyKerl(level)%blocks(index_i,2*index_j)%matrix,rank, block_i%ButterflyV%blocks(2*index_j)%matrix,dimension_n,&
+ block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank, 'N','T',rank,dimension_n,nn,cone,czero)
if(isnan(fnorm(block_o%ButterflyKerl(level_butterfly-level_butterfly_loc+level)%blocks(index_i+index_i_start,2*index_j)%matrix,rank,dimension_n)))then
write(*,*)'NAN in L 2'
@@ -1494,7 +1496,8 @@
! call gemm_omp(block_i%ButterflyU%blocks(2*index_i-1)%matrix, block_i%ButterflyKerl(level)%blocks(2*index_i-1,index_j)%matrix,&
! &block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,dimension_m,rank,mm)
- call gemmf90(block_i%ButterflyU%blocks(2*index_i-1)%matrix,dimension_m,block_i%ButterflyKerl(level)%blocks(2*index_i-1,index_j)%matrix,mm,block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,dimension_m,'N','N',dimension_m,rank,mm,cone,czero)
+ call gemmf90(block_i%ButterflyU%blocks(2*index_i-1)%matrix,dimension_m,block_i%ButterflyKerl(level)%blocks(2*index_i-1,index_j)%matrix,mm,&
+ block_o%ButterflyKerl(level)%blocks(2*index_i-1,index_j+index_j_start)%matrix,dimension_m,'N','N',dimension_m,rank,mm,cone,czero)
! write(*,*)'good 1.1'
@@ -1510,7 +1513,8 @@
! call gemm_omp(block_i%ButterflyU%blocks(2*index_i)%matrix, block_i%ButterflyKerl(level)%blocks(2*index_i,index_j)%matrix,&
! &block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,rank,mm)
- call gemmf90(block_i%ButterflyU%blocks(2*index_i)%matrix,dimension_m,block_i%ButterflyKerl(level)%blocks(2*index_i,index_j)%matrix,mm,block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,'N','N',dimension_m,rank,mm,cone,czero)
+ call gemmf90(block_i%ButterflyU%blocks(2*index_i)%matrix,dimension_m,block_i%ButterflyKerl(level)%blocks(2*index_i,index_j)%matrix,mm,&
+ block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,'N','N',dimension_m,rank,mm,cone,czero)
! write(*,*)'good 2'
if(isnan(fnorm(block_o%ButterflyKerl(level)%blocks(2*index_i,index_j+index_j_start)%matrix,dimension_m,rank)))then
@@ -3018,7 +3022,8 @@
mode='C'
endif
- call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
+ call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or.&
+ (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
call GetLocalBlockRange(ptree,pgno_o,level_o,level_butterfly_o,idx_r,inc_r,nr,idx_c,inc_c,nc,mode)
@@ -3562,7 +3567,8 @@
mode='C'
endif
- call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
+ call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or.&
+ (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
num_row=2**level_o
@@ -3952,7 +3958,8 @@
mode='C'
endif
- call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
+ call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or.&
+ (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
call GetLocalBlockRange(ptree,pgno_o,level_o,level_butterfly_o,idx_r,inc_r,nr,idx_c,inc_c,nc,mode)
@@ -4308,7 +4315,8 @@
level_c_o=level_butterfly_c_o+1
endif
- call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
+ call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or.&
+ (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
call GetLocalBlockRange(ptree,pgno_o,level_c_o,level_butterfly_c_o,idx_r,inc_r,nr,idx_c,inc_c,nc,mode)
@@ -4705,7 +4713,8 @@
level_c_o=level_butterfly_c_o+1
endif
- call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
+ call assert((ptree%pgrp(pgno_i)%head<=ptree%pgrp(pgno_o)%head .and. ptree%pgrp(pgno_i)%tail>=ptree%pgrp(pgno_o)%tail) .or. &
+ (ptree%pgrp(pgno_o)%head<=ptree%pgrp(pgno_i)%head .and. ptree%pgrp(pgno_o)%tail>=ptree%pgrp(pgno_i)%tail),'pgno_i or pgno_o should be contained in the other')
call GetLocalBlockRange(ptree,pgno_o,level_c_o,level_butterfly_c_o,idx_r,inc_r,nr,idx_c,inc_c,nc,mode)
@@ -5211,7 +5220,8 @@
stats%Flop_Tmp = stats%Flop_Tmp + flops
else
flops=0
- !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,index_i_loc_s,index_i_loc_k, index_j_loc,index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,&
+ !$omp index_i_loc_s,index_i_loc_k, index_j_loc,index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr*nc
index_j_loc = (index_ij-1)/nr+1
@@ -5237,10 +5247,12 @@
allocate (BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix(mm,num_vectors))
BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn1,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn1,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn1,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn1,cone,cone,flop=flop)
flops = flops + flop
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc+1)%matrix,nn2,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn2,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc+1)%matrix,nn2,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn2,cone,cone,flop=flop)
flops = flops + flop
enddo
!$omp end parallel do
@@ -5367,7 +5379,8 @@
BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
flops = flops + flop
mm=size(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,1)
@@ -5378,14 +5391,16 @@
BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
flops = flops + flop
enddo
enddo
!$omp end parallel do
else
- !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,index_i_loc_s,index_i_loc_k, index_j_loc,index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,index_i_loc_s,index_i_loc_k, &
+ !$omp index_j_loc,index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr0*nc0
index_j_loc = (index_ij-1)/nr0+1 !index_i_loc is local index of column-wise ordering at current level
index_i_loc= mod(index_ij-1,nr0) + 1
@@ -5410,7 +5425,8 @@
BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
flops = flops + flop
mm=size(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,1)
@@ -5421,7 +5437,8 @@
BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s+1,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn,cone,cone,flop=flop)
flops = flops + flop
enddo
@@ -5546,7 +5563,8 @@
stats%Flop_Tmp = stats%Flop_Tmp + flops
else
flops=0
- !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,&
+ !$omp index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr*nc
index_j_loc = (index_ij-1)/nr+1
index_i_loc= mod(index_ij-1,nr) + 1 !index_i_loc is local index of column-wise ordering at current level
@@ -5570,9 +5588,11 @@
BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm1,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm1,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm1,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm1,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm1,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm1,cone,cone,flop=flop)
flops = flops + flop
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm2,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc+1,index_jj_loc)%matrix,mm2,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm2,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm2,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc+1,index_jj_loc)%matrix,mm2,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm2,cone,cone,flop=flop)
flops = flops + flop
enddo
@@ -5676,7 +5696,8 @@
flops=0
if(nr0>1 .and. inc_r0==1)then ! this special treatment makes sure two threads do not write to the same address simultaneously
- !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,index_i_loc0,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,&
+ !$omp index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,index_i_loc0,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr0*nc0/2
index_i_loc0 = (index_ij-1)/nc0+1
do ii=1,2
@@ -5705,7 +5726,8 @@
endif
! !$omp end critical
! write(*,*)index_ii_loc,index_jj_loc,shape(BFvec%vec(level_butterfly-level+1)%blocks),index_i_loc_s,index_j_loc_s,shape(BFvec%vec(level_butterfly-level+2)%blocks),'lv:',level,shape(blocks%ButterflyKerl(level)%blocks)
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
flops = flops + flop
@@ -5717,13 +5739,15 @@
BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
flops = flops + flop
enddo
enddo
!$omp end parallel do
else
- !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,&
+ !$omp index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr0*nc0
index_j_loc = (index_ij-1)/nr0+1
index_i_loc= mod(index_ij-1,nr0) + 1 !index_i_loc is local index of row-wise ordering at current level
@@ -5750,7 +5774,8 @@
endif
! !$omp end critical
! write(*,*)index_ii_loc,index_jj_loc,shape(BFvec%vec(level_butterfly-level+1)%blocks),index_i_loc_s,index_j_loc_s,shape(BFvec%vec(level_butterfly-level+2)%blocks),'lv:',level,shape(blocks%ButterflyKerl(level)%blocks)
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
flops = flops + flop
@@ -5762,7 +5787,8 @@
BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix=0
endif
! !$omp end critical
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s+1)%matrix,nn,'T','N',nn,num_vectors,mm,cone,cone,flop=flop)
flops = flops + flop
enddo
!$omp end parallel do
@@ -5976,7 +6002,8 @@
stats%Flop_Tmp = stats%Flop_Tmp + flops
else
flops=0
- !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,index_i_loc_s,index_i_loc_k, index_j_loc,index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc,index_i_loc_s,index_i_loc_k, index_j_loc,&
+ !$omp index_j_loc_s,index_j_loc_k,ij,ii,jj,kk,i,j,index_i,index_j,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr*nc
index_j_loc = (index_ij-1)/nr+1
@@ -6002,10 +6029,12 @@
allocate (BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix(mm,num_vectors))
BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn1,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn1,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc)%matrix,nn1,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn1,cone,cone,flop=flop)
flops = flops + flop
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc+1)%matrix,nn2,BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn2,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k+1)%matrix,mm,BFvec%vec(level)%blocks(index_ii_loc,index_jj_loc+1)%matrix,nn2,&
+ BFvec%vec(level+1)%blocks(index_i_loc_s,index_j_loc_s)%matrix,mm,'N','N',mm,num_vectors,nn2,cone,cone,flop=flop)
flops = flops + flop
enddo
!$omp end parallel do
@@ -6112,7 +6141,8 @@
stats%Flop_Tmp = stats%Flop_Tmp + flops
else
flops=0
- !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
+ !$omp parallel do default(shared) private(index_ij,ii,jj,kk,ctemp,i,j,index_i,index_j,index_i_loc,index_j_loc,index_ii,index_jj,&
+ !$omp index_ii_loc,index_jj_loc,index_i_loc_s,index_j_loc_s,index_i_loc_k,index_j_loc_k,mm,mm1,mm2,nn,nn1,nn2,flop) reduction(+:flops)
do index_ij=1, nr*nc
index_j_loc = (index_ij-1)/nr+1
index_i_loc= mod(index_ij-1,nr) + 1 !index_i_loc is local index of column-wise ordering at current level
@@ -6136,9 +6166,11 @@
BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix=0
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm1,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm1,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm1,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k,index_j_loc_k)%matrix,mm1,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc,index_jj_loc)%matrix,mm1,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm1,cone,cone,flop=flop)
flops = flops + flop
- call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm2,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc+1,index_jj_loc)%matrix,mm2,BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm2,cone,cone,flop=flop)
+ call gemmf90(blocks%ButterflyKerl(level)%blocks(index_i_loc_k+1,index_j_loc_k)%matrix,mm2,BFvec%vec(level_butterfly-level+1)%blocks(index_ii_loc+1,index_jj_loc)%matrix,mm2,&
+ BFvec%vec(level_butterfly-level+2)%blocks(index_i_loc_s,index_j_loc_s)%matrix,nn,'T','N',nn,num_vectors,mm2,cone,cone,flop=flop)
flops = flops + flop
enddo

View File

@ -40,6 +40,10 @@ class Butterflypack(CMakePackage):
depends_on('scalapack')
depends_on('arpack-ng')
patch('longline.patch', when='%fj')
patch('fjfortran.patch', when='%fj')
patch('isnan.patch', when='%fj')
def cmake_args(self):
spec = self.spec