Efficient tensor contractions by BLAS

103 Views Asked by At

C[a,d,e] = A[a,b,c] * B[d,b,c,e] The topic is relate to BLAS tensor contractions for two indexes together , but more complicated.

I can only use do loop to call gemm for many times. The data structure of C and B is not contiguous in current implementation.

I want to find a more efficient way to compute C[a,d,e] = A[a,b,c] * B[d,b,c,e]. Hopefully, call gemm only one time.

Program double_contractions_blas

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

  Implicit None

  Real( wp ), Dimension( :, :, : ), Allocatable :: a
  Real( wp ), Dimension( :, :, :, : ), Allocatable :: b
  Real( wp ), Dimension( :, :, : ), Allocatable :: c
  Real( wp ), Dimension( :, :    ), Allocatable :: d
  Real( wp ), Dimension( :, :    ), Allocatable :: e

  Integer :: na, nb, nc, nd, ne, nf, ng
  Integer :: i

  Integer( li ) :: start, finish, rate

  Write( *, * ) 'na, nb, nc, nd, ne ?'
  Read( *, * ) na, nb, nc, nd, ne

  allocate( a( na, nb, nc ) )
  allocate( b( nd, nb, nc, ne ) )
  allocate( c( na, nd, ne ) )

  nf = nd * ne
  ng = nb * nc
  Call Random_number( a )
  Call Random_number( b )

  Call System_clock( start, rate )
  Do i = 1, nd
    Call dgemm( 'N', 'N', na, ne, ng, 1.0_wp, a , Size( a , Dim = 1 ), &
                                              b(i,:,:,:) , ng, &
                                              0.0_wp, c(:,i,:), Size( c, Dim = 1 ) )
  end do

  Call System_clock( finish, rate )

  write( *, * ) c

  end program double_contractions_blas
0

There are 0 best solutions below