CMake Config for oneMKL
If you want to integrate oneMKL into your CMake projects, starting with the Intel® oneAPI Math Kernel Library (oneMKL) 2021.3 release, MKLConfig.cmake is provided as part of the package and installation. MKLConfig.cmake supports all oneMKL configurations, compilers, and runtimes, as the oneMKL product itself. Help/usage is provided in the top section of MKLConfig.cmake.
Example
my_test/ |_ build/ <-- Out-of-source build directory |_ CMakeLists.txt <-- User side project's CMakeLists.txt |_ app.c <-- Source file that uses oneMKL API
app.c
#include <stdio.h> #include "mkl.h" int main() { MKLVersion mkl_version; mkl_get_version(&mkl_version); printf("You are using oneMKL %d.%d\n", mkl_version.MajorVersion, mkl_version.UpdateVersion); return 0; }
CMakeLists.txt
cmake_minimum_required(VERSION 3.13) project(oneMKL_Example LANGUAGES C) find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT}) add_executable(myapp app.c) target_link_libraries(myapp PUBLIC MKL::MKL)
Command line
# Call the oneAPI setvars.bat/oneapi-vars.bat script beforehand build>cmake .. -G Ninja -DCMAKE_C_COMPILER=icx build>cmake --build .
If your application requires a customized setting, the following CMakeLists.txt demonstrates how to achieve a more fine-grained control.
CMakeLists.txt
cmake_minimum_required(VERSION 3.13) project(oneMKL_Example LANGUAGES C) find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT}) message(STATUS "Imported oneMKL targets: ${MKL_IMPORTED_TARGETS}") add_executable(myapp app.c) target_compile_options(myapp PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>) target_include_directories(myapp PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>) target_link_libraries(myapp PUBLIC $<LINK_ONLY:MKL::MKL>)
Ninja and NMake Makefiles are supported on Windows. Other CMake Generators may work but are not guaranteed.
When the Ninja build system is in use, Ninja 1.10.2 or later is required for Fortran support.
CBLAS Example
mkl_example.h
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! Intel(R) oneAPI Math Kernel Library (oneMKL) examples interface
!******************************************************************************/
#ifndef _MKL_EXAMPLE_H_
#define _MKL_EXAMPLE_H_
#include "mkl_types.h"
#include "mkl_cblas.h"
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#define COMMENTS ':'
#define MAX_STRING_LEN 512
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define GENERAL_MATRIX 0
#define UPPER_MATRIX 1
#define LOWER_MATRIX -1
#define FULLPRINT 0
#define SHORTPRINT 1
#ifdef MKL_ILP64
#define INT_FORMAT "%lld"
#else
#define INT_FORMAT "%d"
#endif
/*
* ===========================================
* Prototypes for example program functions
* ===========================================
*/
int GetIntegerParameters(FILE*, ...);
int GetIntegerParameters8(FILE*, ...);
int GetIntegerParameters16(FILE*, ...);
int GetIntegerParameters32(FILE*, ...);
int GetScalarsH(FILE*, ...);
int GetScalarsD(FILE*, ...);
int GetScalarsS(FILE*, ...);
int GetScalarsC(FILE*, ...);
int GetScalarsZ(FILE*, ...);
int GetCblasCharParameters(FILE *in_file, ...);
MKL_INT MaxValue(MKL_INT, MKL_INT*);
MKL_INT GetVectorI(FILE*, MKL_INT, MKL_INT*);
MKL_INT GetVectorI32(FILE*, MKL_INT, MKL_INT32*);
MKL_INT GetVectorS(FILE*, MKL_INT, float*, MKL_INT);
MKL_INT GetVectorC(FILE*, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetVectorD(FILE*, MKL_INT, double*, MKL_INT);
MKL_INT GetVectorZ(FILE*, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetArrayI8(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT8*, MKL_INT*);
MKL_INT GetArrayI16(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT16*, MKL_INT*);
MKL_INT GetArrayI32(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT32*, MKL_INT*);
MKL_INT GetArrayBF16(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_BF16*, MKL_INT*);
MKL_INT GetArrayH(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_F16*, MKL_INT*);
MKL_INT GetArrayS(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, float*, MKL_INT*);
MKL_INT GetArrayD(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, double*, MKL_INT*);
MKL_INT GetArrayC(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*);
MKL_INT GetArrayZ(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*);
MKL_INT GetBandArrayS(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT);
MKL_INT GetBandArrayD(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT);
MKL_INT GetBandArrayC(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetBandArrayZ(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetValuesI8(FILE*, MKL_INT8*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI16(FILE*, MKL_INT16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI32(FILE*, MKL_INT32*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI(FILE *, MKL_INT*, MKL_INT, MKL_INT);
MKL_INT GetValuesBF16(FILE *, MKL_BF16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesH(FILE *, MKL_F16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesC(FILE *, MKL_Complex8*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesZ(FILE *, MKL_Complex16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesD(FILE *, double*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesS(FILE *, float*, MKL_INT, MKL_INT, MKL_INT);
float GetValueH(MKL_F16);
void PrintParameters( char*, ... );
void PrintVectorI(MKL_INT, MKL_INT*, char*);
void PrintVectorI32(MKL_INT, MKL_INT32*, char*);
void PrintVectorS(int, MKL_INT, float*, MKL_INT, char*);
void PrintVectorC(int, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintVectorD(int, MKL_INT, double*, MKL_INT, char*);
void PrintVectorZ(int, MKL_INT, MKL_Complex16*, MKL_INT, char*);
void PrintArrayI8(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT8*, MKL_INT*, char*);
void PrintArrayI16(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT16*, MKL_INT*, char*);
void PrintArrayI32(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT32*, MKL_INT*, char*);
void PrintArrayBF16(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_BF16*, MKL_INT*, char*);
void PrintArrayH(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_F16*, MKL_INT*, char*);
void PrintArrayS(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, float*, MKL_INT*, char*);
void PrintArrayD(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, double*, MKL_INT*, char*);
void PrintArrayC(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*, char*);
void PrintArrayZ(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*, char*);
void PrintBandArrayS(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT, char*);
void PrintBandArrayD(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT, char*);
void PrintBandArrayC(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintBandArrayZ(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT, char*);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _MKL_EXAMPLE_H_ */
common_func.c
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
!
!******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <math.h>
#include "mkl_example.h"
#include "mkl_cblas.h"
MKL_INT MaxValue(MKL_INT n, MKL_INT *x)
{
MKL_INT i, indmax;
indmax = x[0];
for (i = 1; i < n; i++)
if (indmax < x[i]) indmax = x[i];
return indmax;
} /* MaxValue */
typedef union {
float float_part;
MKL_BF16 int_part[2];
} conv_union_bf16;
typedef union {
MKL_F16 raw;
struct {
unsigned int frac : 10;
unsigned int exp : 5;
unsigned int sign : 1;
} bits;
} conv_union_f16;
typedef union {
float raw;
struct {
unsigned int frac : 23;
unsigned int exp : 8;
unsigned int sign : 1;
} bits;
} conv_union_f32;
static float b2f(MKL_BF16 src) {
conv_union_bf16 conv;
conv.int_part[0] = 0;
conv.int_part[1] = src;
return conv.float_part;
}
static MKL_BF16 f2b(float src) {
conv_union_bf16 conv;
conv.float_part = src;
return conv.int_part[1];
}
static float h2f(MKL_F16 x) {
conv_union_f16 src;
conv_union_f32 dst;
src.raw = x;
dst.raw = 0;
dst.bits.sign = src.bits.sign;
if (src.bits.exp == 0x01fU) {
dst.bits.exp = 0xffU;
if (src.bits.frac > 0) {
dst.bits.frac = ((src.bits.frac | 0x200U) << 13);
}
} else if (src.bits.exp > 0x00U) {
dst.bits.exp = src.bits.exp + ((1 << 7) - (1 << 4));
dst.bits.frac = (src.bits.frac << 13);
} else {
unsigned int v = (src.bits.frac << 13);
if (v > 0) {
dst.bits.exp = 0x71;
while ((v & 0x800000UL) == 0) {
dst.bits.exp --;
v <<= 1;
}
dst.bits.frac = v;
}
}
return dst.raw;
}
static MKL_F16 f2h(float x) {
conv_union_f32 src;
conv_union_f16 dst;
src.raw = x;
dst.raw = 0;
dst.bits.sign = src.bits.sign;
if (src.bits.exp == 0x0ffU) {
dst.bits.exp = 0x01fU;
dst.bits.frac = (src.bits.frac >> 13);
if (src.bits.frac > 0) dst.bits.frac |= 0x200U;
} else if (src.bits.exp >= 0x08fU) {
dst.bits.exp = 0x01fU;
dst.bits.frac = 0x000U;
} else if (src.bits.exp >= 0x071U){
dst.bits.exp = src.bits.exp + ((1 << 4) - (1 << 7));
dst.bits.frac = (src.bits.frac >> 13);
} else if (src.bits.exp >= 0x067U){
dst.bits.exp = 0x000;
if (src.bits.frac > 0) {
dst.bits.frac = (((1U << 23) | src.bits.frac) >> 14);
} else {
dst.bits.frac = 1;
}
}
return dst.raw;
}
float GetValueH(MKL_F16 x)
{
return h2f( x );
} /* GetValueH */
MKL_INT GetVectorI(FILE *in_file, MKL_INT n, MKL_INT *x)
{
return GetValuesI( in_file, x, 0, n );
} /* GetVectorI */
MKL_INT GetVectorI16(FILE *in_file, MKL_INT n, MKL_INT16 *x)
{
return GetValuesI16( in_file, x, 1, 0, n );
} /* GetVectorI16 */
MKL_INT GetVectorI32(FILE *in_file, MKL_INT n, MKL_INT32 *x)
{
return GetValuesI32( in_file, x, 1, 0, n );
} /* GetVectorI32 */
MKL_INT GetVectorS(FILE *in_file, MKL_INT n, float *x, MKL_INT incx)
{
return GetValuesS( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorS */
MKL_INT GetVectorD(FILE *in_file, MKL_INT n, double *x, MKL_INT incx)
{
return GetValuesD( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorD */
MKL_INT GetVectorC(FILE *in_file, MKL_INT n, MKL_Complex8 *x, MKL_INT incx)
{
return GetValuesC( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorC */
MKL_INT GetVectorZ(FILE *in_file, MKL_INT n, MKL_Complex16 *x, MKL_INT incx)
{
return GetValuesZ( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorZ */
MKL_INT GetArrayI8(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT8 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT8 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI8 */
MKL_INT GetArrayI16(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT16 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI16 */
MKL_INT GetArrayI32(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT32 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT32 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI32 */
MKL_INT GetArrayBF16(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_BF16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_BF16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayBF16 */
MKL_INT GetArrayH(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_F16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_F16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayH */
MKL_INT GetArrayS(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
float *a, MKL_INT *lda)
{
MKL_INT i, j, number;
float *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayS */
MKL_INT GetArrayD(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
double *a, MKL_INT *lda)
{
MKL_INT i, j, number;
double *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayD */
MKL_INT GetArrayC(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_Complex8 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_Complex8 *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayC */
MKL_INT GetArrayZ(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag,
MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_Complex16 *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if(*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayZ */
MKL_INT GetBandArrayS(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, float *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
float *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesS( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
number = GetValuesS( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
number = GetValuesS( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
}
return 0;
} /* GetBandArrayS */
MKL_INT GetBandArrayD(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, double *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows, number;
double *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesD( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesD( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesD( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayD */
MKL_INT GetBandArrayC(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, MKL_Complex8 *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
MKL_Complex8 *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesC( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesC( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesC( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayC */
MKL_INT GetBandArrayZ(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, MKL_Complex16 *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
MKL_Complex16 *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesZ( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesZ( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesZ( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayZ */
MKL_INT GetValuesI( FILE *in_file, MKL_INT *in_array, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
int value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i]=(MKL_INT)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI8( FILE *in_file, MKL_INT8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
int value_int;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value_int) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT8)value_int;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI16( FILE *in_file, MKL_INT16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
MKL_INT16 value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%hd", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT16)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI32( FILE *in_file, MKL_INT32 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
MKL_INT32 value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT32)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesBF16( FILE *in_file, MKL_BF16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=f2b(temp);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesBF16 */
MKL_INT GetValuesH( FILE *in_file, MKL_F16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=f2h(temp);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesH */
MKL_INT GetValuesD( FILE *in_file, double *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
double temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%lf", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=temp;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesD */
MKL_INT GetValuesS( FILE *in_file, float *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=temp;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesS */
MKL_INT GetValuesC( FILE *in_file, MKL_Complex8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float re, im;
char seps[]=" ,()\t";
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps)) == NULL ||
sscanf( str, "%f", &im) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld].real=re;
in_array[begin+i*ld].imag=im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
}
return counter;
} /* GetValuesC */
MKL_INT GetValuesZ( FILE *in_file, MKL_Complex16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
double re, im;
char seps[]=" ,()\t";
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps))== NULL ||
sscanf( str, "%lf", &im) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld].real=re;
in_array[begin+i*ld].imag=im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
}
return counter;
} /* GetValuesZ */
int GetIntegerParameters8(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT8 *param;
va_list ap;
int value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT8 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT8)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT8 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters16 */
int GetIntegerParameters16(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT16 *param;
va_list ap;
MKL_INT16 value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT16 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%hd", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT16)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT16 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters16 */
int GetIntegerParameters32(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT32 *param;
va_list ap;
MKL_INT32 value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT32 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT32)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT32 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters32 */
int GetIntegerParameters(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT *param;
va_list ap;
int value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters */
int GetCblasCharParameters(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
int *param;
va_list ap;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, int *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", param ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, int * );
}
va_end( ap );
return counter;
} /* GetCblasCharParameters */
int GetScalarsH( FILE *in_file, ... )
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_F16 *param;
va_list ap;
float t;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_F16 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", &t ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
*param = f2h(t);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_F16 * );
}
va_end( ap );
return counter;
} /* GetScalarsH */
int GetScalarsD( FILE *in_file, ... )
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
double *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, double * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%lf", param ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, double * );
}
va_end( ap );
return counter;
} /* GetScalarsD */
int GetScalarsS( FILE *in_file, ... )
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
float *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, float * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", param ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, float * );
}
va_end( ap );
return counter;
} /* GetScalarsS */
int GetScalarsC( FILE *in_file, ... )
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
char seps[]=" ,()\t";
float re = 0.f, im = 0.f;
MKL_Complex8 *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_Complex8 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps))== NULL ||
sscanf( str, "%f", &im) != 1 ){
printf( "\n File format is inappropriate %f %f\n", re, im );
va_end( ap );
return 0;
}
param->real = re;
param->imag = im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
param = va_arg( ap, MKL_Complex8 * );
}
va_end( ap );
return counter;
} /* GetScalarsC */
int GetScalarsZ( FILE *in_file, ... )
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
char seps[]=" ,()\t";
double re = 0., im = 0.;
MKL_Complex16 *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_Complex16 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps)) == NULL ||
sscanf( str, "%lf", &im) != 1 ){
printf( "\n File format is inappropriate %f %f\n", re, im );
va_end( ap );
return 0;
}
param->real = re;
param->imag = im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
param = va_arg( ap, MKL_Complex16 * );
}
va_end( ap );
return counter;
} /* GetScalarsZ */
void PrintParameters( char *names, ... )
{
char *p, *str, str1[MAX_STRING_LEN], buf[MAX_STRING_LEN];
va_list ap;
char seps[]=" ,";
MKL_INT itmp;
MKL_UINT i;
printf("\n ");
va_start( ap, names );
strcpy(buf, names);
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n You must determine the parameters names\n");
return;
}
do {
strcpy(str1, str);
p = str1;
for( i = 0; i < strlen(str1); i++ ) {
*p = (char) tolower(str1[i]);
p++;
}
if ( strcmp( str1, "layout" ) == 0 ) {
itmp = va_arg( ap, CBLAS_LAYOUT );
if ( itmp == CblasRowMajor )
printf("LAYOUT = CblasRowMajor ");
else if ( itmp == CblasColMajor )
printf("LAYOUT = CblasColMajor ");
} else if ( strcmp( str1, "side" ) == 0 ) {
itmp = va_arg( ap, CBLAS_SIDE);
if ( itmp == CblasLeft )
printf("SIDE = CblasLeft ");
else if ( itmp == CblasRight )
printf("SIDE = CblasRight ");
} else if ( strcmp( str1, "uplo" ) == 0 ) {
itmp = va_arg( ap, CBLAS_UPLO );
if ( itmp == CblasUpper )
printf("UPLO = CblasUpper ");
else if ( itmp == CblasLower )
printf("UPLO = CblasLower ");
} else if ( strcmp( str1, "diag" ) == 0 ) {
itmp = va_arg( ap, CBLAS_DIAG );
if ( itmp == CblasUnit )
printf("DIAG=CblasUnit ");
else if ( itmp == CblasNonUnit )
printf("DIAG = CblasNonUnit ");
} else if ( ( strcmp( str1, "trans" ) == 0 ) ||
( strcmp( str1, "transa" ) == 0 ) ||
( strcmp( str1, "transb" ) == 0 ) ) {
itmp = va_arg( ap, CBLAS_TRANSPOSE );
if ( itmp == CblasNoTrans )
printf("%s = CblasNoTrans ", str);
else if ( itmp == CblasTrans )
printf("%s = CblasTrans ", str);
else if ( itmp == CblasConjTrans )
printf("%s = CblasConjTrans ", str);
} else if ( strcmp( str1, "offsetc" ) == 0 ) {
itmp = va_arg( ap, CBLAS_OFFSET );
if ( itmp == CblasFixOffset ) {
printf("OFFSETC = CblasFixOffset ");
} else if ( itmp == CblasRowOffset ) {
printf("OFFSETC = CblasRowOffset ");
} else if ( itmp == CblasColOffset ) {
printf("OFFSETC = CblasColOffset ");
}
}
} while ( (str = strtok( NULL, seps )) != NULL );
va_end( ap );
return;
} /* PrintParameters */
void PrintVectorI(MKL_INT n, MKL_INT *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" "INT_FORMAT, *(x+i));
return;
} /* PrintVectorI */
void PrintVectorI16(MKL_INT n, MKL_INT16 *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" %d", *(x+i));
return;
} /* PrintVectorI16 */
void PrintVectorI32(MKL_INT n, MKL_INT32 *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" %d", *(x+i));
return;
} /* PrintVectorI32 */
void PrintVectorS(int flag, MKL_INT n, float *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("%6.2f ", *(x+i));
return;
} /* PrintVectorS */
void PrintVectorD(int flag, MKL_INT n, double *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("%8.3f ", *(x+i));
return;
} /* PrintVectorD */
void PrintVectorC(int flag, MKL_INT n, MKL_Complex8 *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag);
return;
} /* PrintVectorC */
void PrintVectorZ(int flag, MKL_INT n, MKL_Complex16 *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag);
return;
} /* PrintVectorZ */
void PrintArrayI8(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT8 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT8 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%d ", (int) *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", (int)*(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%d ", (int) *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI8 */
void PrintArrayI16(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%hd ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%hd ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%hd ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI16 */
void PrintArrayI32(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT32 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT32 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%d ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%d ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI32 */
void PrintArrayBF16(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_BF16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_BF16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayBF16 */
void PrintArrayH(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_F16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_F16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayH */
void PrintArrayS(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, float *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
float *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayS */
void PrintArrayD(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, double *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
double *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT,
name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayD */
void PrintArrayC(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_Complex8 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_Complex8 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT,
name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} /* if */
} else if(*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayC */
void PrintArrayZ(CBLAS_LAYOUT*layout, int flag1, int flag2,
MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_Complex16 *addr;
if (flag1 == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
else
printf("\n ARRAY %s", name);
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayZ */
void PrintBandArrayS(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
float *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
float *addr, *addr1;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("%6.2f ", *(addr1+j+l*lda) );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("%6.2f ", *(addr1+l*lda) );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("%6.2f ", *(addr1+l*lda) );
}
} /* if */
return;
} /* PrintBandArrayS */
void PrintBandArrayD(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
double *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
double *addr, *addr1;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("%6.2f ", *(addr1+j+l*lda) );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("%6.2f ", *(addr1+l*lda) );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("%6.2f ", *(addr1+l*lda) );
}
} /* if */
return;
} /* PrintBandArrayD */
void PrintBandArrayC(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
MKL_Complex8 *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
MKL_Complex8 *addr, *addr1;
MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+j+l*lda)->real, (addr1+j+l*lda)->imag );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
}
} /* if */
return;
} /* PrintBandArrayC */
void PrintBandArrayZ(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
MKL_Complex16 *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
MKL_Complex16 *addr, *addr1;
MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+j+l*lda)->real, (addr1+j+l*lda)->imag );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
}
} /* if */
return;
} /* PrintBandArrayZ */
cblas_sgemmx.c
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! C B L A S _ S G E M M Example Program Text ( C Interface )
!******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
#include "mkl_example.h"
int main(int argc, char *argv[])
{
FILE *in_file;
char *in_file_name;
MKL_INT m, n, k;
MKL_INT lda, ldb, ldc;
MKL_INT rmaxa, cmaxa, rmaxb, cmaxb, rmaxc, cmaxc;
float alpha, beta;
float *a, *b, *c;
CBLAS_LAYOUT layout;
CBLAS_TRANSPOSE transA, transB;
MKL_INT ma, na, mb, nb;
printf("\n C B L A S _ S G E M M EXAMPLE PROGRAM\n");
/* Get input parameters */
if( argc == 1 ) {
printf("\n You must specify in_file data file as 1-st parameter");
return 1;
}
in_file_name = argv[1];
/* Get input data */
if( (in_file = fopen( in_file_name, "r" )) == NULL ) {
printf("\n ERROR on OPEN '%s' with mode=\"r\"\n", in_file_name);
return 1;
}
if( GetIntegerParameters(in_file, &m, &n, &k) != 3 ) {
printf("\n ERROR of m, n, k reading\n");
fclose(in_file);
return 1;
}
if( GetScalarsS(in_file, &alpha, &beta) != 2 ) {
printf("\n ERROR of alpha, beta reading\n");
fclose(in_file);
return 1;
}
if( GetCblasCharParameters(in_file, &transA, &transB, &layout) != 3 ) {
printf("\n ERROR of transA, transB, layout reading\n");
fclose(in_file);
return 1;
}
if( transA == CblasNoTrans ) {
rmaxa = m + 1;
cmaxa = k;
ma = m;
na = k;
} else {
rmaxa = k + 1;
cmaxa = m;
ma = k;
na = m;
}
if( transB == CblasNoTrans ) {
rmaxb = k + 1;
cmaxb = n;
mb = k;
nb = n;
} else {
rmaxb = n + 1;
cmaxb = k;
mb = n;
nb = k;
}
rmaxc = m + 1;
cmaxc = n;
a = (float *)mkl_calloc(rmaxa*cmaxa, sizeof( float ), 64);
b = (float *)mkl_calloc(rmaxb*cmaxb, sizeof( float ), 64);
c = (float *)mkl_calloc(rmaxc*cmaxc, sizeof( float ), 64);
if( a == NULL || b == NULL || c == NULL ) {
printf( "\n Can't allocate memory for arrays\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if (layout == CblasRowMajor) {
lda=cmaxa;
ldb=cmaxb;
ldc=cmaxc;
} else {
lda=rmaxa;
ldb=rmaxb;
ldc=rmaxc;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &ma, &na, a, &lda) != 0 ) {
printf("\n ERROR of array A reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &mb, &nb, b, &ldb) != 0 ) {
printf("\n ERROR of array B reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &m, &n, c, &ldc) != 0 ) {
printf("\n ERROR of array C reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
fclose(in_file);
/* Print input data */
printf("\n INPUT DATA");
printf("\n M="INT_FORMAT" N="INT_FORMAT" K="INT_FORMAT, m, n, k);
printf("\n ALPHA=%5.1f BETA=%5.1f", alpha, beta);
PrintParameters("TRANSA, TRANSB", transA, transB);
PrintParameters("LAYOUT", layout);
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &ma, &na, a, &lda, "A");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &mb, &nb, b, &ldb, "B");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &m, &n, c, &ldc, "C");
/* Call SGEMM subroutine ( C Interface ) */
cblas_sgemm(layout, transA, transB, m, n, k, alpha,
a, lda, b, ldb, beta, c, ldc);
/* Print output data */
printf("\n\n OUTPUT DATA");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &m, &n, c, &ldc, "C");
mkl_free(a);
mkl_free(b);
mkl_free(c);
return 0;
}
cblas_sgemmx.d
: C B L A S _ S G E M M EXAMPLE PROGRAM DATA
3 5 4 :Values of m, n, k
0.5 1.2 :Values of alpha, beta
111 111 101 :Values of transA, transB, layout
1.0 2.0 3.0 4.0
3.2 2.7 8.1 5.0
0.0 4.0 5.0 3.0 :Values of array A
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0 :Values of array B
0.0 0.0 1.0 1.0 1.0
0.0 0.0 1.0 1.0 1.0
0.0 0.0 1.0 1.0 1.0 :Values of array C
CMakeLists.txt
#===============================================================================
# Copyright 2023 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================
cmake_minimum_required(VERSION 3.13)
enable_testing()
# Define language and file extensions
set(TEST_LANG C)
set(TEST_EXT c)
set(DATA_EXT d)
project(MKL_Example LANGUAGES ${TEST_LANG})
# Include MKLConfig.cmake
find_package(MKL CONFIG REQUIRED)
# Build extra files
add_library(blas_common common_func.c)
if(WIN32)
set(TEST_COPT "-D_CRT_SECURE_NO_WARNINGS")
endif()
# Add some parts of MKL target: headers and compiler options
target_include_directories(blas_common PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_options(blas_common PUBLIC ${TEST_COPT} $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
set(TEST_LOPT blas_common)
# Build target
set(func "example")
set(func_name "cblas_sgemmx")
set(${func}_SRC ${func_name}.${TEST_EXT})
add_executable(${func} ${${func}_SRC})
set_property(TARGET ${func} PROPERTY C_STANDARD 99)
# Add MKL target: headers, compiler options and libraries to link
target_link_libraries(${func} PUBLIC ${TEST_LOPT} MKL::MKL)
# Register example as ctest
set(${func_name}_DATA ${PROJECT_SOURCE_DIR}/${func_name}.${DATA_EXT})
add_test(NAME ${func} COMMAND ${func} ${${func_name}_DATA})
# Add oneMKL environment variables if any defined
if(MKL_ENV)
set_tests_properties(${func} PROPERTIES ENVIRONMENT "${MKL_ENV}")
endif()
ScaLAPACK Example
matrix_generator.h
/*******************************************************************************
* Copyright 2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! Intel(R) oneAPI Math Kernel Library ScaLAPACK C example source file
!
!*******************************************************************************
!===============================================================================
! Matrix generator routines for ScaLAPACK p?getrf and p?getrs example program
!===============================================================================
! List of routines are given in the file:
!
! pdmatgen_random
! psmatgen_random
! pzmatgen_random
! psmatgen_random
!
! Definitions for element index numbers of ScaLAPACK array descriptors
*/
#include "mkl.h"
#include "mkl_blacs.h"
#include "mkl_pblas.h"
#include "mkl_scalapack.h"
#define DTYPE_ 1
#define CTXT_ 2
#define M_ 3
#define N_ 4
#define MB_ 5
#define NB_ 6
#define RSRC_ 7
#define CSRC_ 8
#define LLD_ 9
/* Additional definitions */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
/* Local real and complex constants */
#define RZERO 0.0
#define RONE 1.0
#define RNEGONE -1.0
#define CZERO {0.0f, 0.0f}
#define CONE {1.0f, 0.0f}
#define CNEGONE {-1.0f, 0.0f}
const MKL_INT izero = 0, ione = 1, isix = 6, inegone = -1, dist=1;
void pdmatgen_random(MKL_INT type, MKL_INT* desca, double* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
/*
* Purpose
* =======
*
* pdmatgen_random: parallel real double precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* A random matrix is generated if type != 0.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to an array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over. The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
{
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if(type) {
a[il+ii+(jl+jj)*lld] = (double) dlarnd( &dist, seed );
} else {
if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
else a[il+ii+(jl+jj)*lld] = RZERO;
}
}
}
}
}
}
}
void psmatgen_random(MKL_INT type, MKL_INT* desca, float* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* psmatgen_random: parallel real float precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* A random matrix is generated if type != 0.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a float array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over. The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if ( iarow==myrow && iacol==mycol ) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld] = (float) dlarnd( &dist, seed );
} else {
if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
else a[il+ii+(jl+jj)*lld] = RZERO;
}
}
}
}
}
}
}
void pzmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex16* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* pzmatgen_random: parallel double complex precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* Otherwise a random matrix is generated.
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the identity matrix.
* Otherwise a random matrix is generated.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a MKL_Complex16 array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over. The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld].real = (double) dlarnd( &dist, seed );
a[il+ii+(jl+jj)*lld].imag = (double) dlarnd( &dist, seed );
} else {
if( i==j ) {
a[il+ii+(jl+jj)*lld].real = RONE;
a[il+ii+(jl+jj)*lld].imag = RZERO;
} else {
a[il+ii+(jl+jj)*lld].real = RZERO;
a[il+ii+(jl+jj)*lld].imag = RZERO;
}
}
}
}
}
}
}
}
void pcmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex8* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* pcmatgen_random: parallel single complex precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* Otherwise a random matrix is generated.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a MKL_Complex8 array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over. The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld].real = (float) dlarnd( &dist, seed );
a[il+ii+(jl+jj)*lld].imag = (float) dlarnd( &dist, seed );
} else {
if( i==j ) {
a[il+ii+(jl+jj)*lld].real = RONE;
a[il+ii+(jl+jj)*lld].imag = RZERO;
} else {
a[il+ii+(jl+jj)*lld].real = RZERO;
a[il+ii+(jl+jj)*lld].imag = RZERO;
}
}
}
}
}
}
}
}
psgetrf_example.c
/*******************************************************************************
* Copyright 2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************
! Content:
! Intel(R) oneAPI Math Kernel Library ScaLAPACK C example source file
!
!*****************************************************************************
!===============================================================================
!==== ScaLAPACK psgetrf and psgetrs example program ============================
!===============================================================================
!
! Example solves a system of distributed linear equations
!
! A * X = Y or A * X = I
!
! with a general random n-by-n distributed matrix A using the LU
! factorization computed by psgetrf.
! Y is a random n-by-nrhs distributed matrix,
! I denotes n-by-nrhs matrix which possesses 1 as all
! elements along its leading (main) diagonal;
! while all other elements in the matrix are 0.
! If n==nrhs, I is the identity matrix and X is the matrix inverse.
!
! Example also demonstrates BLACS routines usage.
!
! It works with block-cyclic distribution on 2D process grid
!
! List of routines demonstrated in the example:
!
! psgetrf
! psgetrs
! pslamch
! pslange
! psgemr2d
! psgemm
! blacs_get
! blacs_gridinit
! blacs_gridinfo
! blacs_gridexit
! blacs_exit
! descinit
! igamx2d
! igebr2d
! igebs2d
! sgebr2d
! sgebs2d
! numroc
! indxg2p
!
! The program must be driven by a short data file.
!
! 0 type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y
! 2000 n, dimension of matrix, 0 < n
! 1000 nrhs,number of right hand sides
! 16 nb, size of blocks, must be > 0
! 2 p, number of rows in the process grid, must be > 0
! 2 q, number of columns in the process grid, must be > 0, p*q = number of processes
! 20.0 threshold for residual check
!
!*******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "matrix_generator.h"
MKL_INT test_solve(MKL_INT ictxt, MKL_INT type, MKL_INT n, MKL_INT nrhs, MKL_INT nb, MKL_INT nprow, MKL_INT npcol, MKL_INT* seed, float thresh)
{
MKL_INT info = 0;
MKL_INT err = 0;
float *A = NULL;
float *A_INI = NULL;
float *X = NULL;
float *Y = NULL;
MKL_INT *IPIV = NULL;
/* Matrix descriptors */
MKL_INT desca[9];
MKL_INT descx[9];
MKL_INT descy[9];
/* Local variables */
MKL_INT myrow;
MKL_INT mycol;
MKL_INT iam, nprocs;
float eps;
float normA, normRhs, normSol, normRes, residual;
float fzero = RZERO;
float fone = RONE;
float fnegone = RNEGONE;
/* Compute machine epsilon */
eps = pslamch( &ictxt, "e" );
/* Get information about how many processes are used for program execution
and number of current process */
blacs_pinfo( &iam, &nprocs );
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
/* Compute precise length of local pieces of distributed matrices */
MKL_INT loc_ra = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_ca = numroc(&n, &nb, &mycol, &izero, &npcol);
MKL_INT loc_lda = max(1, loc_ra);
MKL_INT loc_sizeA =loc_ra*loc_ca;
MKL_INT loc_rx = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_cx = numroc(&nrhs, &nb, &mycol, &izero, &npcol);
MKL_INT loc_ldx = max(1, loc_rx);
MKL_INT loc_ry = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_cy = numroc(&nrhs, &nb, &mycol, &izero, &npcol);
MKL_INT loc_ldy = max(1, loc_ry);
/* Initialize descriptors for distributed arrays */
descinit(desca, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &loc_lda, &info);
descinit(descx, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &loc_ldx, &info);
descinit(descy, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &loc_ldy, &info);
/* Allocate matrix, right-hand-side, vector solution x and the IPIV array
containing the pivoting information. The array A_INI is used to store
original matrix */
if( loc_sizeA ) {
A = (float*)mkl_malloc( loc_sizeA * sizeof(float), 64 );
A_INI=(float*)mkl_malloc( loc_sizeA * sizeof(float), 64 );
}
if( loc_rx * loc_cx ) X = (float*)mkl_malloc( loc_rx * loc_cx * sizeof(float), 64 );
if( loc_ry * loc_cy ) Y = (float*)mkl_malloc( loc_ry * loc_cy * sizeof(float), 64 );
IPIV = (MKL_INT*)mkl_malloc( (loc_ra + nb)*sizeof(MKL_INT), 64 );
if( A == NULL || X == NULL || Y == NULL || A_INI == NULL || IPIV == NULL ) info = 1;
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
if( info ) {
err = 1;
if ( iam == 0 ) printf( "\n Can't allocate memory for arrays\n" );
goto early_exit;
}
/* Generate matrix and solution. Copy the initial matrix. */
seed[0]+= myrow + mycol;
psmatgen_random(1, desca, A, seed, nprow, npcol);
scopy(&loc_sizeA, A, &ione, A_INI, &ione);
psmatgen_random(type, descx, X, seed, nprow, npcol);
psmatgen_random(type, descy, Y, seed, nprow, npcol);
/* Compute the right hand side */
if( type ) {
psgemm("N", "N", &n, &nrhs, &n, &fone, A, &ione, &ione, desca,
X, &ione, &ione, descx,
&fzero, Y, &ione, &ione, descy);
/* Copy the right hand side Y to X */
psgemr2d(&n, &nrhs, Y, &ione, &ione, descy, X, &ione, &ione, descx, &ictxt);
}
/* Compute norm of the initial matrix and right hand side */
normA = pslange("M", &n, &n, A, &ione, &ione, desca, NULL);
normRhs = pslange("M", &n, &nrhs, Y, &ione, &ione, descy, NULL);
/* Compute the LU factorization of the initial matrix */
psgetrf(&n, &n, A, &ione, &ione, desca, IPIV, &info);
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
/* If psgetrf fails, deallocate all arays and report an error */
if( info) {
err = 1;
if ( iam == 0 ) printf( "\n psgetrf fails to compute LU decomposition \n" );
goto early_exit;
}
psgetrs("N", &n, &nrhs, A, &ione, &ione, desca, IPIV, X, &ione, &ione, descx, &info);
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
/* If psgetrs fails, deallocate all arays and report an error */
if( info ) {
err = 1;
if ( iam == 0 ) printf( "\n psgetrs fails to solve \n" );
goto early_exit;
}
/* Normsol is the norm of the solution matrix: normsol = || X|| */
normSol = pslange("M", &n, &nrhs, X, &ione, &ione, descx, NULL);
/* Compute the residual matrix || A*X - Y || */
psgemm("N", "N", &n, &nrhs, &n, &fone, A_INI, &ione, &ione, desca,
X, &ione, &ione, descx,
&fnegone, Y, &ione, &ione, descy);
/* Compute the residual as:
residual = ||A*X-Y||/(n*eps*(||A||*||X||+||Y||)) */
normRes = pslange("M", &n, &nrhs, Y, &ione, &ione, descy, NULL);
residual = normRes/( ( (float)n ) * eps *( normA * normSol + normRhs ) );
/* Check if residual passed or failed the threshold */
if( iam == 0 ) {
if ( ( thresh >= RZERO ) && !( residual <= thresh ) ){
printf( "FAILED. Residual = %05.11f\n", residual );
err = 1;
} else {
printf( "PASSED. Residual = %05.11f\n", residual );
}
printf( "=== END OF EXAMPLE =====================\n" );
}
early_exit :
/* Destroy arrays */
if( A ) mkl_free(A);
if( A_INI ) mkl_free(A_INI);
if( X ) mkl_free(X);
if( Y ) mkl_free(Y);
if( IPIV ) mkl_free(IPIV);
return err;
}
int main(int argc, char *argv[])
{
/* ==== Declarations =================================================== */
/* File variables */
FILE *fin;
/* Local scalars */
MKL_INT iam, nprocs, ictxt=-1, nprow, npcol;
MKL_INT n, nb, nrhs;
MKL_INT err;
MKL_INT type=1;
int n_int, nb_int, nprow_int, npcol_int, nrhs_int, type_int;
float thresh;
MKL_INT seed[4] = { 105, 1410, 1860, 4085 };
/* Local arrays */
MKL_INT iw[ 6 ];
/* ==== Executable statements ========================================== */
/* Get information about how many processes are used for program execution
and number of current process */
blacs_pinfo( &iam, &nprocs );
/* Init temporary 1D process grid */
blacs_get( &inegone, &izero, &ictxt );
blacs_gridinit( &ictxt, "C", &nprocs, &ione );
/* Open input file */
if ( iam == 0 ) {
fin = fopen( "psgetrf_example.in", "r" );
if ( fin == NULL ) {
printf( "Error while open input file." );
return 1;
}
}
/* Read data and send it to all processes */
if ( iam == 0 ) {
/* Read parameters */
fscanf( fin, "%d type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y", &type_int );
fscanf( fin, "%d n, dimension of matrix, 0 < n", &n_int );
fscanf( fin, "%d nrhs,number of right hand sides, 0 < nrhs", &nrhs_int );
fscanf( fin, "%d nb, size of blocks, must be > 0", &nb_int );
fscanf( fin, "%d p, number of rows in the process grid, must be > 0", &nprow_int );
fscanf( fin, "%d q, number of columns in the process grid, must be > 0, p*q = number of processes", &npcol_int );
fscanf( fin, "%f threshold for residual check", &thresh );
fclose( fin );
type = (MKL_INT) type_int;
n = (MKL_INT) n_int;
nrhs = (MKL_INT) nrhs_int;
nb = (MKL_INT) nb_int;
nprow = (MKL_INT) nprow_int;
npcol = (MKL_INT) npcol_int;
/* Check if all parameters are correct */
if( ( n<=0 )||( nrhs<=0 )||( nb<=0 )||( nprow<=0 )||
( npcol<=0 )||( nprow*npcol != nprocs ) ) {
printf( "One or several input parameters has incorrect value. Limitations:\n" );
printf( "n > 0, nrhs > 0, nb > 0, p > 0, q > 0 - integer\n" );
printf( "p*q = number of processes\n" );
printf( "threshold - float \n");
return 1;
}
/* Pack data into array and send it to other processes */
iw[ 0 ] = type;
iw[ 1 ] = n;
iw[ 2 ] = nrhs;
iw[ 3 ] = nb;
iw[ 4 ] = nprow;
iw[ 5 ] = npcol;
igebs2d( &ictxt, "All", " ", &isix, &ione, iw, &isix );
sgebs2d( &ictxt, "All", " ", &ione, &ione, &thresh, &ione );
} else {
/* Recieve and unpack data */
igebr2d( &ictxt, "All", " ", &isix, &ione, iw, &isix, &izero, &izero );
sgebr2d( &ictxt, "All", " ", &ione, &ione, &thresh, &ione, &izero, &izero );
type = iw[ 0 ];
n = iw[ 1 ];
nrhs = iw[ 2 ];
nb = iw[ 3 ];
nprow = iw[ 4 ];
npcol = iw[ 5 ];
}
/* Destroy temporary process grid */
blacs_gridexit( &ictxt );
/* Init workind 2D process grid */
blacs_get(&inegone, &izero, &ictxt);
blacs_gridinit(&ictxt, "R", &nprow, &npcol);
if ( iam == 0 ) {
/* Print information of task */
printf( "=== START OF EXAMPLE ===================\n" );
if( type ) {
printf( "Solve matrix equation: A*X = Y\n" );
printf( "with a general random n-by-n distributed matrix A \n" );
printf( "Y is a random n-by-nrhs distributed matrix\n\n" );
} else {
printf( "Solve matrix equation: A*X = I\n" );
printf( "with a general random n-by-n distributed matrix A \n" );
printf( "I denotes a n-by-nrhs matrix which possesses 1 \n" );
printf( "as all elements along its leading (main) diagonal \n" );
printf( "while all other elements in the matrix are 0. \n" );
printf( "If n==nrhs, I is the identity matrix and X is the matrix inverse of A.\n\n" );
}
printf( "n = %d, nrhs = %d, nb = %d; %dx%d - process grid\n\n", n_int, nrhs_int, nb_int, nprow_int, npcol_int);
printf( "Threshold for residual check = %05.11f\n", thresh );
}
err = test_solve(ictxt, type, n, nrhs, nb, nprow, npcol, seed, thresh);
/* Destroy process grid */
blacs_gridexit(&ictxt);
blacs_exit(&izero);
return err;
}
psgetrf_example.in
0 type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y
2000 n, dimension of matrix, 0 < n
1000 nrhs,number of right hand sides
16 nb, size of blocks, must be > 0
2 p, number of rows in the process grid, must be > 0
2 q, number of columns in the process grid, must be > 0, p*q = number of processes
20.0 threshold for residual check
CMakeLists.txt
#===============================================================================
# Copyright 2023 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================
cmake_minimum_required(VERSION 3.13)
enable_testing()
# Enable SCALAPACK in oneMKL
# See MKLConfig.cmake top section for all cluster libraries
set(ENABLE_SCALAPACK ON)
# Define language and compiler
set(TEST_LANG C)
set(TEST_EXT c)
project(MKL_Example LANGUAGES ${TEST_LANG})
# Include oneMKL config file
find_package(MKL CONFIG REQUIRED)
# W/A for known problem in Intel(R) MPI 2021.1
if(MKL_MPI STREQUAL "intelmpi" AND DEFINED ENV{I_MPI_ROOT})
if(UNIX AND NOT APPLE AND $ENV{I_MPI_ROOT} MATCHES "2021.1")
set(MPI_C_ADDITIONAL_INCLUDE_DIRS $ENV{I_MPI_ROOT}/include)
endif()
endif()
# Force mshpc to be the first in mpi find package
if(MKL_MPI STREQUAL "msmpi" OR MKL_MPI STREQUAL "mshpc")
set(MPI_GUESS_LIBRARY_NAME MSMPI)
endif()
find_package(MPI REQUIRED)
# Override default compile/link lines for mpi script
if(USE_MPI_SCRIPT AND WIN32)
set(CMAKE_C_COMPILE_OBJECT "<CMAKE_C_COMPILER> <DEFINES> <INCLUDES> <FLAGS> /Fo<OBJECT> -c <SOURCE>")
set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
endif()
# Try to identify MPI for correct BLACS if MKL_MPI is not defined
if(MPI_${TEST_LANG}_FOUND AND NOT MKL_MPI)
if(UNIX AND NOT APPLE AND MPI_${TEST_LANG}_COMPILER MATCHES "openmpi")
set(CURRENT_MPI "openmpi")
elseif(WIN32 AND MPI_${TEST_LANG}_COMPILER MATCHES "msmpi" OR MPI_${TEST_LANG}_COMPILER MATCHES "Microsoft MPI")
set(CURRENT_MPI "msmpi")
elseif(MPI_${TEST_LANG}_COMPILER MATCHES "mpich")
set(CURRENT_MPI "mpich")
elseif(NOT APPLE AND MPI_${TEST_LANG}_COMPILER MATCHES ".*intel.*mpi.*")
set(CURRENT_MPI "intelmpi")
if(WIN32 AND MPI_LOCALONLY)
set(MPI_LOCAL_OPT -localonly)
endif()
endif()
set(MKL_MPI "${CURRENT_MPI}")
endif()
# Define target
set(TEST_INCLUDE "${MPI_${TEST_LANG}_INCLUDE_DIRS}")
# Build example
set(func_name "psgetrf_example")
set(func "${func_name}")
file(GLOB_RECURSE ${func}_SRC ${PROJECT_SOURCE_DIR}/${func_name}.${TEST_EXT})
add_executable(${func} ${${func}_SRC})
target_include_directories(${func} PUBLIC ${TEST_INCLUDE})
if(WIN32)
set(TEST_COPT "-D_CRT_SECURE_NO_WARNINGS")
target_compile_options(${func} PUBLIC ${TEST_COPT})
endif()
target_link_libraries(${func} PUBLIC MKL::MKL MPI::MPI_C)
# Register example as ctest
configure_file(${PROJECT_SOURCE_DIR}/${func_name}.in ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
add_test(NAME ${func} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPI_LOCAL_OPT} ${PROJECT_BINARY_DIR}/${func})
# Add oneMKL environment variables
if(MKL_ENV)
set_tests_properties(${func} PROPERTIES ENVIRONMENT "${MKL_ENV}")
endif()