Examples - Compiling, linking, and running a simple matrix multiplication ATLAS program
This simple sample achieves a multiplication of two matrices, A
and B. A and B have elements randomly generated with values between
0 and 1. The multiplication is achieved in the following ways:
- by calling
dgemm/cblas_dgemm
BLAS functionality provided by ATLAS - by a manual calculation of the same
Sample output produced by all executables across all platforms
and architectures should look like this:
Matrix A has 3 rows and 6 columns:
0.566 0.974 0.202 0.941 0.294 0.427
0.580 0.539 0.772 0.248 0.832 0.848
0.080 0.533 0.434 0.163 0.576 0.416
Matrix B has 6 rows and 4 columns:
0.309 0.316 0.569 0.182
0.725 0.389 0.472 0.649
0.448 0.368 0.354 0.665
0.994 0.740 0.649 0.616
0.133 0.906 0.447 0.590
0.773 0.774 0.893 0.913
Matrix C has 3 rows and 4 columns:
2.276 1.926 1.978 2.013
1.928 2.270 2.148 2.387
1.165 1.356 1.185 1.469
Matrix D has 3 rows and 4 columns:
2.276 1.926 1.978 2.013
1.928 2.270 2.148 2.387
1.165 1.356 1.185 1.469
Pay attention to the fact that matrices C and D are congruent.
Also note that matrix data is organized or ordered in the Fortran way, namely columns major.
Sample 1
This program contains a C invocation
of the Fortran BLAS function dgemm_
provided by the
ATLAS framework.
Observation: In
this sample, the invocation of
dgemm_
has no previously
declared prototype, hence the compiler might issue a warning message.
Prototypes may be declared by including the atlas_f77
header
files, but source files might not have these header files specified
(i.e. old source code written prior to ATLAS).Source
code:
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
void init(double* matrix, int row, int column)
{
for (int j = 0; j < column; j++){
for (int i = 0; i < row; i++){
matrix[j*row + i] = ((double)rand())/RAND_MAX;
}
}
}
void print(const char * name, const double* matrix, int row, int column)
{
printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
for (int i = 0; i < row; i++){
for (int j = 0; j < column; j++){
printf("%.3f ", matrix[j*row + i]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char * argv[])
{
int rowsA, colsB, common;
int i,j,k;
if (argc != 4){
printf("Using defaults\n");
rowsA = 2; colsB = 4; common = 6;
}
else{
rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
}
double A[rowsA * common]; double B[common * colsB];
double C[rowsA * colsB]; double D[rowsA * colsB];
char transA = 'N', transB = 'N';
double one = 1.0, zero = 0.0;
srand(time(NULL));
init(A, rowsA, common); init(B, common, colsB);
dgemm_(&transA, &transB, &rowsA, &colsB, &common, &one, A,
&rowsA, B, &common, &zero, C, &rowsA);
for(i=0;i<colsB;i++){
for(j=0;j<rowsA;j++){
D[i*rowsA+j]=0;
for(k=0;k<common;k++){
D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
}
}
}
print("A", A, rowsA, common); print("B", B, common, colsB);
print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
return 0;
}
To compile the program for ARCH(10):
xlc -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for
ARCH(10):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch10
-latlas.arch10 -lf2c.arch10 -qfloat=ieee -o sample
To compile the program for ARCH(11):
xlc -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(11):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch11
-latlas.arch11 -lf2c.arch11 -qfloat=ieee -o sample
To compile the program for ARCH(12) and higher ARCHITECTURE
levels:
xlc -c -qfloat=ieee -qround=n -qarch=12 -qtarget=zosv2r3 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(12) and higher ARCHITECTURE
levels:
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch12
-latlas.arch12 -lf2c.arch12 -qfloat=ieee -o sample
Sample 2
This program contains a C++ invocation
of the Fortran BLAS function dgemm_
provided by the
ATLAS framework.
Observation: As
opposed to sample 1, the compiler must be explicitly instructed that
the function
dgemm_
has C linkage and thus no mangling
should be attempted. This can be achieved either as specified, or
by including the appropriate header file with the extern "C" designation.Source
code:
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
extern "C"
{
int dgemm_(char *, char *, int *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
}
void init(double* matrix, int row, int column)
{
for (int j = 0; j < column; j++){
for (int i = 0; i < row; i++){
matrix[j*row + i] = ((double)rand())/RAND_MAX;
}
}
}
void print(const char * name, const double* matrix, int row, int column)
{
printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
for (int i = 0; i < row; i++){
for (int j = 0; j < column; j++){
printf("%.3f ", matrix[j*row + i]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char * argv[])
{
int rowsA, colsB, common;
int i,j,k;
if (argc != 4){
printf("Using defaults\n");
rowsA = 2; colsB = 4; common = 6;
}
else{
rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
}
double A[rowsA * common]; double B[common * colsB];
double C[rowsA * colsB]; double D[rowsA * colsB];
char transA = 'N', transB = 'N';
double one = 1.0, zero = 0.0;
srand(time(NULL));
init(A, rowsA, common); init(B, common, colsB);
dgemm_(&transA, &transB, &rowsA, &colsB, &common, &one, A,
&rowsA, B, &common, &zero, C, &rowsA);
for(i=0;i<colsB;i++){
for(j=0;j<rowsA;j++){
D[i*rowsA+j]=0;
for(k=0;k<common;k++){
D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
}
}
}
print("A", A, rowsA, common); print("B", B, common, colsB);
print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
return 0;
}
To compile the program for ARCH(10):
xlC -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for
ARCH(10):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch10
-latlas.arch10 -lf2c.arch10 -qfloat=ieee -o sample
To compile the program for ARCH(11):
xlC -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(11):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch11
-latlas.arch11 -lf2c.arch11 -qfloat=ieee -o sample
To compile the program for ARCH(12) and higher ARCHITECTURE
levels:
xlC -c -qfloat=ieee -qround=n -qarch=12 -qtarget=zosv2r3 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(12) and higher ARCHITECTURE
levels:
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch12
-latlas.arch12 -lf2c.arch12 -qfloat=ieee -o sample
Sample 3
This program contains a C invocation
of the CBLAS function cblas_dgemm_
provided by the
ATLAS framework.
Observation: Same
result and functionality as if
dgemm_
would be called,
but this program uses the CBLAS version of the functions.Source
code:
#include <time.h>
#include <stdlib.h>
#include <cblas.h>
void init(double* matrix, int row, int column)
{
for (int j = 0; j < column; j++){
for (int i = 0; i < row; i++){
matrix[j*row + i] = ((double)rand())/RAND_MAX;
}
}
}
void print(const char * name, const double* matrix, int row, int column)
{
printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
for (int i = 0; i < row; i++){
for (int j = 0; j < column; j++){
printf("%.3f ", matrix[j*row + i]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char * argv[])
{
int rowsA, colsB, common;
int i,j,k;
if (argc != 4){
printf("Using defaults\n");
rowsA = 2; colsB = 4; common = 6;
}
else{
rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
}
double A[rowsA * common]; double B[common * colsB];
double C[rowsA * colsB]; double D[rowsA * colsB];
enum CBLAS_ORDER order = CblasColMajor;
enum CBLAS_TRANSPOSE transA = CblasNoTrans;
enum CBLAS_TRANSPOSE transB = CblasNoTrans;
double one = 1.0, zero = 0.0;
srand(time(NULL));
init(A, rowsA, common); init(B, common, colsB);
cblas_dgemm(order,transA,transB, rowsA, colsB, common ,1.0,A,
rowsA ,B, common ,0.0,C, rowsA);
for(i=0;i<colsB;i++){
for(j=0;j<rowsA;j++){
D[i*rowsA+j]=0;
for(k=0;k<common;k++){
D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
}
}
}
print("A", A, rowsA, common); print("B", B, common, colsB);
print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
return 0;
}
To compile the program for ARCH(10):
xlc -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for
ARCH(10):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch10
-latlas.arch10 -qfloat=ieee -o sample
To compile the program for ARCH(11):
xlc -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(11):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch11
-latlas.arch11 -qfloat=ieee -o sample
To compile the program for ARCH(12) and higher ARCHITECTURE
levels:
xlc -c -qfloat=ieee -qround=n -qarch=12 -qtarget=zosv2r3 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(12) and higher ARCHITECTURE
levels:
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch12
-latlas.arch12 -qfloat=ieee -o sample
Sample 4
This program contains a C++ invocation
of the CBLAS function cblas_dgemm_
provided by the
ATLAS framework.
Observation: Same
result and functionality as if
dgemm_
would be called,
but this program uses the CBLAS version of the functions.Source
code:
#include <time.h>
#include <stdlib.h>
extern "C"
{
#include <cblas.h>
}
void init(double* matrix, int row, int column)
{
for (int j = 0; j < column; j++){
for (int i = 0; i < row; i++){
matrix[j*row + i] = ((double)rand())/RAND_MAX;
}
}
}
void print(const char * name, const double* matrix, int row, int column)
{
printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
for (int i = 0; i < row; i++){
for (int j = 0; j < column; j++){
printf("%.3f ", matrix[j*row + i]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char * argv[])
{
int rowsA, colsB, common;
int i,j,k;
if (argc != 4){
printf("Using defaults\n");
rowsA = 2; colsB = 4; common = 6;
}
else{
rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
}
double A[rowsA * common]; double B[common * colsB];
double C[rowsA * colsB]; double D[rowsA * colsB];
enum CBLAS_ORDER order = CblasColMajor;
enum CBLAS_TRANSPOSE transA = CblasNoTrans;
enum CBLAS_TRANSPOSE transB = CblasNoTrans;
double one = 1.0, zero = 0.0;
srand(time(NULL));
init(A, rowsA, common); init(B, common, colsB);
cblas_dgemm(order,transA,transB, rowsA, colsB, common ,1.0,A,
rowsA ,B, common ,0.0,C, rowsA);
for(i=0;i<colsB;i++){
for(j=0;j<rowsA;j++){
D[i*rowsA+j]=0;
for(k=0;k<common;k++){
D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
}
}
}
print("A", A, rowsA, common); print("B", B, common, colsB);
print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
return 0;
}
To compile the program for ARCH(10):
xlC -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for
ARCH(10):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch10
-latlas.arch10 -qfloat=ieee -o sample
To compile the program for ARCH(11):
xlC -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(11):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch11
-latlas.arch11 -qfloat=ieee -o sample
To compile the program for ARCH(12) and higher ARCHITECTURE
levels:
xlC -c -qfloat=ieee -qround=n -qarch=12 -qtarget=zosv2r3 -I
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(12) and higher ARCHITECTURE
levels:
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch12
-latlas.arch12 -qfloat=ieee -o sample