I am doing some performance research in the fork / join framework in Java 7. To improve the test results, I want to use different recursive algorithms during the tests. One of them is matrix multiplication.
I downloaded the following example from the Doug Lea () website:
public class MatrixMultiply {
static final int DEFAULT_GRANULARITY = 16;
static int granularity = DEFAULT_GRANULARITY;
public static void main(String[] args) {
final String usage = "Usage: java MatrixMultiply <threads> <matrix size (must be a power of two)> [<granularity>] \n Size and granularity must be powers of two.\n For example, try java MatrixMultiply 2 512 16";
try {
int procs;
int n;
try {
procs = Integer.parseInt(args[0]);
n = Integer.parseInt(args[1]);
if (args.length > 2) granularity = Integer.parseInt(args[2]);
}
catch (Exception e) {
System.out.println(usage);
return;
}
if ( ((n & (n - 1)) != 0) ||
((granularity & (granularity - 1)) != 0) ||
granularity < 2) {
System.out.println(usage);
return;
}
float[][] a = new float[n][n];
float[][] b = new float[n][n];
float[][] c = new float[n][n];
init(a, b, n);
FJTaskRunnerGroup g = new FJTaskRunnerGroup(procs);
g.invoke(new Multiplier(a, 0, 0, b, 0, 0, c, 0, 0, n));
g.stats();
}
catch (InterruptedException ex) {}
}
static void init(float[][] a, float[][] b, int n) {
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
a[i][j] = 1.0F;
b[i][j] = 1.0F;
}
}
}
static void check(float[][] c, int n) {
for (int i = 0; i < n; i++ ) {
for (int j = 0; j < n; j++ ) {
if (c[i][j] != n) {
throw new Error("Check Failed at [" + i +"]["+j+"]: " + c[i][j]);
}
}
}
}
static class Multiplier extends FJTask {
final float[][] A;
final int aRow;
final int aCol;
final float[][] B;
final int bRow;
final int bCol;
final float[][] C;
final int cRow;
final int cCol;
final int size;
Multiplier(float[][] A, int aRow, int aCol,
float[][] B, int bRow, int bCol,
float[][] C, int cRow, int cCol,
int size) {
this.A = A; this.aRow = aRow; this.aCol = aCol;
this.B = B; this.bRow = bRow; this.bCol = bCol;
this.C = C; this.cRow = cRow; this.cCol = cCol;
this.size = size;
}
public void run() {
if (size <= granularity) {
multiplyStride2();
}
else {
int h = size / 2;
coInvoke(new FJTask[] {
seq(new Multiplier(A, aRow, aCol,
B, bRow, bCol,
C, cRow, cCol,
h),
new Multiplier(A, aRow, aCol+h,
B, bRow+h, bCol,
C, cRow, cCol,
h)),
seq(new Multiplier(A, aRow, aCol,
B, bRow, bCol+h,
C, cRow, cCol+h,
h),
new Multiplier(A, aRow, aCol+h,
B, bRow+h, bCol+h,
C, cRow, cCol+h,
h)),
seq(new Multiplier(A, aRow+h, aCol,
B, bRow, bCol,
C, cRow+h, cCol,
h),
new Multiplier(A, aRow+h, aCol+h,
B, bRow+h, bCol,
C, cRow+h, cCol,
h)),
seq(new Multiplier(A, aRow+h, aCol,
B, bRow, bCol+h,
C, cRow+h, cCol+h,
h),
new Multiplier(A, aRow+h, aCol+h,
B, bRow+h, bCol+h,
C, cRow+h, cCol+h,
h))
});
}
}
void multiplyStride2() {
for (int j = 0; j < size; j+=2) {
for (int i = 0; i < size; i +=2) {
float[] a0 = A[aRow+i];
float[] a1 = A[aRow+i+1];
float s00 = 0.0F;
float s01 = 0.0F;
float s10 = 0.0F;
float s11 = 0.0F;
for (int k = 0; k < size; k+=2) {
float[] b0 = B[bRow+k];
s00 += a0[aCol+k] * b0[bCol+j];
s10 += a1[aCol+k] * b0[bCol+j];
s01 += a0[aCol+k] * b0[bCol+j+1];
s11 += a1[aCol+k] * b0[bCol+j+1];
float[] b1 = B[bRow+k+1];
s00 += a0[aCol+k+1] * b1[bCol+j];
s10 += a1[aCol+k+1] * b1[bCol+j];
s01 += a0[aCol+k+1] * b1[bCol+j+1];
s11 += a1[aCol+k+1] * b1[bCol+j+1];
}
C[cRow+i] [cCol+j] += s00;
C[cRow+i] [cCol+j+1] += s01;
C[cRow+i+1][cCol+j] += s10;
C[cRow+i+1][cCol+j+1] += s11;
}
}
}
}
}
This code is written for an older version of the fork / join framework. So I have to rewrite it. My rewritten code implements my own interface and looks like this:
public class Java7MatrixMultiply implements Algorithm {
private static final int SIZE = 32;
private static final int THRESHOLD = 8;
private float[][] a = new float[SIZE][SIZE];
private float[][] b = new float[SIZE][SIZE];
private float[][] c = new float[SIZE][SIZE];
ForkJoinPool forkJoinPool;
@Override
public void initialize() {
init(a, b, SIZE);
}
@Override
public void execute() {
MatrixMultiplyTask mainTask = new MatrixMultiplyTask(a, 0, 0, b, 0, 0, c, 0, 0, SIZE);
forkJoinPool = new ForkJoinPool();
forkJoinPool.invoke(mainTask);
System.out.println("Terminated!");
}
@Override
public void printResult() {
check(c, SIZE);
for (int i = 0; i < SIZE; i++) {
for (int j = 0; j < SIZE; j++) {
System.out.print(c[i][j] + " ");
}
System.out.println();
}
}
static void init(float[][] a, float[][] b, int n) {
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
a[i][j] = 1.0F;
b[i][j] = 1.0F;
}
}
}
static void check(float[][] c, int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (c[i][j] != n) {
System.out.println("Check Failed at [" + i + "][" + j + "]: " + c[i][j]);
}
}
}
}
private class MatrixMultiplyTask extends RecursiveAction {
private final float[][] A;
private final int aRow;
private final int aCol;
private final float[][] B;
private final int bRow;
private final int bCol;
private final float[][] C;
private final int cRow;
private final int cCol;
private final int size;
MatrixMultiplyTask(float[][] A, int aRow, int aCol, float[][] B,
int bRow, int bCol, float[][] C, int cRow, int cCol, int size) {
this.A = A;
this.aRow = aRow;
this.aCol = aCol;
this.B = B;
this.bRow = bRow;
this.bCol = bCol;
this.C = C;
this.cRow = cRow;
this.cCol = cCol;
this.size = size;
}
@Override
protected void compute() {
if (size <= THRESHOLD) {
multiplyStride2();
} else {
int h = size / 2;
invokeAll(new MatrixMultiplyTask[] {
new MatrixMultiplyTask(A, aRow, aCol,
B, bRow, bCol,
C, cRow, cCol,
h),
new MatrixMultiplyTask(A, aRow, aCol + h,
B, bRow + h, bCol,
C, cRow, cCol,
h),
new MatrixMultiplyTask(A, aRow, aCol,
B, bRow, bCol + h,
C, cRow, cCol + h,
h),
new MatrixMultiplyTask(A, aRow, aCol + h,
B, bRow + h, bCol + h,
C, cRow, cCol + h,
h),
new MatrixMultiplyTask(A, aRow + h, aCol,
B, bRow, bCol,
C, cRow + h, cCol,
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h,
B, bRow + h, bCol,
C, cRow + h, cCol,
h),
new MatrixMultiplyTask(A, aRow + h, aCol,
B, bRow, bCol + h,
C, cRow + h, cCol + h,
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h,
B, bRow + h, bCol + h,
C, cRow + h, cCol + h,
h) });
}
}
void multiplyStride2() {
for (int j = 0; j < size; j += 2) {
for (int i = 0; i < size; i += 2) {
float[] a0 = A[aRow + i];
float[] a1 = A[aRow + i + 1];
float s00 = 0.0F;
float s01 = 0.0F;
float s10 = 0.0F;
float s11 = 0.0F;
for (int k = 0; k < size; k += 2) {
float[] b0 = B[bRow + k];
s00 += a0[aCol + k] * b0[bCol + j];
s10 += a1[aCol + k] * b0[bCol + j];
s01 += a0[aCol + k] * b0[bCol + j + 1];
s11 += a1[aCol + k] * b0[bCol + j + 1];
float[] b1 = B[bRow + k + 1];
s00 += a0[aCol + k + 1] * b1[bCol + j];
s10 += a1[aCol + k + 1] * b1[bCol + j];
s01 += a0[aCol + k + 1] * b1[bCol + j + 1];
s11 += a1[aCol + k + 1] * b1[bCol + j + 1];
}
C[cRow + i][cCol + j] += s00;
C[cRow + i][cCol + j + 1] += s01;
C[cRow + i + 1][cCol + j] += s10;
C[cRow + i + 1][cCol + j + 1] += s11;
}
}
}
}
}
. , . . , - , , Seq. Seq , invokeAll(). fork/join. , , . ?