// The "get-multiply" and "get-boolean" methods appear to have equivalent performance, // but the "get-shift" method outperforms them both by 2.5x most of the time (with it occasionally only being 20-30% faster) // The "set-lookup" method is 5x faster than "set-ternary". class FloatManipBenchmark { static final int BLOCK_SIZE = 64; // for optimal cache utilization static final int N_GET_METHODS = 3; static final String[] METHODS = new String[] { "get-multiply", "get-shift", "get-boolean", "set-ternary", "set-lookup" }; public static void main(String[] args) { System.out.println("FloatManipBenchmark"); int width = 640, height = 480, breadth = 3; int iterations = 1000; int method = 4; boolean usingSet = method >= N_GET_METHODS; byte[] planes = new byte[width * height]; float[][] slices = new float[breadth][]; int x = 0; if (usingSet) { for (int i = 0; i < breadth; i++) slices[i] = new float[planes.length]; for (int i = 0; i < planes.length; i++) { x += 1337; x = ((x >> 16) ^ x) * 0x45d9f3b; x = ((x >> 16) ^ x) * 0x45d9f3b; x = (x >> 16) ^ x; planes[i] = (byte)(x & 7); } } else { for (int i = 0; i < breadth; i++) { slices[i] = new float[planes.length]; for (int j = 0; j < planes.length; j++) { x += 1337; x = ((x >> 16) ^ x) * 0x45d9f3b; x = ((x >> 16) ^ x) * 0x45d9f3b; x = (x >> 16) ^ x; slices[i][j] = Float.intBitsToFloat(x & 0x3f7fffff); } } } float[] lut = new float[2]; long startTime = System.nanoTime(); switch (method) { case 0: for (int i = 0; i < iterations; i++) getPlanesMultiplyCast(planes, slices); break; case 1: for (int i = 0; i < iterations; i++) getPlanesTakeShift(planes, slices); break; case 2: for (int i = 0; i < iterations; i++) getPlanesBoolean(planes, slices); break; case 3: for (int i = 0; i < iterations; i++) setPlanesTernary(slices, planes); break; case 4: for (int i = 0; i < iterations; i++) setPlanesLookup(slices, planes, lut); break; default: throw new RuntimeException("Unsupported method " + method); } long endTime = System.nanoTime(); double elapsedMs = (double)(endTime - startTime) / 1E6; System.out.println( "Dimensions: " + width + "x" + height + "x" + breadth + "\nIterations: " + iterations + "\nMethod: " + METHODS[method] + "\nTime taken: " + elapsedMs + "ms" ); } static void getPlanesMultiplyCast(byte[] output, float[][] slices) { for (int i = 0; i < output.length; i += BLOCK_SIZE) { int block = Math.min(BLOCK_SIZE, output.length - i); for (int j = 0; j < slices.length; j++) { for (int k = 0; k < block; k++) { int value = (int)(slices[j][i+k] * 256.0f); output[i+k] |= (value >>> 31 | -value >>> 31) << j; } } } } static void getPlanesTakeShift(byte[] output, float[][] slices) { for (int i = 0; i < output.length; i += BLOCK_SIZE) { int block = Math.min(BLOCK_SIZE, output.length - i); for (int j = 0; j < slices.length; j++) { for (int k = 0; k < block; k++) { int exp = (Float.floatToRawIntBits(slices[j][i+k]) >> 23) & 0xff; output[i+k] |= (-(exp - 0x76) >>> 31) << j; } } } } static void getPlanesBoolean(byte[] output, float[][] slices) { for (int i = 0; i < output.length; i += BLOCK_SIZE) { int block = Math.min(BLOCK_SIZE, output.length - i); for (int j = 0; j < slices.length; j++) { for (int k = 0; k < block; k++) { int value = slices[j][i+k] < 1.0f / 256.0f ? 1 : 0; output[i+k] |= value << j; } } } } static void setPlanesTernary(float[][] output, byte[] planes) { for (int i = 0; i < planes.length; i += BLOCK_SIZE) { int block = Math.min(BLOCK_SIZE, planes.length - i); for (int j = 0; j < output.length; j++) { for (int k = 0; k < block; k++) output[j][i+k] = ((planes[i+k] >>> j) & 1) == 1 ? 255.0f : 0.0f; } } } static void setPlanesLookup(float[][] output, byte[] planes, float[] lut) { lut[0] = 0.0f; lut[1] = 255.0f; for (int i = 0; i < planes.length; i += BLOCK_SIZE) { int block = Math.min(BLOCK_SIZE, planes.length - i); for (int j = 0; j < output.length; j++) { for (int k = 0; k < block; k++) output[j][i+k] = lut[(planes[i+k] >>> j) & 1]; } } } }