I'm having trouble explaining why my performance test return significantly different results on 2 different types of run.
Steps to reproduce issue:
- Get the code from gist: https://gist.github.com/AVAVT/83685bfe5280efc7278465f90657b9ea
- Run
node practice1.generator
- Run
node practice1.performance-test
practice1.generator
should generate a test-data.json
file, and log some searching algorithm execution time into the console.
After that, practice1.performance-test
reads from test-data.json
, and perform the exact same evaluation function on that same data.
The output on my machine is consistently similar to this:
> node practice1.generator
Generate time: 9,307,061,368 nanoseconds
Total time using indexOf : 7,005,750 nanoseconds
Total time using for loop : 7,463,967 nanoseconds
Total time using binary search : 1,741,822 nanoseconds
Total time using interpolation search: 915,532 nanoseconds
> node practice1.performance-test
Total time using indexOf : 11,574,993 nanoseconds
Total time using for loop : 8,765,902 nanoseconds
Total time using binary search : 2,365,598 nanoseconds
Total time using interpolation search: 771,005 nanoseconds
Note the difference in execution time in the case of indexOf
and binary search
comparing to the other algorithms.
If I repeatedly run node practice1.generator
or node practice1.performance-test
, the result is quite consistent though.
Now this is so troubling, I can't find a way to figure out which result is credible, and why such differences occur. Is it caused by a difference between the generated test array vs JSON.parse-d test array; or is it caused by process.hrtime()
; or is it some unknown reason I couldn't even fathom?
Update: I have traced the cause of the indexOf
case to be because of JSON.parse
. Inside practice1.generator
, the tests
array is the original generated array; while in practice1.performance-test
the array is read from the json file and is probably different from the original array somehow.
If within practice1.generator
I instead JSON.parse()
a new array from the string:
var tests2 = JSON.parse(JSON.stringify(tests));
performanceUtil.performanceTest(tests2);
The execution time of indexOf
is now consistent on both files.
> node practice1.generator
Generate time: 9,026,080,466 nanoseconds
Total time using indexOf : 11,016,420 nanoseconds
Total time using for loop : 8,534,540 nanoseconds
Total time using binary search : 1,586,780 nanoseconds
Total time using interpolation search: 742,460 nanoseconds
> node practice1.performance-test
Total time using indexOf : 11,423,556 nanoseconds
Total time using for loop : 8,509,602 nanoseconds
Total time using binary search : 2,303,099 nanoseconds
Total time using interpolation search: 718,723 nanoseconds
So at least I know indexOf
run better on the original array and worse on a JSON.parse
-d array. Still I only know the reason, no clue why.
The Binary search execution time remain different on 2 files, consistently taking ~1.7ms in practice1.generator
(even when using a JSON.parse
-d object) and ~2.3ms in practice1.performance-test
.
Below is the same code as in the gist, provided for future reference purpose.
performance-utils.js:
'use strict';
const performanceTest = function(tests){
var tindexOf = process.hrtime();
tests.forEach(testcase => {
var result = testcase.input.indexOf(testcase.target);
if(result !== testcase.output) console.log("Errr", result, testcase.output);
});
tindexOf = process.hrtime(tindexOf);
var tmanual = process.hrtime();
tests.forEach(testcase => {
const arrLen = testcase.input.length;
var result = -1;
for(var i=0;i<arrLen;i++){
if(testcase.input[i] === testcase.target){
result = i;
break;
}
}
if(result !== testcase.output) console.log("Errr", result, testcase.output);
});
tmanual = process.hrtime(tmanual);
var tbinary = process.hrtime();
tests.forEach(testcase => {
var max = testcase.input.length-1;
var min = 0;
var check, num;
var result = -1;
while(max => min){
check = Math.floor((max+min)/2);
num = testcase.input[check];
if(num === testcase.target){
result = check;
break;
}
else if(num > testcase.target) max = check-1;
else min = check+1;
}
if(result !== testcase.output) console.log("Errr", result, testcase.output);
});
tbinary = process.hrtime(tbinary);
var tinterpolation = process.hrtime();
tests.forEach(testcase => {
var max = testcase.input.length-1;
var min = 0;
var result = -1;
var check, num;
while(max > min && testcase.target >= testcase.input[min] && testcase.target <= testcase.input[max]){
check = min + Math.round((max-min) * (testcase.target - testcase.input[min]) / (testcase.input[max]-testcase.input[min]));
num = testcase.input[check];
if(num === testcase.target){
result = check;
break;
}
else if(testcase.target > num) min = check + 1;
else max = check - 1;
}
if(result === -1 && testcase.input[max] == testcase.target) result = max;
if(result !== testcase.output) console.log("Errr", result, testcase.output);
});
tinterpolation = process.hrtime(tinterpolation);
console.log(`Total time using indexOf : ${(tindexOf[0] * 1e9 + tindexOf[1]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);
console.log(`Total time using for loop : ${(tmanual[0] * 1e9 + tmanual[1]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);
console.log(`Total time using binary search : ${(tbinary[0] * 1e9 + tbinary[1]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);
console.log(`Total time using interpolation search: ${(tinterpolation[0] * 1e9 + tinterpolation[1]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);
}
module.exports = { performanceTest }
practice1.generator.js:
'use strict';
require('util');
const performanceUtil = require('./performance-utils');
const fs = require('fs');
const path = require('path');
const outputFilePath = path.join(__dirname, process.argv[3] || 'test-data.json');
const AMOUNT_TO_GENERATE = parseInt(process.argv[2] || 1000);
// Make sure ARRAY_LENGTH_MAX < (MAX_NUMBER - MIN_NUMBER)
const ARRAY_LENGTH_MIN = 10000;
const ARRAY_LENGTH_MAX = 18000;
const MIN_NUMBER = -10000;
const MAX_NUMBER = 10000;
const candidates = Array.from(Array(MAX_NUMBER - MIN_NUMBER + 1), (item, index) => MIN_NUMBER + index);
function createNewTestcase(){
var input = candidates.slice();
const lengthToGenerate = Math.floor(Math.random()*(ARRAY_LENGTH_MAX - ARRAY_LENGTH_MIN + 1)) + ARRAY_LENGTH_MIN;
while(input.length > lengthToGenerate){
input.splice(Math.floor(Math.random()*input.length), 1);
}
const notfound = input.length === lengthToGenerate ?
input.splice(Math.floor(Math.random()*input.length), 1)[0] : MIN_NUMBER-1;
const output = Math.floor(Math.random()*(input.length+1)) - 1;
const target = output === -1 ? notfound : input[output];
return {
input,
target,
output
};
}
var tgen = process.hrtime();
var tests = [];
while(tests.length < AMOUNT_TO_GENERATE){
tests.push(createNewTestcase());
}
fs.writeFileSync(outputFilePath, JSON.stringify(tests));
var tgen = process.hrtime(tgen);
console.log(`Generate time: ${(tgen[0] * 1e9 + tgen[1]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);
performanceUtil.performanceTest(tests);
practice1.performance-test.js:
'use strict';
require('util');
const performanceUtil = require('./performance-utils');
const fs = require('fs');
const path = require('path');
const outputFilePath = path.join(__dirname, process.argv[2] || 'test-data.json');
var tests = JSON.parse(fs.readFileSync(outputFilePath));
performanceUtil.performanceTest(tests);
As you have already noticed, the performance difference leads to the comparison:
generated array
vsJSON.parse
d. What we have in both cases: same arrays with same numbers? So the look up performance must be the same? No.There are several very nice articles about the data types:
So why arrays created by
JSON.parse
are slow? The parser, when creating values, doesn't properly optimize the data structures, and as the result we getuntagged
arrays withboxed
doubles. But we can optimize the arrays afterwards withArray.from
and in your case, same as the generated arrays, you getsmi
arrays withsmi
numbers. Here is an example based on your sample.Run it with
node --allow-natives-syntax test.js