From b5602c02b1fb0ac26a32d32e94c2b17317df4fc3 Mon Sep 17 00:00:00 2001 From: Jure Bajic Date: Fri, 4 Oct 2024 16:04:25 +0200 Subject: [PATCH 01/11] Add vector performance test --- simple/run-vector.js | 23 ++++++++++ simple/test.js | 99 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 simple/run-vector.js diff --git a/simple/run-vector.js b/simple/run-vector.js new file mode 100644 index 0000000..4d698cb --- /dev/null +++ b/simple/run-vector.js @@ -0,0 +1,23 @@ +function main () { + global.returnValue = 0; + require("./simple/test").test({ + outputCsv: false, + medium: true, + runs: 1, + documents: false, + ioless: false, + edges: false, + search: false, + phrase: false, + noMaterializationSearch: false, + crud: false, + crudSearch: false, + subqueryTests: false, + mditests: false, + vectorTests: true + }); + return global.returnValue; +} +if (typeof arango !== "undefined") { + process.exit(main()); +} diff --git a/simple/test.js b/simple/test.js index f950e9e..4535764 100644 --- a/simple/test.js +++ b/simple/test.js @@ -16,6 +16,19 @@ function sum (values) { } } +function randomNumberGeneratorFloat(seed) { + const rng = (function* (seed) { + while (true) { + const nextVal = Math.cos(seed++); + yield nextVal; + } + })(seed); + + return function () { + return rng.next().value; + }; +} + function calc (values, options) { values.sort((a, b) => a - b); @@ -154,6 +167,10 @@ exports.test = function (global) { if (options.hasOwnProperty("iterations")) { params.iterations = options.iterations; } + if (options.hasOwnProperty("extras")) { + params.extras = options.extras; + } + return params; }; @@ -3320,6 +3337,31 @@ exports.test = function (global) { }, ]; + function vectorTest (params) { + let bindParam = { "@col": params.collection, "qp": params.extras.queryPoint }; + if ("bindParamModifier" in params) { + params.bindParamModifier(params, bindParam); + } + db._query( + params.queryString, + bindParam, + ); + } + + let 
VectorTests = [ + { + name: "aql-vector-top-k", + params: { + func: vectorTest, + queryString: ` + FOR d IN @@col + SORT APPROX_NEAR_L2(d.vector, @qp) + LIMIT 5 + RETURN d` + } + }, + ]; + const runSatelliteGraphTests = (global.satelliteGraphTests && isEnterprise && isCluster); if (global.documents || global.edges || global.noMaterializationSearch || global.subqueryTests || runSatelliteGraphTests) { @@ -3428,6 +3470,63 @@ exports.test = function (global) { runTestSuite("MDI", MdiTests, options); } + // vector tests + if (global.vectorTests) { + const dimension = 500; + let gen = randomNumberGeneratorFloat(12121243458923); + let randomPoint = Array.from({ length: dimension }, () => gen()); + + options = { + runs: global.runs, + digits: global.digits, + setup: function (params) { + db._drop(params.collection); + let col = db._create(params.collection); + + let docs = []; + for (let i = 0; i < params.collectionSize; ++i) { + const vector = Array.from({ length: dimension }, () => gen()); + if (i === 2000) { + randomPoint = vector; + } + docs.push({ vector }); + } + col.insert(docs); + + col.ensureIndex({ + name: "vector_l2", + type: "vector", + fields: ["vector"], + inBackground: false, + params: { metric: "l2", dimensions: dimension, nLists: params.extras.nLists }, + }); + + }, + teardown: function () {}, + collections: [], + removeFromResult: 1 + }; + + let extras = { queryPoint: randomPoint }; + + if (global.tiny) { + options.collections.push({ name: "Vectorvalues1000", label: "1k", size: 1000 }); + extras["nLists"]= 10; + } else if (global.small) { + options.collections.push({ name: "Vectorvalues10000", label: "10k", size: 10000 }); + extras["nLists"]= 10; + } else if (global.medium) { + options.collections.push({ name: "Vectorvalues100000", label: "100k", size: 100000 }); + extras["nLists"]= 100; + } else if (global.big) { + options.collections.push({ name: "Vectorvalues1000000", label: "1000k", size: 1000000 }); + extras["nLists"]= 100; + } + options.extras = 
extras; + + runTestSuite("Vector", VectorTests, options); + } + if (global.ioless) { options = { runs: global.runs, From 84c9617f09fd47788522beb285d89e467b2c8801 Mon Sep 17 00:00:00 2001 From: Jure Bajic Date: Fri, 4 Oct 2024 17:17:43 +0200 Subject: [PATCH 02/11] Add vector subquery test --- simple/run-vector.js | 4 ++-- simple/test.js | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/simple/run-vector.js b/simple/run-vector.js index 4d698cb..9f20996 100644 --- a/simple/run-vector.js +++ b/simple/run-vector.js @@ -2,8 +2,8 @@ function main () { global.returnValue = 0; require("./simple/test").test({ outputCsv: false, - medium: true, - runs: 1, + small: true, + runs: 3, documents: false, ioless: false, edges: false, diff --git a/simple/test.js b/simple/test.js index 4535764..06113cf 100644 --- a/simple/test.js +++ b/simple/test.js @@ -3348,8 +3348,19 @@ exports.test = function (global) { ); } + function vectorTestNoParams (params) { + let bindParam = { "@col": params.collection }; + if ("bindParamModifier" in params) { + params.bindParamModifier(params, bindParam); + } + db._query( + params.queryString, + bindParam, + ); + } + let VectorTests = [ - { + { name: "aql-vector-top-k", params: { func: vectorTest, @@ -3359,6 +3370,23 @@ exports.test = function (global) { LIMIT 5 RETURN d` } + }, + { + name: "aql-vector-subquery-10-points", + params: { + func: vectorTestNoParams, + queryString: ` + FOR docOuter IN @@col + LIMIT 10 + LET neibhours = ( + FOR docInner IN @@col + LET dist = APPROX_NEAR_L2(docInner.vector, docOuter.vector) + SORT dist + LIMIT 10 + RETURN {dist, doc: docInner._key} + ) + RETURN {doc: docOuter._key, neibhours: neibhours}` + } }, ]; @@ -3473,7 +3501,7 @@ exports.test = function (global) { // vector tests if (global.vectorTests) { const dimension = 500; - let gen = randomNumberGeneratorFloat(12121243458923); + let gen = randomNumberGeneratorFloat(3243758343); let randomPoint = Array.from({ length: 
dimension }, () => gen()); options = { From 62dd02c7ff982bb5606277df62993ecade962da8 Mon Sep 17 00:00:00 2001 From: jbajic Date: Thu, 7 Nov 2024 12:21:04 +0100 Subject: [PATCH 03/11] Rename dimensions to dimension --- simple/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple/test.js b/simple/test.js index 06113cf..60dfb40 100644 --- a/simple/test.js +++ b/simple/test.js @@ -3526,7 +3526,7 @@ exports.test = function (global) { type: "vector", fields: ["vector"], inBackground: false, - params: { metric: "l2", dimensions: dimension, nLists: params.extras.nLists }, + params: { metric: "l2", dimension: dimension, nLists: params.extras.nLists }, }); }, From f493dbadfb3c0e7ca7a29a5f20ef82f3ca530c57 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 3 Jul 2025 09:41:36 +0200 Subject: [PATCH 04/11] enable vector tests --- simple/run-big-all-runs1.js | 3 ++- simple/run-big-all.js | 3 ++- simple/run-medium-all-runs1.js | 3 ++- simple/run-medium-all.js | 3 ++- simple/run-small-all-junit.js | 3 ++- simple/run-small-all.js | 3 ++- simple/run-tiny-all.js | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/simple/run-big-all-runs1.js b/simple/run-big-all-runs1.js index 73eb5da..1dc4219 100644 --- a/simple/run-big-all-runs1.js +++ b/simple/run-big-all-runs1.js @@ -14,7 +14,8 @@ function main () { crud: true, crudSearch: true, subqueryTests: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } diff --git a/simple/run-big-all.js b/simple/run-big-all.js index b718d4b..572ced5 100644 --- a/simple/run-big-all.js +++ b/simple/run-big-all.js @@ -14,7 +14,8 @@ function main () { crud: true, crudSearch: true, subqueryTests: true, - mditests: true + mditests: true, + vectorTests: true }); print('oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo') print(GLOBAL.returnValue) diff --git a/simple/run-medium-all-runs1.js b/simple/run-medium-all-runs1.js index 
01c95aa..bf06e55 100644 --- a/simple/run-medium-all-runs1.js +++ b/simple/run-medium-all-runs1.js @@ -14,7 +14,8 @@ function main () { crud: true, crudSearch: true, subqueryTests: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } diff --git a/simple/run-medium-all.js b/simple/run-medium-all.js index 43998ed..accaeb3 100644 --- a/simple/run-medium-all.js +++ b/simple/run-medium-all.js @@ -14,7 +14,8 @@ function main () { crud: true, crudSearch: true, subqueryTests: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } diff --git a/simple/run-small-all-junit.js b/simple/run-small-all-junit.js index 6ce7b02..7de9ee3 100644 --- a/simple/run-small-all-junit.js +++ b/simple/run-small-all-junit.js @@ -13,7 +13,8 @@ function main () { phrase: true, crud: true, crudSearch: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } diff --git a/simple/run-small-all.js b/simple/run-small-all.js index f085b27..46e42df 100644 --- a/simple/run-small-all.js +++ b/simple/run-small-all.js @@ -13,7 +13,8 @@ function main () { noMaterializationSearch: true, crud: true, crudSearch: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } diff --git a/simple/run-tiny-all.js b/simple/run-tiny-all.js index dd6808b..9e1c629 100644 --- a/simple/run-tiny-all.js +++ b/simple/run-tiny-all.js @@ -15,7 +15,8 @@ function main () { crud: true, crudSearch: true, subqueryTests: true, - mditests: true + mditests: true, + vectorTests: true }); return GLOBAL.returnValue; } From bfbb5e6a954172a9a42b651859157adac199588d Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 3 Jul 2025 16:54:52 +0200 Subject: [PATCH 05/11] reduce memory usage --- simple/test.js | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/simple/test.js b/simple/test.js index d425c65..53e8e47 100644 --- a/simple/test.js +++ 
b/simple/test.js @@ -3524,15 +3524,20 @@ exports.test = function (testParams) { db._drop(params.collection); let col = db._create(params.collection); - let docs = []; - for (let i = 0; i < params.collectionSize; ++i) { - const vector = Array.from({ length: dimension }, () => gen()); - if (i === 2000) { - randomPoint = vector; + const batchSize = params.batchSize / 4; // we have big docs + const n = params.collectionSize / batchSize; + for (let i = 0; i < n / batchSize ; ++i) { + internal.wait(0, true); // garbage collect... + let docs = []; + for (let j = 0; j < batchSize; ++j) { + const vector = Array.from({ length: dimension }, () => gen()); + if (i * batchSize + j === 2000) { + randomPoint = vector; + } + docs.push({ vector }); } - docs.push({ vector }); + col.insert(docs); } - col.insert(docs); col.ensureIndex({ name: "vector_l2", From 4dfa425be8524e6eb0729bc8d5b1457729c381f3 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 3 Jul 2025 16:56:38 +0200 Subject: [PATCH 06/11] reduce memory usage --- simple/test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simple/test.js b/simple/test.js index 53e8e47..abac8dc 100644 --- a/simple/test.js +++ b/simple/test.js @@ -3524,8 +3524,8 @@ exports.test = function (testParams) { db._drop(params.collection); let col = db._create(params.collection); - const batchSize = params.batchSize / 4; // we have big docs - const n = params.collectionSize / batchSize; + const batchSize = int(params.batchSize / 4); // we have big docs + const n = int(params.collectionSize / batchSize); for (let i = 0; i < n / batchSize ; ++i) { internal.wait(0, true); // garbage collect... 
let docs = []; From 56428fffd1a00de65fa2428bab9abc3766d448f6 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Fri, 4 Jul 2025 10:25:53 +0200 Subject: [PATCH 07/11] round --- simple/test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simple/test.js b/simple/test.js index abac8dc..35d9f45 100644 --- a/simple/test.js +++ b/simple/test.js @@ -3524,8 +3524,8 @@ exports.test = function (testParams) { db._drop(params.collection); let col = db._create(params.collection); - const batchSize = int(params.batchSize / 4); // we have big docs - const n = int(params.collectionSize / batchSize); + const batchSize = Math.round(params.batchSize / 4); // we have big docs + const n = Math.round(params.collectionSize / batchSize); for (let i = 0; i < n / batchSize ; ++i) { internal.wait(0, true); // garbage collect... let docs = []; From 9af5de70b6cb01c5487e6795c232bae34d9d63e0 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Fri, 4 Jul 2025 12:00:36 +0200 Subject: [PATCH 08/11] disable for older versions --- simple/test.js | 1 + 1 file changed, 1 insertion(+) diff --git a/simple/test.js b/simple/test.js index 35d9f45..8f2746d 100644 --- a/simple/test.js +++ b/simple/test.js @@ -149,6 +149,7 @@ exports.test = function (testParams) { // Substring first 5 characters to limit to A.B.C format and not use any `nightly`, `rc`, `preview` etc. const serverVersion = (((typeof arango) !== "undefined") ? 
arango.getVersion() : internal.version).split("-")[0]; testParams.zkdMdiRenamed = semver.satisfies(serverVersion, ">3.11.99") ; + testParams.vectorTests = testParams.vectorTests && semver.satisfies(serverVersion, ">3.12.3") ; const isEnterprise = internal.isEnterprise(); const isCluster = internal.isCluster(); From 5a57214cad4df9a707fb22669d61f274be111430 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Tue, 8 Jul 2025 10:43:49 +0200 Subject: [PATCH 09/11] add _key so the documents are distributed properly across the shards --- simple/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple/test.js b/simple/test.js index 8f2746d..95bcc1d 100644 --- a/simple/test.js +++ b/simple/test.js @@ -3535,7 +3535,7 @@ exports.test = function (testParams) { if (i * batchSize + j === 2000) { randomPoint = vector; } - docs.push({ vector }); + docs.push({_key: "test_" + (j + i* batchSize), vector: vector }); } col.insert(docs); } From c685e5b5d949ff1654f52a9f66caeefea1c101bc Mon Sep 17 00:00:00 2001 From: Jure Bajic Date: Tue, 15 Jul 2025 12:10:26 +0200 Subject: [PATCH 10/11] Fix vector index setup --- simple/test.js | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/simple/test.js b/simple/test.js index 95bcc1d..36ebf1f 100644 --- a/simple/test.js +++ b/simple/test.js @@ -148,8 +148,8 @@ exports.test = function (testParams) { // Substring first 5 characters to limit to A.B.C format and not use any `nightly`, `rc`, `preview` etc. const serverVersion = (((typeof arango) !== "undefined") ? 
arango.getVersion() : internal.version).split("-")[0]; - testParams.zkdMdiRenamed = semver.satisfies(serverVersion, ">3.11.99") ; - testParams.vectorTests = testParams.vectorTests && semver.satisfies(serverVersion, ">3.12.3") ; + testParams.zkdMdiRenamed = semver.satisfies(serverVersion, ">3.11.99"); + testParams.vectorTests = testParams.vectorTests && semver.satisfies(serverVersion, ">3.12.4"); const isEnterprise = internal.isEnterprise(); const isCluster = internal.isCluster(); @@ -199,6 +199,7 @@ exports.test = function (testParams) { for (let i = 0; i < runs + 1; ++i) { let params = buildParams(test, collection); if (typeof options.setup === "function") { + print("Running setup function!"); options.setup(params); } if (typeof params.setup === "function") { @@ -232,7 +233,7 @@ exports.test = function (testParams) { let errors = []; for (let i = 0; i < tests.length; ++i) { let test = tests[i]; - print(test) + print(test); try { if (!(test['version'] === undefined || semver.satisfies(serverVersion, test['version']))) { print(`skipping test ${test['name']}, requires version ${test['version']}`); @@ -3524,10 +3525,11 @@ exports.test = function (testParams) { setup: function (params) { db._drop(params.collection); let col = db._create(params.collection); - - const batchSize = Math.round(params.batchSize / 4); // we have big docs + + const batchSize = 1000; const n = Math.round(params.collectionSize / batchSize); - for (let i = 0; i < n / batchSize ; ++i) { + print("Preparing vector collection with " + params.collectionSize + " documents and batchSize: " + batchSize); + for (let i = 0; i < n; ++i) { internal.wait(0, true); // garbage collect... 
let docs = []; for (let j = 0; j < batchSize; ++j) { @@ -3539,7 +3541,9 @@ exports.test = function (testParams) { } col.insert(docs); } + print("Number of docs in vector index collection: " + col.count()); + print("Creating vector index"); col.ensureIndex({ name: "vector_l2", type: "vector", @@ -3547,7 +3551,7 @@ exports.test = function (testParams) { inBackground: false, params: { metric: "l2", dimension: dimension, nLists: params.extras.nLists }, }); - + print("Vector index created"); }, teardown: function () {}, collections: [], @@ -3558,16 +3562,16 @@ exports.test = function (testParams) { if (testParams.tiny) { options.collections.push({ name: "Vectorvalues1000", label: "1k", size: 1000 }); - extras["nLists"]= 10; + extras["nLists"] = 10; } else if (testParams.small) { options.collections.push({ name: "Vectorvalues10000", label: "10k", size: 10000 }); - extras["nLists"]= 10; + extras["nLists"] = 100; } else if (testParams.medium) { options.collections.push({ name: "Vectorvalues100000", label: "100k", size: 100000 }); - extras["nLists"]= 100; + extras["nLists"] = 1000; } else if (testParams.big) { options.collections.push({ name: "Vectorvalues1000000", label: "1000k", size: 1000000 }); - extras["nLists"]= 100; + extras["nLists"] = 10000; } options.extras = extras; From 779c648200f741564b3bcd350668db8745bdb1b4 Mon Sep 17 00:00:00 2001 From: Jure Bajic Date: Tue, 15 Jul 2025 13:55:40 +0200 Subject: [PATCH 11/11] Add additional print --- simple/test.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/simple/test.js b/simple/test.js index 36ebf1f..43e4435 100644 --- a/simple/test.js +++ b/simple/test.js @@ -199,7 +199,6 @@ exports.test = function (testParams) { for (let i = 0; i < runs + 1; ++i) { let params = buildParams(test, collection); if (typeof options.setup === "function") { - print("Running setup function!"); options.setup(params); } if (typeof params.setup === "function") { @@ -3551,7 +3550,7 @@ exports.test = function (testParams) { 
inBackground: false, params: { metric: "l2", dimension: dimension, nLists: params.extras.nLists }, }); - print("Vector index created"); + print("Vector index created: " + JSON.stringify(col.indexes())); }, teardown: function () {}, collections: [],