diff --git a/simple/BIGvertices.js b/simple/BIGvertices.js
deleted file mode 100644
index 159f8d0..0000000
--- a/simple/BIGvertices.js
+++ /dev/null
@@ -1,93 +0,0 @@
-// This script can create a binary tree in ArangoDB with relatively large
-// vertex documents (approx. 1 MB each). You can give the depth and you can
-// choose what type of graph to create.
-//
-// Usage:
-//
-// makeGraph("G", "V", "E") - creates a general graph with name G, vertex
-//                            collection V and edge collection E
-// makeTree(6, "V", "E")    - creates the actual tree after the graph was created
-//                            6 is the depth and one has to name the vertex and
-//                            edge collections
-//
-// The following AQL query and its performance could be of interest:
-//
-//   FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
-//     RETURN v.data
-//
-// This traverses the whole graph starting from the root but retrieves only
-// a tiny part of the vertex data. This tests the 3.10 feature of
-// traversal projections. You can see that it does this from this explain
-// output for the above query:
-//
-// Query String (58 chars, cacheable: true):
-//  FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
-//      RETURN v.data
-//
-// Execution plan:
-//  Id   NodeType          Site  Est.   Comment
-//   1   SingletonNode     COOR     1   * ROOT
-//   2   TraversalNode     COOR    64     - FOR v /* vertex (projections: `data`) */ IN 0..6 /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */ GRAPH 'G'
-//   3   CalculationNode   COOR    64       - LET #3 = v.`data` /* attribute expression */
-//   4   ReturnNode        COOR    64       - RETURN #3
-//
-// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `data`.
-//
-
-rand = require("internal").rand;
-time = require("internal").time;
-
-function makeRandomString(l) {
-  var r = rand();
-  var d = rand();
-  var s = "x";
-  for (var i = 0; i < l; ++i) {
-    s += r;
-    r += d;
-  }
-  return s;
-}
-
-function makeGraph(graphName, vertexCollName, edgeCollName) {
-  let graph = require("@arangodb/general-graph");
-  try {
-    graph._drop(graphName, true);
-  }
-  catch {
-  }
-  graph._create(graphName, [graph._relation(edgeCollName, [vertexCollName], [vertexCollName])]);
-}
-
-function makeKey(i) {
-  return "S" + (i % 3) + ":K" + i;
-}
-
-function makeTree(depth, vertexCollName, edgeCollName) {
-  let V = db._collection(vertexCollName);
-  let E = db._collection(edgeCollName);
-  let klumpen = {};
-  for (let i = 0; i < 1000; ++i) {
-    klumpen["K"+i] = makeRandomString(1024);
-  }
-  for (let i = 1; i <= 2 ** depth - 1; ++i) {
-    let v = klumpen;
-    v.data = "D"+i;
-    v.smart = "S"+(i % 3);
-    v._key = makeKey(i);
-    V.insert(v);
-    print("Have created", i, "vertices out of", 2 ** depth - 1);
-  }
-
-  // This is now a gigabyte of data, one megabyte per vertex.
-
-  // Make a binary tree:
-  for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) {
-    let e = { _from: vertexCollName + "/" + makeKey(i),
-              _to: vertexCollName + "/" + makeKey(2 * i)};
-    E.insert(e);
-    e = { _from: vertexCollName + "/" + makeKey(i),
-          _to: vertexCollName + "/" + makeKey(2 * i + 1)};
-    E.insert(e);
-  }
-
-}
diff --git a/simple/binaryTrees.js b/simple/binaryTrees.js
new file mode 100644
index 0000000..b29e82a
--- /dev/null
+++ b/simple/binaryTrees.js
@@ -0,0 +1,126 @@
+rand = require("internal").rand;
+time = require("internal").time;
+
+function makeRandomString(l) {
+  var r = rand();
+  var d = rand();
+  var s = "x";
+  for (var i = 0; i < l; ++i) {
+    s += r;
+    r += d;
+  }
+  return s;
+}
+
+function numberOfDbservers() {
+  return Object.values(db._connection.GET("/_admin/cluster/health").Health).filter(item => item.Role == "DBServer").length;
+}
+
+function createGraph(graphName, vertexCollName, edgeCollName) {
+  let graph = require("@arangodb/general-graph");
+  try {
+    graph._drop(graphName, true);
+  }
+  catch {
+  }
+  graph._create(graphName, [graph._relation(edgeCollName, [vertexCollName], [vertexCollName])], [], {numberOfShards: numberOfDbservers()});
+}
+
+function makeKey(i) {
+  return "S" + (i % 3) + ":K" + i;
+}
+
+// creates a binary tree where every vertex includes one megabyte of data
+//
+// The following AQL query and its performance could be of interest:
+//
+//   FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
+//     RETURN v.smallData
+//
+// This traverses the whole graph starting from the root but retrieves only
+// a tiny part of the vertex data. This tests the 3.10 feature of
+// traversal projections. You can see that it does this from this explain
+// output for the above query:
+//
+// Query String (58 chars, cacheable: true):
+//  FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
+//      RETURN v.smallData
+//
+// Execution plan:
+//  Id   NodeType          Site  Est.   Comment
+//   1   SingletonNode     COOR     1   * ROOT
+//   2   TraversalNode     COOR    64     - FOR v /* vertex (projections: `smallData`) */ IN 0..6 /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */ GRAPH 'G'
+//   3   CalculationNode   COOR    64       - LET #3 = v.`smallData` /* attribute expression */
+//   4   ReturnNode        COOR    64       - RETURN #3
+//
+// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `smallData`.
+function makeTreeWithLargeData(graphName, vertexCollName, edgeCollName, depth) {
+  createGraph(graphName, vertexCollName, edgeCollName);
+  let V = db._collection(vertexCollName);
+  let E = db._collection(edgeCollName);
+
+  // create vertices
+  let klumpen = {};
+  for (let i = 0; i < 1000; ++i) {
+    klumpen["K"+i] = makeRandomString(1024);
+  }
+  for (let i = 1; i <= 2 ** depth - 1; ++i) {
+    let v = klumpen;
+    v.smallData = "D"+i;
+    v.smart = "S"+(i % 3);
+    v._key = makeKey(i);
+    V.insert(v);
+    print("Have created", i, "vertices out of", 2 ** depth - 1);
+  }
+
+  // make a binary tree from these vertices
+  for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) {
+    let e = { _from: vertexCollName + "/" + makeKey(i),
+              _to: vertexCollName + "/" + makeKey(2 * i)};
+    E.insert(e);
+    e = { _from: vertexCollName + "/" + makeKey(i),
+          _to: vertexCollName + "/" + makeKey(2 * i + 1)};
+    E.insert(e);
+  }
+}
+
+// creates a binary tree with vertex 2 being a supernode
+//          1
+//        /   \
+//       3     2   with additional superNodeSize neighbours
+//      / \   / \
+//     7   6 5   4
+//        ...
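+//
+// Example invocation, mirroring the small test size used in simple/test.js:
+//   makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", 4, 5000);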
+function makeTreeWithSupernode(graphName, vertexCollName, edgeCollName, depth, superNodeSize) {
+  createGraph(graphName, vertexCollName, edgeCollName);
+  let V = db._collection(vertexCollName);
+  let E = db._collection(edgeCollName);
+
+  // Add 2^depth - 1 vertices for the tree and additionally superNodeSize vertices
+  let docs = [];
+  for (let i = 1; i <= 2**depth-1+superNodeSize; ++i) {
+    docs.push({data: "D"+i, smart: "S"+(i%3), _key: makeKey(i)});
+  }
+  V.insert(docs);
+
+  // make a binary tree from the first 2^depth - 1 vertices
+  docs = [];
+  for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) {
+    docs.push({ _from: vertexCollName + "/" + makeKey(i),
+                _to: vertexCollName + "/" + makeKey(2 * i)});
+    docs.push({ _from: vertexCollName + "/" + makeKey(i),
+                _to: vertexCollName + "/" + makeKey(2 * i + 1)});
+  }
+  E.insert(docs);
+
+  // make vertex 2 a supernode by connecting it to the additional vertices
+  if (depth > 1) {
+    docs = [];
+    let key = makeKey(2);
+    for (let j=1; j <= superNodeSize; j++) {
+      docs.push({_from: vertexCollName + "/" + key, _to: vertexCollName + "/" + makeKey(2**depth - 1 + j)});
+    }
+    E.insert(docs);
+  }
+}
diff --git a/simple/test.js b/simple/test.js
index f1277c1..80f2292 100644
--- a/simple/test.js
+++ b/simple/test.js
@@ -1,6 +1,6 @@
 "use strict";
 /* jshint globalstrict:false, strict:false, maxlen: 500 */
-/* global GLOBAL, makeGraph, makeTree */
+/* global GLOBAL, makeTreeWithLargeData, makeTreeWithSupernode */
 
 const internal = require("internal");
 const arango = internal.arango;
@@ -9,10 +9,10 @@ const fs = require("fs");
 const semver = require("semver");
 const _ = require("lodash");
 const db = require("org/arangodb").db;
-require("internal").load("simple/BIGvertices.js");// makeGraph, makeTree
+require("internal").load("simple/binaryTrees.js");// makeTreeWithLargeData, makeTreeWithSupernode
 
 GLOBAL.returnValue = 0;
-
+var supernodeTreeDepth = 0; // required for supernode_limit test
 
 function sum (values) {
   if (values.length > 1) {
@@ -22,6 +22,10 @@ function sum (values) {
   }
 }
 
+function numberOfDbservers() {
+  return Object.values(db._connection.GET("/_admin/cluster/health").Health).filter(item => item.Role == "DBServer").length;
+}
+
 function randomNumberGeneratorFloat (seed) {
   const rng = (function *(seed) {
     while (true) {
@@ -513,16 +517,19 @@ exports.test = function (testParams) {
      createEdges(1000);
    } else if (testParams.small) {
      createEdges(10000);
-     makeGraph("Tree", "TreeV", "TreeE");
-     makeTree(6, "TreeV", "TreeE");
+     makeTreeWithLargeData("Tree", "TreeV", "TreeE", 6);
+     supernodeTreeDepth = 4;
+     makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 5000);
    } else if (testParams.medium) {
      createEdges(100000);
-     makeGraph("Tree", "TreeV", "TreeE");
-     makeTree(7, "TreeV", "TreeE");
+     makeTreeWithLargeData("Tree", "TreeV", "TreeE", 7);
+     supernodeTreeDepth = 5;
+     makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 10000);
    } else if (testParams.big) {
      createEdges(1000000);
-     makeGraph("Tree", "TreeV", "TreeE");
-     makeTree(8, "TreeV", "TreeE");
+     makeTreeWithLargeData("Tree", "TreeV", "TreeE", 8);
+     supernodeTreeDepth = 6;
+     makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 100000);
    }
 
    internal.wal.flush(true, true);
@@ -546,7 +553,7 @@ exports.test = function (testParams) {
        edges: db[edgesCollectionName]
      };
    }
-    let g = graphModule._create(name, [ graphModule._relation(edgesCollectionName, vertexCollectionName, vertexCollectionName)], [], {});
+    let g = graphModule._create(name, [ graphModule._relation(edgesCollectionName, vertexCollectionName, vertexCollectionName)], [], {numberOfShards: numberOfDbservers()});
    return { graph: g,
             vertex: g[vertexCollectionName],
             edges: db[edgesCollectionName] };
@@ -584,7 +591,7 @@ exports.test = function (testParams) {
        edges: db[edgesCollectionName]
      };
    }
-    let g = graphModule._create(name, [ graphModule._relation(edgesCollectionName, vertexCollectionName, vertexCollectionName)], [], {});
+    let g = graphModule._create(name, [ graphModule._relation(edgesCollectionName, vertexCollectionName, vertexCollectionName)], [], {numberOfShards: numberOfDbservers()});
    return { graph: g,
             vertex: g[vertexCollectionName],
             edges: db[edgesCollectionName] };
@@ -1034,7 +1041,7 @@ exports.test = function (testParams) {
    traversalProjections = function (params) {
      // Note that depth 8 is good for all three sizes small (6), medium (7)
      // and big (8). Depending on the size, we create a different tree.
-      db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.data`, {}, {}, {silent});
+      db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.smallData`, {}, {}, {silent});
    },
 
    outbound = function (params) {
@@ -1153,6 +1160,28 @@ exports.test = function (testParams) {
        { silent }
      );
    },
+    supernode = function (params) {
+      db._query(`FOR v IN 0..8 OUTBOUND "SupernodeV/S1:K1" GRAPH "Supernode" RETURN v.data`,
+        {},
+        {},
+        {silent}
+      );
+    },
+    supernode_limit = function (params) {
+      // limit the output vertices and make sure that at least one of the supernode's neighbours is in the result, but not all of them
+      // DFS first enumerates all vertices in one half-tree, then all vertices in the other half-tree
+      // if the supernode is in the first half-tree, the limit should be smaller than the number of supernode neighbours (already ensured by the tree creation)
+      // if the supernode is in the second half-tree, the limit should be at least the size of the first half-tree (2**(depth-1)) plus 2, to additionally enumerate the supernode and one of its neighbours
+      let limit = 2**(supernodeTreeDepth-1)+2;
+      db._query(`FOR v IN 0..8 OUTBOUND "SupernodeV/S1:K1" GRAPH "Supernode" LIMIT @limit RETURN v.data`,
+        {
+          limit: limit
+        },
+        {},
+        {silent}
+      );
+    },
+
 // /////////////////////////////////////////////////////////////////////////////
 // documentTests
@@ -2619,6 +2648,14 @@ exports.test = function (testParams) {
      name: "k-shortest-any",
      params: { func: kShortestAny }
    },
+    {
+      name: "supernode-traversal",
+      params: { func: supernode }
+    },
+    {
+      name: "supernode-traversal-limit",
+      params: { func: supernode_limit }
+    },
    {
      name: "subquery-exists-path",
      params: { func: subqueryExistsPath }
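For orientation, the new helpers and benchmark queries fit together roughly as in the following arangosh sketch. This is illustrative only; it reuses the collection names, graph names, and size parameters of the small test size from simple/test.js:

    // load the helpers added in simple/binaryTrees.js
    require("internal").load("simple/binaryTrees.js");

    // depth-6 binary tree with large vertices, traversed with a projection on `smallData`
    makeTreeWithLargeData("Tree", "TreeV", "TreeE", 6);
    db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.smallData`);

    // depth-4 binary tree whose vertex 2 gets 5000 extra neighbours
    makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", 4, 5000);
    // limit = 2**(4-1) + 2 = 10: enough to reach the supernode and one of its
    // neighbours even if DFS enumerates the other half-tree first
    db._query(`FOR v IN 0..8 OUTBOUND "SupernodeV/S1:K1" GRAPH "Supernode" LIMIT 10 RETURN v.data`);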