Skip to content

Commit 6b9ef9f

Browse files
committed
Add example for hyperparameter tuning of node embeddings
1 parent 7afe72e commit 6b9ef9f

9 files changed

+2593
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
// Creates a smaller projection by sampling the original graph using "Common Neighbour Aware Random Walk"

// Source graph: $dependencies_projection.
// The sampled graph is stored under the source name plus the suffix '-sampled-cleaned'.
  CALL gds.graph.sample.cnarw(
       $dependencies_projection + '-sampled-cleaned',
       $dependencies_projection,
       { samplingRatio: toFloat($dependencies_projection_sampling_ratio) }
  )
 YIELD graphName,
       fromGraphName,
       nodeCount,
       relationshipCount,
       startNodeCount,
       projectMillis
RETURN graphName,
       fromGraphName,
       nodeCount,
       relationshipCount,
       startNodeCount,
       projectMillis
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
// Writes batch data back into the database for code units when working with a dependencies projection. Variables: dependencies_projection_rows, dependencies_projection_node

// Each entry of $dependencies_projection_rows is read as {nodeId, properties}.
UNWIND $dependencies_projection_rows AS batchEntry
 MATCH (targetNode)
 WHERE elementId(targetNode) = batchEntry.nodeId
   AND $dependencies_projection_node IN labels(targetNode)
// "+=" adds/overwrites the given properties and leaves all other properties of the node untouched.
   SET targetNode += batchEntry.properties
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
// Node Embeddings 1d using Fast Random Projection: Stream for Hyper-Parameter tuning. Requires "Add_file_name and_extension.cypher".

// Streams one embedding per node of the projection named $dependencies_projection + '-cleaned'.
  CALL gds.fastRP.stream(
       $dependencies_projection + '-cleaned',
       {
           embeddingDimension: toInteger($dependencies_projection_embedding_dimension),
           randomSeed: toInteger($dependencies_projection_embedding_random_seed),
           normalizationStrength: toFloat($dependencies_projection_fast_random_projection_normalization_strength),
           // The first three iteration weights are fixed (0.0, 0.0, 1.0); only the fourth one is a parameter.
           iterationWeights: [0.0, 0.0, 1.0, toFloat($dependencies_projection_fast_random_projection_forth_iteration_weight)],
           relationshipWeightProperty: $dependencies_projection_weight_property
       }
  )
 YIELD nodeId, embedding
  WITH gds.util.asNode(nodeId) AS embeddedNode,
       embedding
// Name of the Java artifact that contains the node (null when there is none).
OPTIONAL MATCH (javaArtifact:Java:Artifact)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       javaArtifact.name AS artifactName
// Last path segment of the root directory of the TS project that contains the node (null when there is none).
OPTIONAL MATCH (rootDirectory:Directory)<-[:HAS_ROOT]-(tsProject:TS:Project)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       artifactName,
       last(split(rootDirectory.absoluteFileName, '/')) AS projectName
RETURN DISTINCT
       coalesce(embeddedNode.fqn, embeddedNode.globalFqn, embeddedNode.fileName, embeddedNode.signature, embeddedNode.name) AS codeUnitName,
       embeddedNode.name                               AS shortCodeUnitName,
       elementId(embeddedNode)                         AS nodeElementId,
       // The artifact name takes precedence over the project directory name.
       coalesce(artifactName, projectName)             AS projectName,
       // Fallback values for nodes without community or centrality properties.
       coalesce(embeddedNode.communityLeidenId, 0)     AS communityId,
       coalesce(embeddedNode.centralityPageRank, 0.01) AS centrality,
       embedding
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
// Node Embeddings 1e using Fast Random Projection: Write for tuned hyper-parameters.

// Runs on the projection named $dependencies_projection + '-cleaned' and
// writes the embeddings into the node property given by $dependencies_projection_write_property.
  CALL gds.fastRP.write(
       $dependencies_projection + '-cleaned',
       {
           embeddingDimension: toInteger($dependencies_projection_embedding_dimension),
           randomSeed: toInteger($dependencies_projection_embedding_random_seed),
           normalizationStrength: toFloat($dependencies_projection_fast_random_projection_normalization_strength),
           // The first three iteration weights are fixed (0.0, 0.0, 1.0); only the fourth one is a parameter.
           iterationWeights: [0.0, 0.0, 1.0, toFloat($dependencies_projection_fast_random_projection_forth_iteration_weight)],
           relationshipWeightProperty: $dependencies_projection_weight_property,
           writeProperty: $dependencies_projection_write_property
       }
  )
 YIELD nodeCount,
       nodePropertiesWritten,
       preProcessingMillis,
       computeMillis,
       writeMillis
RETURN nodeCount,
       nodePropertiesWritten,
       preProcessingMillis,
       computeMillis,
       writeMillis
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
// Node Embeddings 2c using Hash GNN (Graph Neural Networks): Stream. Requires "Add_file_name and_extension.cypher".

// Streams one embedding per node of the projection named $dependencies_projection + '-cleaned'.
  CALL gds.beta.hashgnn.stream(
       $dependencies_projection + '-cleaned',
       {
           // Density = embedding dimension * 2 * multiplier.
           embeddingDensity: toInteger($dependencies_projection_embedding_dimension) * 2 * toInteger($dependencies_projection_hashgnn_dimension_multiplier),
           randomSeed: toInteger($dependencies_projection_embedding_random_seed),
           iterations: toInteger($dependencies_projection_hashgnn_iterations),
           generateFeatures: {
               // Generated feature dimension = embedding dimension * 4 * multiplier.
               dimension: toInteger($dependencies_projection_embedding_dimension) * 4 * toInteger($dependencies_projection_hashgnn_dimension_multiplier),
               densityLevel: toInteger($dependencies_projection_hashgnn_density_level)
           },
           outputDimension: toInteger($dependencies_projection_embedding_dimension),
           neighborInfluence: toFloat($dependencies_projection_hashgnn_neighbor_influence)
       }
  )
 YIELD nodeId, embedding
  WITH gds.util.asNode(nodeId) AS embeddedNode,
       embedding
// Name of the Java artifact that contains the node (null when there is none).
OPTIONAL MATCH (javaArtifact:Java:Artifact)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       javaArtifact.name AS artifactName
// Last path segment of the root directory of the TS project that contains the node (null when there is none).
OPTIONAL MATCH (rootDirectory:Directory)<-[:HAS_ROOT]-(tsProject:TS:Project)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       artifactName,
       last(split(rootDirectory.absoluteFileName, '/')) AS projectName
RETURN DISTINCT
       coalesce(embeddedNode.fqn, embeddedNode.globalFqn, embeddedNode.fileName, embeddedNode.signature, embeddedNode.name) AS codeUnitName,
       embeddedNode.name                               AS shortCodeUnitName,
       elementId(embeddedNode)                         AS nodeElementId,
       // The artifact name takes precedence over the project directory name.
       coalesce(artifactName, projectName)             AS projectName,
       // Fallback values for nodes without community or centrality properties.
       coalesce(embeddedNode.communityLeidenId, 0)     AS communityId,
       coalesce(embeddedNode.centralityPageRank, 0.01) AS centrality,
       embedding
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
// Node Embeddings 3c using Node2Vec: Stream. Requires "Add_file_name and_extension.cypher".

// Streams one embedding per node of the projection named $dependencies_projection + '-cleaned'.
  CALL gds.node2vec.stream(
       $dependencies_projection + '-cleaned',
       {
           embeddingDimension: toInteger($dependencies_projection_embedding_dimension),
           randomSeed: toInteger($dependencies_projection_embedding_random_seed),
           iterations: toInteger($dependencies_projection_node2vec_iterations),
           inOutFactor: toFloat($dependencies_projection_node2vec_in_out_factor),
           returnFactor: toFloat($dependencies_projection_node2vec_return_factor),
           windowSize: toInteger($dependencies_projection_node2vec_window_size),
           walksPerNode: toInteger($dependencies_projection_node2vec_walks_per_node),
           walkLength: toInteger($dependencies_projection_node2vec_walk_length),
           negativeSamplingRate: toInteger($dependencies_projection_node2vec_negative_sampling_rate),
           positiveSamplingFactor: toFloat($dependencies_projection_node2vec_positive_sampling_factor),
           relationshipWeightProperty: $dependencies_projection_weight_property
       }
  )
 YIELD nodeId, embedding
  WITH gds.util.asNode(nodeId) AS embeddedNode,
       embedding
// Name of the Java artifact that contains the node (null when there is none).
OPTIONAL MATCH (javaArtifact:Java:Artifact)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       javaArtifact.name AS artifactName
// Last path segment of the root directory of the TS project that contains the node (null when there is none).
OPTIONAL MATCH (rootDirectory:Directory)<-[:HAS_ROOT]-(tsProject:TS:Project)-[:CONTAINS]->(embeddedNode)
  WITH embeddedNode,
       embedding,
       artifactName,
       last(split(rootDirectory.absoluteFileName, '/')) AS projectName
RETURN DISTINCT
       coalesce(embeddedNode.fqn, embeddedNode.globalFqn, embeddedNode.fileName, embeddedNode.signature, embeddedNode.name) AS codeUnitName,
       embeddedNode.name                               AS shortCodeUnitName,
       elementId(embeddedNode)                         AS nodeElementId,
       // The artifact name takes precedence over the project directory name.
       coalesce(artifactName, projectName)             AS projectName,
       // Fallback values for nodes without community or centrality properties.
       coalesce(embeddedNode.communityLeidenId, 0)     AS communityId,
       coalesce(embeddedNode.centralityPageRank, 0.01) AS centrality,
       embedding
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
// Will never return any results so that the validation will always fail. This is helpful for Jupyter Notebooks that should not be executed automatically.

// "LIMIT 0" guarantees an empty result regardless of the graph's content.
 MATCH (nothing)
RETURN nothing
 LIMIT 0

0 commit comments

Comments
 (0)