19
19
*/
20
20
package org .neo4j .gds .similarity .nodesim ;
21
21
22
- import org .neo4j .gds .collections .hsa .HugeSparseDoubleArray ;
23
22
import org .neo4j .gds .core .utils .Intersections ;
24
23
25
- import java .util .Arrays ;
26
-
27
24
public class JaccardSimilarityComputer implements MetricSimilarityComputer {
28
25
private final double similarityCutoff ;
29
26
@@ -44,41 +41,42 @@ public double computeWeightedSimilarity(long[] vector1, long[] vector2, double[]
44
41
assert vector1 .length == weights1 .length ;
45
42
assert vector2 .length == weights2 .length ;
46
43
47
- // It is possible the input vectors to have different lengths.
48
- // In such cases we need to make sure that elements that are missing get assigned `0.0` weights.
49
- // To do so, we make use of HugeSparseDoubleArrays
50
- // where the index is the element from `vector1` and `vector2` and
51
- // the values are from `weights1` and `weights2`.
52
-
53
- // 1. Find the maximum element from each of the vector arrays
54
- var vector1MaxElement = Arrays .stream (vector1 ).max ().orElseThrow ();
55
- var vector2MaxElement = Arrays .stream (vector2 ).max ().orElseThrow ();
56
- var maxElement = Math .max (vector1MaxElement , vector2MaxElement );
57
-
58
- // 2. Create HugeSparseDoubleArrays
59
- var vector1WeightsBuilder = HugeSparseDoubleArray .builder (0d , maxElement );
60
- for (int i = 0 ; i < vector1 .length ; i ++) {
61
- vector1WeightsBuilder .set (vector1 [i ], weights1 [i ]);
44
+ int offset1 = 0 ;
45
+ int offset2 = 0 ;
46
+ int length1 = weights1 .length ;
47
+ int length2 = weights2 .length ;
48
+ double max = 0 ;
49
+ double min = 0 ;
50
+ while (offset1 < length1 && offset2 < length2 ) {
51
+ long target1 = vector1 [offset1 ];
52
+ long target2 = vector2 [offset2 ];
53
+ if (target1 == target2 ) {
54
+ double w1 = weights1 [offset1 ];
55
+ double w2 = weights2 [offset2 ];
56
+ if (w1 > w2 ) {
57
+ max += w1 ;
58
+ min += w2 ;
59
+ } else {
60
+ min += w1 ;
61
+ max += w2 ;
62
+ }
63
+ offset1 ++;
64
+ offset2 ++;
65
+ } else if (target1 < target2 ) {
66
+ max += weights1 [offset1 ];
67
+ offset1 ++;
68
+ } else {
69
+ max += weights2 [offset2 ];
70
+ offset2 ++;
71
+ }
62
72
}
63
- var vector1Weights = vector1WeightsBuilder .build ();
64
- var vector2WeightsBuilder = HugeSparseDoubleArray .builder (0d , maxElement );
65
- for (int i = 0 ; i < vector2 .length ; i ++) {
66
- vector2WeightsBuilder .set (vector2 [i ], weights2 [i ]);
73
+ for (; offset1 < length1 ; offset1 ++) {
74
+ max += weights1 [offset1 ];
67
75
}
68
- var vector2Weights = vector2WeightsBuilder .build ();
69
-
70
- // 3. Iterate over the arrays and compute the min and max sums
71
- var minSum = 0d ;
72
- var maxSum = 0d ;
73
- for (int i = 0 ; i <= maxElement ; i ++) {
74
- var weight1 = vector1Weights .get (i );
75
- var weight2 = vector2Weights .get (i );
76
- minSum += Math .min (weight1 , weight2 );
77
- maxSum += Math .max (weight1 , weight2 );
76
+ for (; offset2 < length2 ; offset2 ++) {
77
+ max += weights2 [offset2 ];
78
78
}
79
-
80
- // 4. Compute the final similarity
81
- var similarity = minSum / maxSum ;
79
+ double similarity = min / max ;
82
80
return similarity >= similarityCutoff ? similarity : Double .NaN ;
83
81
}
84
82
0 commit comments