@@ -16,112 +16,112 @@ const uint64_t M4 = 0x0f0f0f0f0f0f0f0fULL;
16
16
const uint64_t H01 = 0x0101010101010101ULL ;
17
17
18
18
int popcount (uint64_t x ) {
19
- x -= (x >> 1 ) & M1 ;
20
- x = (x & M2 ) + ((x >> 2 ) & M2 );
21
- x = (x + (x >> 4 )) & M4 ;
22
- return (x * H01 ) >> 56 ;
19
+ x -= (x >> 1 ) & M1 ;
20
+ x = (x & M2 ) + ((x >> 2 ) & M2 );
21
+ x = (x + (x >> 4 )) & M4 ;
22
+ return (x * H01 ) >> 56 ;
23
23
}
24
24
25
25
float match_fingerprints (signed int * a , int asize , signed int * b , int bsize , int maxoffset ) {
26
- int i , topcount , topoffset , size , biterror , minsize , auniq = 0 , buniq = 0 ;
27
- int numcounts = asize + bsize + 1 ;
28
- unsigned short * counts = calloc (numcounts , sizeof (unsigned short ));
29
- uint8_t * seen ;
30
- uint16_t * aoffsets , * boffsets ;
31
- uint64_t * adata , * bdata ;
32
- float score , diversity ;
33
-
34
- aoffsets = calloc ((MATCH_MASK + 1 ) * 2 , sizeof (uint16_t ));
35
- boffsets = aoffsets + MATCH_MASK + 1 ;
36
- seen = (uint8_t * )aoffsets ;
37
-
38
- for (i = 0 ; i < asize ; i ++ ) {
39
- aoffsets [MATCH_STRIP (a [i ])] = i ;
40
- }
41
-
42
- for (i = 0 ; i < bsize ; i ++ ) {
43
- boffsets [MATCH_STRIP (b [i ])] = i ;
44
- }
45
-
46
- topcount = 0 ;
47
- topoffset = 0 ;
48
- for (i = 0 ; i < MATCH_MASK ; i ++ ) {
49
- if (aoffsets [i ] && boffsets [i ]) {
50
- int offset = aoffsets [i ] - boffsets [i ];
51
- if (maxoffset == 0 || (- maxoffset <= offset && offset <= maxoffset )) {
52
- offset += bsize ;
53
- counts [offset ]++ ;
54
- if (counts [offset ] > topcount ) {
55
- topcount = counts [offset ];
56
- topoffset = offset ;
57
- }
58
- }
59
- }
60
- }
61
-
62
- topoffset -= bsize ;
63
-
64
- minsize = MIN (asize , bsize ) & ~1 ;
65
- if (topoffset < 0 ) {
66
- b -= topoffset ;
67
- bsize = MAX (0 , bsize + topoffset );
68
- }
69
- else {
70
- a += topoffset ;
71
- asize = MAX (0 , asize - topoffset );
72
- }
73
-
74
- size = MIN (asize , bsize ) / 2 ;
75
- if (!size || !minsize ) {
76
- printf ("acoustid_compare: empty matching subfingerprint\n" );
77
- score = 0.0 ;
78
- goto exit ;
79
- }
80
-
81
- memset (seen , 0 , UNIQ_MASK );
82
- for (i = 0 ; i < asize ; i ++ ) {
83
- int key = UNIQ_STRIP (a [i ]);
84
- if (!seen [key ]) {
85
- auniq ++ ;
86
- seen [key ] = 1 ;
87
- }
88
- }
89
-
90
- memset (seen , 0 , UNIQ_MASK );
91
- for (i = 0 ; i < bsize ; i ++ ) {
92
- int key = UNIQ_STRIP (b [i ]);
93
- if (!seen [key ]) {
94
- buniq ++ ;
95
- seen [key ] = 1 ;
96
- }
97
- }
98
-
99
- diversity = MIN (MIN (1.0 , (float )(auniq + 10 ) / asize + 0.5 ),
100
- MIN (1.0 , (float )(buniq + 10 ) / bsize + 0.5 ));
101
-
102
- if (topcount < MAX (auniq , buniq ) * 0.02 ) {
103
- printf ("acoustid_compare: top offset score is below 2%% of the unique size\n" );
104
- score = 0.0 ;
105
- goto exit ;
106
- }
107
-
108
- adata = (uint64_t * )a ;
109
- bdata = (uint64_t * )b ;
110
- biterror = 0 ;
111
- for (i = 0 ; i < size ; i ++ , adata ++ , bdata ++ ) {
112
- biterror += popcount (* adata ^ * bdata );
113
- }
114
- score = (size * 2.0 / minsize ) * (1.0 - 2.0 * (float )biterror / (64 * size ));
115
- if (score < 0.0 ) {
116
- score = 0.0 ;
117
- }
118
- if (diversity < 1.0 ) {
119
- float newscore = pow (score , 8.0 - 7.0 * diversity );
120
- printf ("acoustid_compare2: scaling score because of duplicate items, %f => %f\n" , score , newscore );
121
- score = newscore ;
122
- }
26
+ int i , topcount , topoffset , size , biterror , minsize , auniq = 0 , buniq = 0 ;
27
+ int numcounts = asize + bsize + 1 ;
28
+ unsigned short * counts = calloc (numcounts , sizeof (unsigned short ));
29
+ uint8_t * seen ;
30
+ uint16_t * aoffsets , * boffsets ;
31
+ uint64_t * adata , * bdata ;
32
+ float score , diversity ;
33
+
34
+ aoffsets = calloc ((MATCH_MASK + 1 ) * 2 , sizeof (uint16_t ));
35
+ boffsets = aoffsets + MATCH_MASK + 1 ;
36
+ seen = (uint8_t * )aoffsets ;
37
+
38
+ for (i = 0 ; i < asize ; i ++ ) {
39
+ aoffsets [MATCH_STRIP (a [i ])] = i ;
40
+ }
41
+
42
+ for (i = 0 ; i < bsize ; i ++ ) {
43
+ boffsets [MATCH_STRIP (b [i ])] = i ;
44
+ }
45
+
46
+ topcount = 0 ;
47
+ topoffset = 0 ;
48
+ for (i = 0 ; i < MATCH_MASK ; i ++ ) {
49
+ if (aoffsets [i ] && boffsets [i ]) {
50
+ int offset = aoffsets [i ] - boffsets [i ];
51
+ if (maxoffset == 0 || (- maxoffset <= offset && offset <= maxoffset )) {
52
+ offset += bsize ;
53
+ counts [offset ]++ ;
54
+ if (counts [offset ] > topcount ) {
55
+ topcount = counts [offset ];
56
+ topoffset = offset ;
57
+ }
58
+ }
59
+ }
60
+ }
61
+
62
+ topoffset -= bsize ;
63
+
64
+ minsize = MIN (asize , bsize ) & ~1 ;
65
+ if (topoffset < 0 ) {
66
+ b -= topoffset ;
67
+ bsize = MAX (0 , bsize + topoffset );
68
+ }
69
+ else {
70
+ a += topoffset ;
71
+ asize = MAX (0 , asize - topoffset );
72
+ }
73
+
74
+ size = MIN (asize , bsize ) / 2 ;
75
+ if (!size || !minsize ) {
76
+ printf ("acoustid_compare: empty matching subfingerprint\n" );
77
+ score = 0.0 ;
78
+ goto exit ;
79
+ }
80
+
81
+ memset (seen , 0 , UNIQ_MASK );
82
+ for (i = 0 ; i < asize ; i ++ ) {
83
+ int key = UNIQ_STRIP (a [i ]);
84
+ if (!seen [key ]) {
85
+ auniq ++ ;
86
+ seen [key ] = 1 ;
87
+ }
88
+ }
89
+
90
+ memset (seen , 0 , UNIQ_MASK );
91
+ for (i = 0 ; i < bsize ; i ++ ) {
92
+ int key = UNIQ_STRIP (b [i ]);
93
+ if (!seen [key ]) {
94
+ buniq ++ ;
95
+ seen [key ] = 1 ;
96
+ }
97
+ }
98
+
99
+ diversity = MIN (MIN (1.0 , (float )(auniq + 10 ) / asize + 0.5 ),
100
+ MIN (1.0 , (float )(buniq + 10 ) / bsize + 0.5 ));
101
+
102
+ if (topcount < MAX (auniq , buniq ) * 0.02 ) {
103
+ printf ("acoustid_compare: top offset score is below 2%% of the unique size\n" );
104
+ score = 0.0 ;
105
+ goto exit ;
106
+ }
107
+
108
+ adata = (uint64_t * )a ;
109
+ bdata = (uint64_t * )b ;
110
+ biterror = 0 ;
111
+ for (i = 0 ; i < size ; i ++ , adata ++ , bdata ++ ) {
112
+ biterror += popcount (* adata ^ * bdata );
113
+ }
114
+ score = (size * 2.0 / minsize ) * (1.0 - 2.0 * (float )biterror / (64 * size ));
115
+ if (score < 0.0 ) {
116
+ score = 0.0 ;
117
+ }
118
+ if (diversity < 1.0 ) {
119
+ float newscore = pow (score , 8.0 - 7.0 * diversity );
120
+ printf ("acoustid_compare2: scaling score because of duplicate items, %f => %f\n" , score , newscore );
121
+ score = newscore ;
122
+ }
123
123
exit :
124
- free (aoffsets );
125
- free (counts );
126
- return score ;
124
+ free (aoffsets );
125
+ free (counts );
126
+ return score ;
127
127
}
0 commit comments