diff --git a/README.md b/README.md index 133c712..afb4ae5 100644 --- a/README.md +++ b/README.md @@ -79,31 +79,31 @@ The output will first produce various tables which are for conveniency encoded i ### 4. Predictions (Automated, up to two candidates) -| doculect | predicted | perfect | proportion | score | -|:-----------|------------:|----------:|-------------:|--------:| -| Duhumbi | 13 | 6 | 0.4615 | 0.6923 | -| Jerigaon | 73 | 34 | 0.4658 | 0.7169 | -| Khispi | 31 | 13 | 0.4194 | 0.7151 | -| Khoina | 45 | 16 | 0.3556 | 0.6667 | -| Khoitam | 47 | 22 | 0.4681 | 0.7270 | -| Rahung | 48 | 24 | 0.5000 | 0.7292 | -| Rupa | 31 | 12 | 0.3871 | 0.6505 | -| Shergaon | 91 | 40 | 0.4396 | 0.7051 | -| total | 379 | 167 | 0.4406 | 0.7003 | +| doculect | words | morphemes | perfect | proportion | score | +|:-----------|--------:|------------:|----------:|-------------:|--------:| +| Duhumbi | 11 | 13 | 6 | 0.4615 | 0.6923 | +| Jerigaon | 62 | 73 | 34 | 0.4658 | 0.7169 | +| Khispi | 26 | 31 | 13 | 0.4194 | 0.7151 | +| Khoina | 38 | 45 | 16 | 0.3556 | 0.6667 | +| Khoitam | 39 | 47 | 23 | 0.4894 | 0.7447 | +| Rahung | 45 | 48 | 24 | 0.5000 | 0.7292 | +| Rupa | 25 | 31 | 13 | 0.4194 | 0.6559 | +| Shergaon | 81 | 91 | 40 | 0.4396 | 0.7051 | +| total | 327 | 379 | 169 | 0.4459 | 0.7032 | ### 5. Predictions (Automated, up to three candidates) -| doculect | predicted | perfect | proportion | score | -|:-----------|------------:|----------:|-------------:|--------:| -| Duhumbi | 13 | 6 | 0.4615 | 0.7179 | -| Jerigaon | 73 | 34 | 0.4658 | 0.7192 | -| Khispi | 31 | 13 | 0.4194 | 0.7151 | -| Khoina | 45 | 16 | 0.3556 | 0.6728 | -| Khoitam | 47 | 22 | 0.4681 | 0.7305 | -| Rahung | 48 | 24 | 0.5000 | 0.7292 | -| Rupa | 31 | 12 | 0.3871 | 0.6595 | -| Shergaon | 91 | 40 | 0.4396 | 0.7088 | -| total | 379 | 167 | 0.4406 | 0.7066 | +| doculect | words | morphemes | perfect | proportion | score | +|:-----------|--------:|------------:|----------:|-------------:|--------:| +| Duhumbi | 11 | 13 | 6 | 0.4615 | 0.7179 | +| Jerigaon | 62 | 73 | 34 | 0.4658 | 0.7192 | +| Khispi | 26 | 31 | 13 | 0.4194 | 0.7151 | +| Khoina | 38 | 45 | 16 | 0.3556 | 0.6728 | +| Khoitam | 39 | 47 | 23 | 0.4894 | 0.7482 | +| Rahung | 45 | 48 | 24 | 0.5000 | 0.7292 | +| Rupa | 25 | 31 | 13 | 0.4194 | 0.6649 | +| Shergaon | 81 | 91 | 40 | 0.4396 | 0.7088 | +| total | 327 | 379 | 169 | 0.4459 | 0.7095 | ``` diff --git a/evaluate.py b/evaluate.py index 553bd03..0ef99c5 100644 --- a/evaluate.py +++ b/evaluate.py @@ -217,7 +217,7 @@ sum([row[2] for row in btable]), sum([row[3] for row in btable]), sum([row[3] for row in btable])/sum([row[2] for row in btable]), - sum([row[5] for row in btable])/len(docs), + sum([row[5] for row in btable])/len(S), ]] print('\n### 3. Predictions (Automated, one candidate)\n') print(tabulate( @@ -264,6 +264,7 @@ S[doc][cogid] = [score, wordA, wordB, idxA, attIdx[0]] ctable += [[ doc, + len(PREDICTED[doc]), len(S[doc]), len([x for x in S[doc].values() if x[0] == 1]), len([x for x in S[doc].values() if x[0] == 1])/len(S[doc]), @@ -274,8 +275,9 @@ 'total', sum([row[1] for row in ctable]), sum([row[2] for row in ctable]), - sum([row[2] for row in ctable])/sum([row[1] for row in ctable]), - sum([row[4] for row in ctable])/len(docs), + sum([row[3] for row in ctable]), + sum([row[3] for row in ctable])/sum([row[2] for row in ctable]), + sum([row[5] for row in ctable])/len(S), ]] print('\n### 4. Predictions (Automated, up to two candidates)\n') @@ -283,7 +285,8 @@ ctable, headers=[ 'doculect', - 'predicted', + 'words', + 'morphemes', 'perfect', 'proportion', 'score'], @@ -322,18 +325,21 @@ S[doc][cogid] = [score, wordA, wordB, idxA, attIdx[0]] dtable += [[ doc, + len(PREDICTED[doc]), len(S[doc]), len([x for x in S[doc].values() if x[0] == 1]), len([x for x in S[doc].values() if x[0] == 1])/len(S[doc]), sum([cog[0] for cog in S[doc].values()])/len(S[doc]) ]] + dtable += [[ 'total', sum([row[1] for row in dtable]), sum([row[2] for row in dtable]), - sum([row[2] for row in dtable])/sum([row[1] for row in dtable]), - sum([row[4] for row in dtable])/len(docs), + sum([row[3] for row in dtable]), + sum([row[3] for row in dtable])/sum([row[2] for row in dtable]), + sum([row[5] for row in dtable])/len(S), ]] print('\n### 5. Predictions (Automated, up to three candidates)\n') @@ -341,7 +347,8 @@ dtable, headers=[ 'doculect', - 'predicted', + 'words', + 'morphemes', 'perfect', 'proportion', 'score'], @@ -395,7 +402,8 @@ ctable, headers=[ 'doculect', - 'predicted', + 'words', + 'morphemes', 'perfect', 'proportion', 'score'], @@ -407,7 +415,8 @@ dtable, headers=[ 'doculect', - 'predicted', + 'words', + 'morphemes', 'perfect', 'proportion', 'score'], diff --git a/results/computer-predictions-2.html b/results/computer-predictions-2.html index 34a852f..5952bf8 100644 --- a/results/computer-predictions-2.html +++ b/results/computer-predictions-2.html @@ -1,16 +1,16 @@ - + - - - - - - - - - + + + + + + + + +
doculect predicted perfect proportion score
doculect words morphemes perfect proportion score
Duhumbi 13 6 0.4615 0.6923
Jerigaon 73 34 0.4658 0.7169
Khispi 31 13 0.4194 0.7151
Khoina 45 16 0.3556 0.6556
Khoitam 48 22 0.4583 0.7257
Rahung 48 24 0.5000 0.7292
Rupa 31 12 0.3871 0.6505
Shergaon 92 40 0.4348 0.7029
total 381 167 0.4383 0.6985
Duhumbi 11 13 6 0.4615 0.6923
Jerigaon 62 73 34 0.4658 0.7169
Khispi 26 31 13 0.4194 0.7151
Khoina 38 45 16 0.3556 0.6667
Khoitam 39 47 23 0.4894 0.7447
Rahung 45 48 24 0.5000 0.7292
Rupa 25 31 13 0.4194 0.6559
Shergaon 81 91 40 0.4396 0.7051
total 327 379 169 0.4459 0.7032
\ No newline at end of file diff --git a/results/computer-predictions-3.html b/results/computer-predictions-3.html index 058e9f0..17f85fe 100644 --- a/results/computer-predictions-3.html +++ b/results/computer-predictions-3.html @@ -1,16 +1,16 @@ - + - - - - - - - - - + + + + + + + + +
doculect predicted perfect proportion score
doculect words morphemes perfect proportion score
Duhumbi 13 6 0.4615 0.7179
Jerigaon 73 34 0.4658 0.7184
Khispi 31 13 0.4194 0.7151
Khoina 45 16 0.3556 0.6617
Khoitam 48 22 0.4583 0.7292
Rahung 48 24 0.5000 0.7292
Rupa 31 12 0.3871 0.6595
Shergaon 92 40 0.4348 0.7065
total 381 167 0.4383 0.7047
Duhumbi 11 13 6 0.4615 0.7179
Jerigaon 62 73 34 0.4658 0.7192
Khispi 26 31 13 0.4194 0.7151
Khoina 38 45 16 0.3556 0.6728
Khoitam 39 47 23 0.4894 0.7482
Rahung 45 48 24 0.5000 0.7292
Rupa 25 31 13 0.4194 0.6649
Shergaon 81 91 40 0.4396 0.7088
total 327 379 169 0.4459 0.7095
\ No newline at end of file diff --git a/results/computer-predictions.html b/results/computer-predictions.html index 8c15344..104293a 100644 --- a/results/computer-predictions.html +++ b/results/computer-predictions.html @@ -4,13 +4,13 @@ Duhumbi 11 13 6 0.4615 0.6923 -Jerigaon 62 73 34 0.4658 0.6986 +Jerigaon 62 73 34 0.4658 0.6963 Khispi 26 31 13 0.4194 0.7097 -Khoina 38 45 16 0.3556 0.6481 +Khoina 38 45 16 0.3556 0.6593 Khoitam 39 47 23 0.4894 0.7340 Rahung 45 48 24 0.5000 0.7153 Rupa 25 31 13 0.4194 0.6505 Shergaon 81 91 40 0.4396 0.6923 -total 327 379 169 0.4459 0.6926 +total 327 379 169 0.4459 0.6937 \ No newline at end of file diff --git a/results/errors.tsv b/results/errors.tsv index 82f944f..a2915d7 100644 --- a/results/errors.tsv +++ b/results/errors.tsv @@ -5,7 +5,7 @@ tʰ t Duhumbi 1 tʰ u ŋ t ʰu ŋ u ʰu Duhumbi 1 tʰ u ŋ t ʰu ŋ ə u Jerigaon 2 m ə m u ɔ ɔː Jerigaon 1 w ɔ w ɔː -a ɛˀ Jerigaon 1 tɕʰ a - tɕʰ ɛˀ - +a ɛˀ Jerigaon 1 tɕʰ a tɕʰ ɛˀ eˀ eː Jerigaon 2 h eˀ h eː dʑ dz Jerigaon 1 dʑ i k dz i k - n Jerigaon 1 s a - s a n @@ -65,8 +65,8 @@ a ɔː Khoina 1 tɕ a ŋ tɕʰ ɔː - - aː Khoina 1 b - b aː ɔ øˀ Khoina 1 r ɔ p r øˀ - p - Khoina 1 r ɔ p r øˀ - -g l Khoina 1 g u l uˀ -u uˀ Khoina 1 g u l uˀ +g l Khoina 1 g u - l uˀ - +u uˀ Khoina 1 g u - l uˀ - ɛ ɐ̃ː Khoina 1 x ɛ t x ɐ̃ː - t - Khoina 1 x ɛ t x ɐ̃ː - g kʰ Khoina 1 g ɔ kʰ ɔˀ @@ -79,7 +79,7 @@ y a Khoina 1 g y g a s dʑ Khoina 1 s ɔ ŋ dʑ y ŋ ɔ y Khoina 1 s ɔ ŋ dʑ y ŋ ə a Khoina 1 m ə m a -eˀ ɛˀ Khoina 1 j eˀ j ɛˀ +eˀ ɛˀ Khoina 1 j eˀ - j ɛˀ - j tɕʰ Khoina 1 j ɔ k tɕʰ ɔ k z dz Khoina 1 z ɛ n dz ɛ n ə aː Khoina 1 n ə n aː diff --git a/results/human-predictions.html b/results/human-predictions.html index c389439..ee2d362 100644 --- a/results/human-predictions.html +++ b/results/human-predictions.html @@ -4,13 +4,13 @@ Duhumbi 11 14 10 0.7143 0.8690 -Jerigaon 62 83 51 0.6145 0.8012 +Jerigaon 62 83 51 0.6145 0.7992 Khispi 26 33 19 0.5758 0.7828 -Khoina 38 48 20 0.4167 0.6771 +Khoina 38 48 20 0.4167 0.6875 Khoitam 39 54 28 0.5185 0.7685 Rahung 45 53 29 0.5472 0.7453 Rupa 25 33 15 0.4545 0.6616 Shergaon 81 99 49 0.4949 0.7340 -total 327 417 221 0.5300 0.7549 +total 327 417 221 0.5300 0.7560 \ No newline at end of file