Skip to content

Commit 6989efe

Browse files
committed
WIP
1 parent 1e912b5 commit 6989efe

File tree

11 files changed

+285
-179
lines changed

11 files changed

+285
-179
lines changed

docs/005/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ they are ignored by most authors in any analysis of the text.
164164

165165
Some glyphs (EVA 't', 'k', 'p' and 'f') appear taller than other characters and are traditionally referred to as "gallows".
166166
The combination 'ch' is instead called "pedestal". Some glyphs (EVA 'cth', 'ckh', 'cph' and 'cfh') appear visually as an
167-
overlap of the pedestal with one of the gallows and are therefore called "pedestalled gallows".
167+
overlap of the pedestal with one of the gallows and are therefore called "pedestalled gallows" (or "split gallows").
168168
These glyphs appear in slots 3, 4, and 5 and are shown in the below table.
169169

170170
![Gallows and Pedestal](images/Gallows.PNG)

docs/010/images/A.PNG

57.7 KB
Loading

docs/010/images/B.PNG

58.4 KB
Loading

docs/010/images/C.PNG

54.5 KB
Loading

docs/010/images/SummaryTable.PNG

-35.7 KB
Loading

docs/010/index.md

Lines changed: 98 additions & 171 deletions
Large diffs are not rendered by default.

eclipse/io.github.mzattera.v4j-apps/src/main/java/io/github/mzattera/v4j/applications/chars/CharDistributionAnalysis.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public abstract class CharDistributionAnalysis {
6666
private static final double ALPHA2 = 0.05d;
6767

6868
/** Compact output */
69-
private static final boolean COMPACT = false;
69+
private static final boolean COMPACT = true;
7070

7171
protected CharDistributionAnalysis() {
7272
}
@@ -120,15 +120,20 @@ private static void process(IvtffText doc, Experiment experiment) {
120120
}
121121

122122
// Header with chars
123-
System.out.print("Cluster;Significance [alpha];");
123+
System.out.print("Cluster;");
124+
if (!COMPACT)
125+
System.out.print("Significance [alpha];");
124126
for (int i = 0; i < chars.length; ++i)
125127
System.out.print(chars[i] + ";");
126128
System.out.println();
127129

128130
// Do analysis for each cluster
129131
for (String cluster : PageHeader.CLUSTERS) {
130132

131-
// TEST IvtffText clusterText = doc.filterPages(new PageFilter.Builder().cluster(cluster).build()).shuffledText();
133+
// TEST with random text
134+
// IvtffText clusterText = doc.filterPages(new
135+
// PageFilter.Builder().cluster(cluster).build()).shuffledText();
136+
132137
IvtffText clusterText = doc.filterPages(new PageFilter.Builder().cluster(cluster).build());
133138
process(cluster, clusterText, COMPACT, experiment);
134139

@@ -185,7 +190,8 @@ public static List<Character>[] process(String cluster, IvtffText txt, boolean c
185190
// there is a significative difference
186191
System.out.print(cluster + ";");
187192
double confidence = ChiSquared.chiSquareTestDataSetsComparison(parts[0], parts[1], adjustedDistribution, false);
188-
System.out.printf("%.2f%%;", confidence * 100);
193+
if (!COMPACT)
194+
System.out.printf("%.2f%%;", confidence * 100);
189195
if (confidence > ALPHA) { // Not a difference in the two part that is statistically significant; exit
190196
return result;
191197
}
@@ -213,7 +219,7 @@ public static List<Character>[] process(String cluster, IvtffText txt, boolean c
213219
// as a minimum?
214220
// double expectedCount = cd.getFrequency()
215221
// * Math.min(parts[0].getChars().getTotalCounted(), parts[1].getChars().getTotalCounted());
216-
222+
217223
// Observe actual frequencies of the char (2 categories: the char and all
218224
// others)
219225
long[] obs1 = ChiSquared.observe(parts[0], chars[i], false);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/**
2+
*
3+
*/
4+
package io.github.mzattera.v4j.applications.chars;
5+
6+
import java.util.List;
7+
import java.util.Map.Entry;
8+
9+
import io.github.mzattera.v4j.experiment.Experiment;
10+
import io.github.mzattera.v4j.text.ElementFilter;
11+
import io.github.mzattera.v4j.text.Text;
12+
import io.github.mzattera.v4j.text.alphabet.Alphabet;
13+
import io.github.mzattera.v4j.text.ivtff.IvtffPage;
14+
import io.github.mzattera.v4j.text.ivtff.IvtffText;
15+
import io.github.mzattera.v4j.text.ivtff.LineFilter;
16+
import io.github.mzattera.v4j.text.ivtff.PageFilter;
17+
import io.github.mzattera.v4j.text.ivtff.PageHeader;
18+
import io.github.mzattera.v4j.text.ivtff.VoynichFactory;
19+
import io.github.mzattera.v4j.text.ivtff.VoynichFactory.Transcription;
20+
import io.github.mzattera.v4j.text.ivtff.VoynichFactory.TranscriptionType;
21+
import io.github.mzattera.v4j.util.Counter;
22+
23+
/**
24+
* Statistics about words starting or containing gallows, in different positions
25+
* of the text.
26+
*
27+
*
28+
* @author Massimiliano "Maxi" Zattera
29+
*
30+
*/
31+
public final class CountGallows {
32+
33+
/**
34+
* Which transcription to use.
35+
*/
36+
public static final Transcription TRANSCRIPTION = Transcription.AUGMENTED;
37+
38+
/**
39+
* Which transcription type to use.
40+
*/
41+
public static final TranscriptionType TRANSCRIPTION_TYPE = TranscriptionType.MAJORITY;
42+
43+
/**
44+
* Which Alphabet type to use.
45+
*/
46+
public static final Alphabet ALPHABET = Alphabet.SLOT;
47+
48+
/** Filter to use on pages before analysis */
49+
public static final ElementFilter<IvtffPage> FILTER = null;
50+
51+
private final static char[] GALLOWS = { 't', 'k', 'p', 'f', 'T', 'K', 'P', 'F', 'C', 'S' };
52+
53+
private CountGallows() {
54+
}
55+
56+
/**
57+
* @param args
58+
*/
59+
public static void main(String[] args) {
60+
try {
61+
// Prints configuration parameters
62+
System.out.println("Transcription : " + TRANSCRIPTION);
63+
System.out.println("Transcription Type: " + TRANSCRIPTION_TYPE);
64+
System.out.println("Alphabet : " + ALPHABET);
65+
System.out.println("Filter : " + (FILTER == null ? "<no-filter>" : FILTER)
66+
+ " running text only (P0 & P1)");
67+
System.out.println();
68+
69+
IvtffText doc = VoynichFactory.getDocument(TRANSCRIPTION, TRANSCRIPTION_TYPE, ALPHABET);
70+
doc = doc.filterLines(LineFilter.PARAGRAPH_TEXT_FILTER);
71+
if (FILTER != null)
72+
doc = doc.filterPages(FILTER);
73+
74+
System.out.println("% of Tokens Containig Gallows");
75+
process(doc, false);
76+
System.out.println("\n\n% of Tokens Starting with Gallows");
77+
process(doc, true);
78+
79+
} catch (Exception e) {
80+
e.printStackTrace();
81+
} finally {
82+
System.out.println("\nCompleted.");
83+
}
84+
85+
}
86+
87+
/**
88+
* @param doc
89+
* @param first If true count tokens starting with gallows, otherwise count
90+
* tokens containing gallows.
91+
*/
92+
private static void process(IvtffText doc, boolean first) {
93+
// Header
94+
System.out.print("Locus;Cluster;");
95+
for (char c : GALLOWS)
96+
System.out.print(c + ";");
97+
System.out.println();
98+
99+
for (String cluster : PageHeader.CLUSTERS) {
100+
IvtffText clusterText = doc.filterPages(new PageFilter.Builder().cluster(cluster).build());
101+
102+
// Gets words in different positions;
103+
// TODO NOTICE WE IGNORE UNREADABLE WORDS!!!!!
104+
105+
Text[] split = new Experiment.FirstLineInParagraph(false).splitDocument(clusterText);
106+
IvtffText firstLines = (IvtffText)split[0];
107+
List<Counter<String>> firstLineWords = Experiment.getWordsByPosition(firstLines, true);
108+
109+
// First words of paragraphs
110+
Counter<String> firstWordsOfPar = firstLineWords.get(0);
111+
System.out.print("First Word of a Paragraph;" + cluster + ";");
112+
analize(firstWordsOfPar, first);
113+
114+
// Other words in first line
115+
Counter<String> wordsInFirstLine = new Counter<>();
116+
for (int i = 1; i < firstLineWords.size(); ++i) {
117+
wordsInFirstLine.countAll(firstLineWords.get(i));
118+
}
119+
System.out.print("Remaining Words in First Line;" + cluster + ";");
120+
analize(wordsInFirstLine, first);
121+
122+
List<Counter<String>> other = Experiment
123+
.getWordsByPosition((IvtffText)split[1], true);
124+
125+
// First words in remaining lines
126+
Counter<String> firstWordsInLine = other.get(0);
127+
System.out.print("First Word of Other Lines;" + cluster + ";");
128+
analize(firstWordsInLine, first);
129+
130+
// All other words
131+
Counter<String> rest = new Counter<>();
132+
for (int i = 1; i < other.size(); ++i) {
133+
rest.countAll(other.get(i));
134+
}
135+
System.out.print("Remaining Words;" + cluster + ";");
136+
analize(rest, first);
137+
} // for each cluster
138+
}
139+
140+
/**
141+
* Prints relevant % for given word distribution.
142+
*
143+
* @param wordsInFirstLine
144+
* @param first If true count tokens starting with gallows, otherwise
145+
* count tokens containing gallows.
146+
*/
147+
private static void analize(Counter<String> words, boolean first) {
148+
149+
// How many tokens with given gallows
150+
Counter<Character> tokens = new Counter<>();
151+
152+
for (Entry<String, Integer> e : words.entrySet()) {
153+
for (char c : GALLOWS) {
154+
if ((first && (e.getKey().charAt(0) == c)) || (!first && (e.getKey().indexOf(c) != -1))) {
155+
tokens.count(c, e.getValue());
156+
}
157+
}
158+
}
159+
160+
// Print results
161+
double t = 0.0;
162+
for (char c : GALLOWS) {
163+
double p = (double) tokens.getCount(c) / words.getTotalCounted();
164+
if (Double.isNaN(p))
165+
p = 0.0;
166+
t += p;
167+
System.out.print(p + ";");
168+
}
169+
System.out.println(t);
170+
}
171+
}

eclipse/io.github.mzattera.v4j-apps/src/main/java/io/github/mzattera/v4j/applications/chars/CurrierRepeatTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import io.github.mzattera.v4j.text.ivtff.IvtffLine;
1111
import io.github.mzattera.v4j.text.ivtff.IvtffPage;
1212
import io.github.mzattera.v4j.text.ivtff.IvtffText;
13+
import io.github.mzattera.v4j.text.ivtff.LineFilter;
1314
import io.github.mzattera.v4j.text.ivtff.VoynichFactory;
1415
import io.github.mzattera.v4j.text.ivtff.VoynichFactory.Transcription;
1516
import io.github.mzattera.v4j.text.ivtff.VoynichFactory.TranscriptionType;
@@ -59,6 +60,7 @@ public static void main(String[] args) {
5960
System.out.println();
6061

6162
IvtffText doc = VoynichFactory.getDocument(TRANSCRIPTION, TRANSCRIPTION_TYPE, ALPHABET);
63+
doc = doc.filterLines(LineFilter.PARAGRAPH_TEXT_FILTER);
6264
if (FILTER != null)
6365
doc = doc.filterPages(FILTER);
6466

@@ -68,8 +70,8 @@ public static void main(String[] args) {
6870
if (w.length == 0)
6971
continue; // paranoid guard
7072
if ((lastWord != null) && (w[0].equals(lastWord)))
71-
System.out.println(l);
72-
lastWord = w[w.length-1];
73+
System.out.println(l.getPage().getDescriptor().getCluster()+ " -> " + l);
74+
lastWord = w[w.length - 1];
7375

7476
} // for each line
7577

Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)