From 3918e5640810aecb54a6298605cc798d60a50771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Kov=C3=A1cs?= Date: Sat, 18 Jan 2025 16:29:16 +0100 Subject: [PATCH] Implement dump --- cli.cpp | 4 +- qt/statementwindow.cpp | 4 +- statements/pbrst-cli.c | 13 ++- statements/pbrst.y | 182 +++++++++++++++++++++++++++++++++++------ 4 files changed, 172 insertions(+), 31 deletions(-) diff --git a/cli.cpp b/cli.cpp index 0221739..51caf2c 100644 --- a/cli.cpp +++ b/cli.cpp @@ -8,7 +8,7 @@ #include #ifdef WITH_PBRST -extern "C" char* brst_scan_string(char *string); +extern "C" char* brst_scan_string(char *string, int correct_raw, int show_dump); // #include "statements/pbrst.tab.h" // use flex/bison parser for bibref statements (brst) #include "pbrst.tab.h" // the statements folder must be included among the folders #endif @@ -852,7 +852,7 @@ void processGetrefsCmd(string input) { #ifdef WITH_PBRST void processStatementCmd(string input) { - char *output = brst_scan_string((char*)input.c_str()); + char *output = brst_scan_string((char*)input.c_str(), 0, 0); string output_s(output); vector statementAnalysis; boost::split(statementAnalysis, output_s, boost::is_any_of("\n")); diff --git a/qt/statementwindow.cpp b/qt/statementwindow.cpp index 8eb0a1d..ddc9dea 100644 --- a/qt/statementwindow.cpp +++ b/qt/statementwindow.cpp @@ -10,7 +10,7 @@ #include #include -extern "C" char* brst_scan_string(char *string); +extern "C" char* brst_scan_string(char *string, int correct_raw, int show_dump); #include "pbrst.tab.h" // the statements folder must be included among the folders using namespace std; @@ -93,7 +93,7 @@ void StatementWindow::setupFileMenu() void StatementWindow::parse() { #ifdef WITH_PBRST - char* output = brst_scan_string((char*)editor->toPlainText().toStdString().c_str()); + char* output = brst_scan_string((char*)editor->toPlainText().toStdString().c_str(), 0, 0); string output_s(output); vector statementAnalysis; boost::split(statementAnalysis, output_s, 
boost::is_any_of("\n")); diff --git a/statements/pbrst-cli.c b/statements/pbrst-cli.c index 07f987b..d2b613d 100644 --- a/statements/pbrst-cli.c +++ b/statements/pbrst-cli.c @@ -61,13 +61,16 @@ int main(int ac, char **av) { extern FILE *yyin; extern int correct_raw; + extern int show_dump; bool colorize = false; bool graphviz = false; correct_raw = 0; + show_dump = 0; while (ac>1 && (!strcmp(av[1], "-d") || !strcmp(av[1], "-c") - || !strcmp(av[1], "-g") || !strcmp(av[1], "-r") || !strcmp(av[1], "-h"))) { + || !strcmp(av[1], "-g") || !strcmp(av[1], "-r") || !strcmp(av[1], "-h") + || !strcmp(av[1], "-u"))) { if(!strcmp(av[1], "-h")) { printf("pbrst-cli [options] [input.brst], a command line brst parser\n"); @@ -77,6 +80,7 @@ int main(int ac, char **av) printf(" -c\tcolorize output\n"); printf(" -g\tshow only graphviz output\n"); printf(" -r\tcorrect raw positions\n"); + printf(" -u\tshow BRST dump\n"); exit(0); } @@ -96,6 +100,10 @@ int main(int ac, char **av) correct_raw = 1; ac--; av++; } + if(ac > 1 && !strcmp(av[1], "-u")) { /* av[1] is NULL once the last option was consumed above */ + show_dump = 1; ac--; av++; + } + } if(ac > 1 && (yyin = fopen(av[1], "r")) == NULL) { @@ -110,6 +118,9 @@ int main(int ac, char **av) if (strstr(parseinfo, ": error: ") == NULL) { create_diagram(); } + if (show_dump == 1) { + create_dump(); + } if (graphviz) { char *g_start = strstr(parseinfo, "diagram: graphviz: start\n"); if (g_start == NULL) exit(0); // empty output diff --git a/statements/pbrst.y b/statements/pbrst.y index 1eca334..68a0aa7 100644 --- a/statements/pbrst.y +++ b/statements/pbrst.y @@ -14,6 +14,7 @@ #endif // IN_BIBREF char *stmt_identifier; +double real_cover = 0.0; char *nt_book; char *nt_info; char *nt_verse; @@ -30,7 +31,7 @@ char *ot_verse; #define MAX_BOOK_LENGTH 175000 char introduction_substrings[MAX_SUBSTRINGS][MAX_SUBSTR_LENGTH + 1]; int intervals[MAX_INTERVALS][3]; // start, end, type -double intervals_data[MAX_INTERVALS]; // stored info (e.g., difference of fragments) +double intervals_data[MAX_INTERVALS]; // stored info 
(e.g., difference of fragments, number of substrings) #define NT_HEADLINE 0 #define NT_FRAGMENT 1 #define OT_PASSAGE 2 @@ -58,6 +59,12 @@ char fragments[MAX_INTERVALS][MAX_SUBSTR_LENGTH]; char books_s[MAX_INTERVALS][MAX_BOOKNAME_LENGTH]; // Bible editions char infos_s[MAX_INTERVALS][MAX_INFONAME_LENGTH]; // Bible books (in order of intervals) +// These must be stored fully (for the dump): +#define MAX_VERSE_LENGTH 30 +char verses_s[MAX_INTERVALS][MAX_VERSE_LENGTH]; +char declares[MAX_INTERVALS][MAX_SUBSTR_LENGTH]; +char identifies[MAX_INTERVALS][MAX_SUBSTR_LENGTH]; + bool unique_prep = false; // don't check unique occurrence (only if asked) bool addbooks_done = false; char *parseinfo = ""; @@ -70,6 +77,7 @@ bool no_evidence = false; // corrector options int correct_raw = 0; // fix raw positions if possible +int show_dump = 0; // if requested, print internal dump in BRST format #ifdef IN_BIBREF void init_addbooks() { @@ -225,7 +233,7 @@ int yylex (void); int yylex_destroy(); void yyerror (char *s, ...); void check_rawposition_length(char *s); -void save_string_in_introduction(char *s); +void save_string_in_introduction(char *s, int t); void check_nt_passage(char *book, char *info, char *verse); void check_ot_passage(char *book, char *info, char *verse); void check_introduction_passage(char *passage, char *ay); @@ -234,7 +242,8 @@ void check_unique_prepare(); void check_fragment(char *passage, char *ay_nt, char *ay_ot); double myatof(char *arr); void create_diagram(); -void reset_data(int correct_raw); +void create_dump(); +void reset_data(int cr, int sd); } %% @@ -312,8 +321,8 @@ introduction_explanations : introduction_explanation | introduction_explanation ALSO introduction_explanations; introduction_explanation - : DECLARES A QUOTATION WITH STRING { save_string_in_introduction($5); } - | IDENTIFIES THE SOURCE WITH STRING { save_string_in_introduction($5); }; + : DECLARES A QUOTATION WITH STRING { save_string_in_introduction($5, 0); } + | IDENTIFIES THE SOURCE 
WITH STRING { save_string_in_introduction($5, 1); }; fragments : fragments_description @@ -394,7 +403,7 @@ check_rawposition_length(char *s) } void -save_string_in_introduction(char *s) +save_string_in_introduction(char *s, int t) // t == 0: declares, t == 1: identifies { extern int yylineno; extern int yycolumn; @@ -402,6 +411,8 @@ save_string_in_introduction(char *s) char *l; l = greekToLatin1(s); strcpy(introduction_substrings[substrings++], l); + if (t==0) strcpy(declares[iv_counter-1], s); + if (t==1) strcpy(identifies[iv_counter-1], s); add_parseinfo("%d,%d: debug: found %s in input\n", yylineno, yycolumn, l); free(l); #endif // IN_BIBREF @@ -449,9 +460,10 @@ check_nt_passage(char *book, char *info, char *verse) } else { // TODO: This is fixable, the position should be corrected by getting the passage position: if (correct_raw == 1) { + int start = lookupVerseStart1(info, book, verse); + intervals[iv_counter-1][0] = start; + intervals[iv_counter-1][1] = start + strlen(l) - 1; add_parseinfo("%d,%d: corrected: results of lookup and getraw did not match\n", yylineno, yycolumn); - intervals[iv_counter-1][0] = lookupVerseStart1(info, book, verse); - intervals[iv_counter-1][1] = strlen(l); } else add_parseinfo("%d,%d: error: results of lookup and getraw do not match\n", yylineno, yycolumn); } @@ -461,7 +473,7 @@ check_nt_passage(char *book, char *info, char *verse) intervals[iv_counter-1][2] = NT_HEADLINE; // NT (headline) // strcpy(infos_s[iv_counter-1], nt_info); // strcpy(books_s[iv_counter-1], nt_book); - add_parseinfo("%d,%d: debug: interval %d is a headline NT passage\n", yylineno, yycolumn, iv_counter-1); + add_parseinfo("%d,%d: debug: interval %d %s is the headline NT passage\n", yylineno, yycolumn, iv_counter-1, verse); #endif // IN_BIBREF } @@ -512,8 +524,9 @@ check_ot_passage(char *book, char *info, char *verse) add_parseinfo("%d,%d: info: results of lookup and getraw match\n", yylineno, yycolumn); } else { if (correct_raw == 1) { - intervals[iv_counter-1][0] = 
lookupVerseStart1(ot_info, ot_book, ot_verse); - intervals[iv_counter-1][1] = strlen(l); + int start = lookupVerseStart1(ot_info, ot_book, ot_verse); + intervals[iv_counter-1][0] = start; + intervals[iv_counter-1][1] = start + strlen(l) - 1; add_parseinfo("%d,%d: corrected: results of lookup and getraw did not match\n", yylineno, yycolumn); } else add_parseinfo("%d,%d: error: results of lookup and getraw do not match\n", yylineno, yycolumn); @@ -537,6 +550,7 @@ check_ot_passage(char *book, char *info, char *verse) strcpy(infos_s[iv_counter-1], ot_info); strcpy(books_s[iv_counter-1], ot_book); + strcpy(verses_s[iv_counter-1], ot_verse); // for the dump #endif // IN_BIBREF } @@ -568,6 +582,7 @@ check_introduction_passage(char *passage, char *ay) } // strcpy(infos_s[iv_counter-1], nt_info); // strcpy(books_s[iv_counter-1], nt_book); + strcpy(verses_s[iv_counter-1], passage); // for the dump if (err) return; // At least one of the checks was erroneous, so we return without comparison. @@ -576,9 +591,10 @@ check_introduction_passage(char *passage, char *ay) add_parseinfo("%d,%d: info: results of lookup and getraw match\n", yylineno, yycolumn); } else { // try to fix the incorrect raw position: if (correct_raw == 1) { + int start = lookupVerseStart1(nt_info, nt_book, passage); + intervals[iv_counter-1][0] = start; + intervals[iv_counter-1][1] = start + strlen(l) - 1; add_parseinfo("%d,%d: corrected: results of lookup and getraw did not match\n", yylineno, yycolumn); - intervals[iv_counter-1][0] = lookupVerseStart1(nt_info, nt_book, passage); - intervals[iv_counter-1][1] = strlen(l); } else { add_parseinfo("%d,%d: error: results of lookup and getraw do not match\n", yylineno, yycolumn); } @@ -663,6 +679,8 @@ check_fragment(char *passage, char *ay_nt, char *ay_ot) { free(ot_passage); intervals[iv_counter-2][2] = NT_FRAGMENT; // this is an NT fragment intervals[iv_counter-1][2] = OT_PASSAGE; // this is an OT fragment + strcpy(verses_s[iv_counter-2],passage); // for the dump 
+ intervals_data[iv_counter-2] = difference; // save data for the diagram intervals_data[iv_counter-1] = count; // save data for the diagram (TODO: consider showing this only if unique_prep) if (fragments_start == -1) fragments_start = iv_counter-2; @@ -738,10 +756,10 @@ void check_cover(double cover) { } } } - double real_cover = (double)covered/union_length * 100.0; + real_cover = (double)covered/union_length * 100.0; if (fabs(real_cover-cover) <= 100*EPS) { add_parseinfo("%d,%d: info: cover %4.2f%% is correct\n", yylineno, yycolumn, cover); - } else { + } else { // TODO, fixable: add_parseinfo("%d,%d: error: cover %4.2f%% is incorrect (union length: %d, covered: %d), in reality %4.2f%%\n", yylineno, yycolumn, cover, union_length, covered, real_cover); } @@ -923,13 +941,8 @@ double myatof(char *arr) else return val; } -typedef struct yy_buffer_state * YY_BUFFER_STATE; -extern int yyparse(); -extern YY_BUFFER_STATE yy_scan_string(char * str); -extern void yy_delete_buffer(YY_BUFFER_STATE buffer); - -#define MAX_GRAPHVIZ_CODE_SIZE 16384 -char D[MAX_GRAPHVIZ_CODE_SIZE]; // diagram as text +#define MAX_CODE_SIZE 16384 +char D[MAX_CODE_SIZE]; // diagram or dump as text void create_diagram() { if (no_evidence) return; strcpy(D, ""); @@ -1248,7 +1261,116 @@ void create_diagram() { "diagram: graphviz: end\n", D); } -void reset_data(int correct_raw) { // important if a previous run was already performed +void strcat_interval(char *str, int a, int b) { +#define MAX_INTERVAL_STRING 50 + char interval[MAX_INTERVAL_STRING]; + sprintf(interval, " (%d-%d, length %d)", a, b, b-a+1); + strcat(str, interval); +} + +void strcat_percent(char *str, double p) { +#define MAX_PERCENT_STRING 15 + char percent[MAX_PERCENT_STRING]; + sprintf(percent, " %4.2f%%", p*100.0); + strcat(str, percent); +} + +void strcat_passage(char *str, char *book, char *info, char *verse) { + strcat(str, book); + strcat(str, " "); + strcat(str, info); + strcat(str, " "); + strcat(str, verse); +} + +void 
create_dump() { + strcpy(D, ""); + strcat(D, "Statement "); + if (stmt_identifier != NULL) { + strcat(D, stmt_identifier); + strcat(D, " "); + } + strcat(D, "connects\n "); + strcat_passage(D, nt_book, nt_info, nt_verse); + strcat_interval(D, intervals[0][0], intervals[0][1]); + strcat(D, " with\n"); + int iv=1; + for (int i=0; i0) { + strcat(D, " that\n declares a quotation with "); + strcat(D, declares[iv]); + infos++; + } + if (strlen(identifies[iv])>0) { + if (infos>0) + strcat(D, " also\n"); + else + strcat(D, " that\n"); + strcat(D, " identifies the source with "); + strcat(D, identifies[iv]); + infos++; + } + if (iv