diff --git a/jplag.frontend.rlang/README.md b/jplag.frontend.rlang/README.md new file mode 100644 index 000000000..67b433494 --- /dev/null +++ b/jplag.frontend.rlang/README.md @@ -0,0 +1,16 @@ +# JPlag R language frontend + +The JPlag R frontend allows the use of JPlag with submissions in R.
+It was in part adapted from a [JPlag fork by CodeGra-de](https://github.com/CodeGra-de/jplag/tree/master/jplag.frontend.R). + +### R specification compatibility +The underlying [grammar definition](https://github.com/antlr/grammars-v4/tree/master/r) was first created in June 2013, when R 3.0.1 was current. The latest commit is from April 2018, when R 3.5.0 was just released. Whether the grammar has been made to comply with any specific version of the R specification is unclear. Even if some parsing errors occur, the parser should be able to recover and still produce a valid analysis. + +### Token Extraction + +The choice of tokens is based directly on the CodeGra-de version, whereas the extraction process itself contains some fixes. + +Like in other frontends, e.g. for Java and C#, the tokens account for the beginning and the end of control flow structures, for control flow keywords, and some kinds of expressions. As R is very different from the other programming languages supported by JPlag, it remains to be seen whether the R frontend can hold up to the others. + +### Usage +To use the R frontend, add the `-l R` flag in the CLI, or use a `JPlagOptions` object with the language set to `LanguageOption.R` in the Java API, as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag). 
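\ No newline at end of file diff --git a/jplag.frontend.rlang/pom.xml b/jplag.frontend.rlang/pom.xml new file mode 100644 index 000000000..0683719db --- /dev/null +++ b/jplag.frontend.rlang/pom.xml @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>de.jplag</groupId> + <artifactId>aggregator</artifactId> + <version>${revision}</version> + </parent> + <artifactId>rlang</artifactId> + + <dependencies> + <dependency> + <groupId>org.antlr</groupId> + <artifactId>antlr4-runtime</artifactId> + </dependency> + <dependency> + <groupId>de.jplag</groupId> + <artifactId>frontend-utils</artifactId> + </dependency> + <dependency> + <groupId>de.jplag</groupId> + <artifactId>frontend-testutils</artifactId> + <version>${revision}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.antlr</groupId> + <artifactId>antlr4-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>antlr4</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + +</project> diff --git a/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/R.g4 b/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/R.g4 new file mode 100644 index 000000000..73bd2389c --- /dev/null +++ b/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/R.g4 @@ -0,0 +1,216 @@ +/* + [The "BSD licence"] + Copyright (c) 2013 Terence Parr + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 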
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** +derived from http://svn.r-project.org/R/trunk/src/main/gram.y +http://cran.r-project.org/doc/manuals/R-lang.html#Parser +I'm no R genius but this seems to work. +Requires RFilter.g4 to strip away NL that are really whitespace, +not end-of-command. See TestR.java +Usage: +$ antlr4 R.g4 RFilter.g4 +$ javac *.java +$ java TestR sample.R +... prints parse tree ... +*/ + +/* +Modified version of the original in https://github.com/antlr/grammars-v4/blob/master/r/R.g4 so that I can separate the most relevant tokens of R in +the JplagRListener.java file. +Author of the modification: Antonio Javier Rodriguez Perez +*/ + +grammar R; + +prog: ( expr (';'|NL) + | NL + )* + EOF + ; + +/* +expr_or_assign + : expr ('<-'|'='|'<<-') expr_or_assign + | expr + ; +*/ + +expr: expr index_statement // '[[' follows R's yacc grammar + | expr access_package expr + | expr ('$'|'@') expr + | expr '^' expr + | ('-'|'+') expr + | expr ':' expr + | expr USER_OP expr // anything wrapped in %: '%' .* '%' + | expr ('*'|'/') expr + | expr ('+'|'-') expr + | expr ('>'|'>='|'<'|'<='|'=='|'!=') expr + | '!' 
expr + | expr ('&'|'&&') expr + | expr ('|'|'||') expr + | '~' expr + | expr '~' expr + | expr assign_value expr + | function_definition // define function + | expr function_call // call function + | compound_statement + | if_statement + | for_statement + | while_statement + | repeat_statement + | help + | next_statement + | break_statement + | '(' expr ')' + | ID + | constant + ; + +index_statement : '[[' sublist ']' ']' | '[' sublist ']' ; + +access_package: '::'|':::' ; + +function_definition: 'function' '(' formlist? ')' expr ; + +function_call : '(' sublist ')' ; + +constant: constant_number | constant_string | constant_bool | 'NULL' | 'NA' | 'Inf' | 'NaN' ; + +constant_number: HEX | INT | FLOAT | COMPLEX ; + +constant_string: STRING ; + +constant_bool: 'TRUE' | 'FALSE' ; + +help: '?' expr ; // get help on expr, usually string or ID + +if_statement : 'if' '(' expr ')' expr | 'if' '(' expr ')' expr 'else' expr ; + +for_statement : 'for' '(' ID 'in' expr ')' expr ; + +while_statement : 'while' '(' expr ')' expr ; + +repeat_statement: 'repeat' expr ; + +next_statement: 'next' ; + +break_statement: 'break' ; + +compound_statement: '{' exprlist '}' ; + +exprlist + : expr ((';'|NL) expr?)* + | + ; + +formlist : form (',' form)* ; + +form: ID + | assign_func_declaration + ; + +sublist : sub (',' sub)* ; + +sub : expr + | assign_value_list + | + ; + +assign_value: '<-'|'<<-'|'='|'->'|'->>'|':='; + +assign_func_declaration: ID '=' expr | '...' ; + +assign_value_list: ID '=' | ID '=' expr | constant_string '=' | constant_string '=' expr | 'NULL' '=' | 'NULL' '=' expr | '...' ; + + + +HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ; + +INT : DIGIT+ [Ll]? ; + +fragment +HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ; + +FLOAT: DIGIT+ '.' DIGIT* EXP? [Ll]? + | DIGIT+ EXP? [Ll]? + | '.' DIGIT+ EXP? [Ll]? + ; + +fragment +DIGIT: '0'..'9' ; + +fragment +EXP : ('E' | 'e') ('+' | '-')? INT ; + +COMPLEX + : INT 'i' + | FLOAT 'i' + ; + +STRING + : '"' ( ESC | ~[\\"] )*? 
'"' + | '\'' ( ESC | ~[\\'] )*? '\'' + | '`' ( ESC | ~[\\'] )*? '`' + ; +fragment +ESC : '\\' [abtnfrv"'\\] + | UNICODE_ESCAPE + | HEX_ESCAPE + | OCTAL_ESCAPE + ; + +fragment +UNICODE_ESCAPE + : '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + | '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}' + ; + +fragment +OCTAL_ESCAPE + : '\\' [0-3] [0-7] [0-7] + | '\\' [0-7] [0-7] + | '\\' [0-7] + ; + +fragment +HEX_ESCAPE + : '\\' HEXDIGIT HEXDIGIT? + ; + +ID : '.' (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')* + | LETTER (LETTER|DIGIT|'_'|'.')* + ; + +fragment LETTER : [a-zA-Z] ; + +USER_OP : '%' .*? '%' ; + +COMMENT : '#' .*? '\r'? '\n' -> type(NL) ; + +// Match both UNIX and Windows newlines +NL : '\r'? '\n' ; + +WS : [ \t\u000C]+ -> skip ; \ No newline at end of file diff --git a/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/RFilter.g4 b/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/RFilter.g4 new file mode 100644 index 000000000..d66b85aa2 --- /dev/null +++ b/jplag.frontend.rlang/src/main/antlr4/de/jplag/R/grammar/RFilter.g4 @@ -0,0 +1,83 @@ +/* + [The "BSD licence"] + Copyright (c) 2013 Terence Parr + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** Must process R input with this before passing to R.g4; see TestR.java + We strip NL inside expressions. + */ + +parser grammar RFilter; + +options { tokenVocab=R; } + +@members { +protected int curlies = 0; +} + +// TODO: MAKE THIS GET ONE COMMAND ONLY +stream : (element|NL|';')* EOF ; + +eat : (NL {((WritableToken)$NL).setChannel(Token.HIDDEN_CHANNEL);})+ ; + +element: op eat? + | atom + | '{' eat? {curlies++;} (element|NL|';')* {curlies--;} '}' + | '(' (element|eat)* ')' + | '[' (element|eat)* ']' + | '[[' (element|eat)* ']' ']' + | 'function' eat? '(' (element|eat)* ')' eat? + | 'for' eat? '(' (element|eat)* ')' eat? + | 'while' eat? '(' (element|eat)* ')' eat? + | 'if' eat? '(' (element|eat)* ')' eat? 
+ | 'else' + { + // ``inside a compound expression, a newline before else is discarded, + // whereas at the outermost level, the newline terminates the if + // construction and a subsequent else causes a syntax error.'' + /* + Works here + if (1==0) { print(1) } else { print(2) } + and correctly gets error here: + if (1==0) { print(1) } + else { print(2) } + this works too: + if (1==0) { + if (2==0) print(1) + else print(2) + } + */ + WritableToken tok = (WritableToken)_input.LT(-2); + if (curlies>0&&tok.getType()==NL) tok.setChannel(Token.HIDDEN_CHANNEL); + } + ; + +atom: 'next' | 'break' | ID | STRING | HEX | INT | FLOAT | COMPLEX | 'NULL' + | 'NA' | 'Inf' | 'NaN' | 'TRUE' | 'FALSE' + ; + +op : '+'|'-'|'*'|'/'|'^'|'<'|'<='|'>='|'>'|'=='|'!='|'&'|'&&'|USER_OP| + 'repeat'|'in'|'?'|'!'|'='|':'|'~'|'$'|'@'|'<-'|'->'|'='|'::'|':::'| + ','|'...'|'||'| '|' + ; \ No newline at end of file diff --git a/jplag.frontend.rlang/src/main/java/de/jplag/R/JplagRListener.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/JplagRListener.java new file mode 100644 index 000000000..da686287b --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/JplagRListener.java @@ -0,0 +1,158 @@ +package de.jplag.R; + +import org.antlr.v4.runtime.Token; + +import de.jplag.R.grammar.*; + +/** + * Listener class for visiting the R ANTLR parse tree. Transforms the visited ANTLR token into JPlag tokens. Based on an + * R frontend for JPlag v2.15 by Olmo Kramer, see their + * JPlag fork. + * @author Robin Maisch + */ +public class JplagRListener extends RCombinedBaseListener implements RTokenConstants { + + private final RParserAdapter parserAdapter; + + /** + * Creates the listener. + * @param parserAdapter the JPlag parser adapter which receives the transformed tokens. + */ + public JplagRListener(RParserAdapter parserAdapter) { + this.parserAdapter = parserAdapter; + } + + /** + * Transforms an ANTLR Token into a JPlag token and transfers it to the token adapter. 
+ * @param targetType the type of the JPlag token to be created. + * @param token the ANTLR token. + */ + private void transformToken(int targetType, Token token) { + parserAdapter.addToken(targetType, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length()); + } + + private void transformToken(int targetType, Token start, Token end) { + parserAdapter.addToken(targetType, start.getLine(), start.getCharPositionInLine() + 1, end.getStopIndex() - start.getStartIndex() + 1); + } + + @Override + public void enterIndex_statement(RParser.Index_statementContext context) { + transformToken(INDEX, context.getStart(), context.getStop()); + } + + @Override + public void enterAccess_package(RParser.Access_packageContext context) { + transformToken(PACKAGE, context.getStart()); + } + + @Override + public void enterFunction_definition(RParser.Function_definitionContext context) { + transformToken(BEGIN_FUNCTION, context.getStart()); + } + + @Override + public void exitFunction_definition(RParser.Function_definitionContext context) { + transformToken(END_FUNCTION, context.getStop()); + } + + @Override + public void enterFunction_call(RParser.Function_callContext context) { + transformToken(FUNCTION_CALL, context.getStart(), context.getStop()); + } + + @Override + public void enterConstant_number(RParser.Constant_numberContext context) { + transformToken(NUMBER, context.getStart()); + } + + @Override + public void enterConstant_string(RParser.Constant_stringContext context) { + transformToken(STRING, context.getStart()); + } + + @Override + public void enterConstant_bool(RParser.Constant_boolContext context) { + transformToken(BOOL, context.getStart()); + } + + @Override + public void enterHelp(RParser.HelpContext context) { + transformToken(HELP, context.getStart()); + } + + @Override + public void enterIf_statement(RParser.If_statementContext context) { + transformToken(IF_BEGIN, context.getStart()); + } + + @Override + public void 
exitIf_statement(RParser.If_statementContext context) { + transformToken(IF_END, context.getStop()); + } + + @Override + public void enterFor_statement(RParser.For_statementContext context) { + transformToken(FOR_BEGIN, context.getStart()); + } + + @Override + public void exitFor_statement(RParser.For_statementContext context) { + transformToken(FOR_END, context.getStop()); + } + + @Override + public void enterWhile_statement(RParser.While_statementContext context) { + transformToken(WHILE_BEGIN, context.getStart()); + } + + @Override + public void exitWhile_statement(RParser.While_statementContext context) { + transformToken(WHILE_END, context.getStop()); + } + + @Override + public void enterRepeat_statement(RParser.Repeat_statementContext context) { + transformToken(REPEAT_BEGIN, context.getStart()); + } + + @Override + public void exitRepeat_statement(RParser.Repeat_statementContext context) { + transformToken(REPEAT_END, context.getStop()); + } + + @Override + public void enterNext_statement(RParser.Next_statementContext context) { + transformToken(NEXT, context.getStart()); + } + + @Override + public void enterBreak_statement(RParser.Break_statementContext context) { + transformToken(BREAK, context.getStart()); + } + + @Override + public void enterCompound_statement(RParser.Compound_statementContext context) { + transformToken(COMPOUND_BEGIN, context.getStart()); + } + + @Override + public void exitCompound_statement(RParser.Compound_statementContext context) { + transformToken(COMPOUND_END, context.getStop()); + } + + @Override + public void enterAssign_value(RParser.Assign_valueContext context) { + transformToken(ASSIGN, context.getStart()); + } + + @Override + public void enterAssign_func_declaration(RParser.Assign_func_declarationContext context) { + transformToken(ASSIGN_FUNC, context.getStart()); + } + + @Override + public void enterAssign_value_list(RParser.Assign_value_listContext context) { + transformToken(ASSIGN_LIST, context.getStart()); + } + +} \ 
No newline at end of file diff --git a/jplag.frontend.rlang/src/main/java/de/jplag/R/Language.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/Language.java new file mode 100644 index 000000000..34d694344 --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/Language.java @@ -0,0 +1,71 @@ +package de.jplag.R; + +import java.io.File; + +import de.jplag.ErrorConsumer; +import de.jplag.TokenList; + +/** + * This represents the R language as a language supported by JPlag. + */ +public class Language implements de.jplag.Language { + + public static final String NAME = "R Parser"; + public static final String SHORT_NAME = "R"; + public static final int DEFAULT_MIN_TOKEN_MATCH = 8; + private final RParserAdapter parserAdapter; + + public Language(ErrorConsumer consumer) { + this.parserAdapter = new RParserAdapter(consumer); + } + + @Override + public String[] suffixes() { + return new String[] {".R", ".r"}; + } + + @Override + public String getName() { + return NAME; + } + + @Override + public String getShortName() { + return SHORT_NAME; + } + + @Override + public int minimumTokenMatch() { + return DEFAULT_MIN_TOKEN_MATCH; + } + + @Override + public TokenList parse(File directory, String[] files) { + return parserAdapter.parse(directory, files); + } + + @Override + public boolean hasErrors() { + return parserAdapter.hasErrors(); + } + + @Override + public boolean supportsColumns() { + return true; + } + + @Override + public boolean isPreformatted() { + return true; + } + + @Override + public boolean usesIndex() { + return false; + } + + @Override + public int numberOfTokens() { + return RTokenConstants.NUM_DIFF_TOKENS; + } +} diff --git a/jplag.frontend.rlang/src/main/java/de/jplag/R/RCombinedBaseListener.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/RCombinedBaseListener.java new file mode 100644 index 000000000..4ef4a8a6f --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/RCombinedBaseListener.java @@ -0,0 +1,342 @@ +package 
de.jplag.R; + +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import de.jplag.R.grammar.*; + +/** + * Empty base implementation for {@link RListener} and {@link RFilterListener}. + */ +public abstract class RCombinedBaseListener implements RListener, RFilterListener { + @Override + public void enterStream(RFilter.StreamContext context) { + + } + + @Override + public void exitStream(RFilter.StreamContext context) { + + } + + @Override + public void enterEat(RFilter.EatContext context) { + + } + + @Override + public void exitEat(RFilter.EatContext context) { + + } + + @Override + public void enterElement(RFilter.ElementContext context) { + + } + + @Override + public void exitElement(RFilter.ElementContext context) { + + } + + @Override + public void enterAtom(RFilter.AtomContext context) { + + } + + @Override + public void exitAtom(RFilter.AtomContext context) { + + } + + @Override + public void enterOp(RFilter.OpContext context) { + + } + + @Override + public void exitOp(RFilter.OpContext context) { + + } + + @Override + public void enterProg(RParser.ProgContext context) { + + } + + @Override + public void exitProg(RParser.ProgContext context) { + + } + + @Override + public void enterExpr(RParser.ExprContext context) { + + } + + @Override + public void exitExpr(RParser.ExprContext context) { + + } + + @Override + public void enterIndex_statement(RParser.Index_statementContext context) { + + } + + @Override + public void exitIndex_statement(RParser.Index_statementContext context) { + + } + + @Override + public void enterAccess_package(RParser.Access_packageContext context) { + + } + + @Override + public void exitAccess_package(RParser.Access_packageContext context) { + + } + + @Override + public void enterFunction_definition(RParser.Function_definitionContext context) { + + } + + @Override + public void exitFunction_definition(RParser.Function_definitionContext context) { 
+ + } + + @Override + public void enterFunction_call(RParser.Function_callContext context) { + + } + + @Override + public void exitFunction_call(RParser.Function_callContext context) { + + } + + @Override + public void enterConstant(RParser.ConstantContext context) { + + } + + @Override + public void exitConstant(RParser.ConstantContext context) { + + } + + @Override + public void enterConstant_number(RParser.Constant_numberContext context) { + + } + + @Override + public void exitConstant_number(RParser.Constant_numberContext context) { + + } + + @Override + public void enterConstant_string(RParser.Constant_stringContext context) { + + } + + @Override + public void exitConstant_string(RParser.Constant_stringContext context) { + + } + + @Override + public void enterConstant_bool(RParser.Constant_boolContext context) { + + } + + @Override + public void exitConstant_bool(RParser.Constant_boolContext context) { + + } + + @Override + public void enterHelp(RParser.HelpContext context) { + + } + + @Override + public void exitHelp(RParser.HelpContext context) { + + } + + @Override + public void enterIf_statement(RParser.If_statementContext context) { + + } + + @Override + public void exitIf_statement(RParser.If_statementContext context) { + + } + + @Override + public void enterFor_statement(RParser.For_statementContext context) { + + } + + @Override + public void exitFor_statement(RParser.For_statementContext context) { + + } + + @Override + public void enterWhile_statement(RParser.While_statementContext context) { + + } + + @Override + public void exitWhile_statement(RParser.While_statementContext context) { + + } + + @Override + public void enterRepeat_statement(RParser.Repeat_statementContext context) { + + } + + @Override + public void exitRepeat_statement(RParser.Repeat_statementContext context) { + + } + + @Override + public void enterNext_statement(RParser.Next_statementContext context) { + + } + + @Override + public void 
exitNext_statement(RParser.Next_statementContext context) { + + } + + @Override + public void enterBreak_statement(RParser.Break_statementContext context) { + + } + + @Override + public void exitBreak_statement(RParser.Break_statementContext context) { + + } + + @Override + public void enterCompound_statement(RParser.Compound_statementContext context) { + + } + + @Override + public void exitCompound_statement(RParser.Compound_statementContext context) { + + } + + @Override + public void enterExprlist(RParser.ExprlistContext context) { + + } + + @Override + public void exitExprlist(RParser.ExprlistContext context) { + + } + + @Override + public void enterFormlist(RParser.FormlistContext context) { + + } + + @Override + public void exitFormlist(RParser.FormlistContext context) { + + } + + @Override + public void enterForm(RParser.FormContext context) { + + } + + @Override + public void exitForm(RParser.FormContext context) { + + } + + @Override + public void enterSublist(RParser.SublistContext context) { + + } + + @Override + public void exitSublist(RParser.SublistContext context) { + + } + + @Override + public void enterSub(RParser.SubContext context) { + + } + + @Override + public void exitSub(RParser.SubContext context) { + + } + + @Override + public void enterAssign_value(RParser.Assign_valueContext context) { + + } + + @Override + public void exitAssign_value(RParser.Assign_valueContext context) { + + } + + @Override + public void enterAssign_func_declaration(RParser.Assign_func_declarationContext context) { + + } + + @Override + public void exitAssign_func_declaration(RParser.Assign_func_declarationContext context) { + + } + + @Override + public void enterAssign_value_list(RParser.Assign_value_listContext context) { + + } + + @Override + public void exitAssign_value_list(RParser.Assign_value_listContext context) { + + } + + @Override + public void visitTerminal(TerminalNode node) { + + } + + @Override + public void visitErrorNode(ErrorNode node) { + + } + + 
@Override + public void enterEveryRule(ParserRuleContext context) { + + } + + @Override + public void exitEveryRule(ParserRuleContext context) { + + } +} diff --git a/jplag.frontend.rlang/src/main/java/de/jplag/R/RParserAdapter.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/RParserAdapter.java new file mode 100644 index 000000000..b7c0cd42e --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/RParserAdapter.java @@ -0,0 +1,97 @@ +package de.jplag.R; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeWalker; + +import de.jplag.AbstractParser; +import de.jplag.ErrorConsumer; +import de.jplag.R.grammar.RFilter; +import de.jplag.R.grammar.RLexer; +import de.jplag.R.grammar.RParser; +import de.jplag.TokenList; + +/** + * This class sets up the lexer and parser generated by ANTLR4, feeds the submissions through them and passes the + * selected tokens on to the main program. + */ +public class RParserAdapter extends AbstractParser implements RTokenConstants { + + private String currentFile; + private TokenList tokens; + + /** + * Creates the RParserAdapter. + * @param errorConsumer the ErrorConsumer that parser errors are passed on to. + */ + public RParserAdapter(ErrorConsumer errorConsumer) { + super(errorConsumer); + } + + /** + * Parses a list of files into a single {@link TokenList}. + * @param directory the directory of the files. + * @param fileNames the file names of the files. + * @return a {@link TokenList} containing all tokens of all files. 
+ */ + public TokenList parse(File directory, String[] fileNames) { + tokens = new TokenList(); + errors = 0; + for (String fileName : fileNames) { + if (!parseFile(directory, fileName)) { + errors++; + } + tokens.addToken(new RToken(FILE_END, fileName, -1, -1, -1)); + } + return tokens; + } + + private boolean parseFile(File directory, String fileName) { + File file = new File(directory, fileName); + try (FileInputStream inputStream = new FileInputStream(file)) { + currentFile = fileName; + + // create a lexer, a parser and a buffer between them. + RLexer lexer = new RLexer(CharStreams.fromStream(inputStream)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + + RFilter filter = new RFilter(tokens); + filter.stream(); + tokens.seek(0); + + RParser parser = new RParser(tokens); + + // Create a tree walker and the entry context defined by the parser grammar + ParserRuleContext entryContext = parser.prog(); + ParseTreeWalker treeWalker = new ParseTreeWalker(); + + // Walk over the parse tree: + for (int i = 0; i < entryContext.getChildCount(); i++) { + ParseTree parseTree = entryContext.getChild(i); + treeWalker.walk(new JplagRListener(this), parseTree); + } + } catch (IOException exception) { + getErrorConsumer().addError("Parsing Error in '" + fileName + "':" + File.separator + exception); + return false; + } + return true; + } + + /** + * Adds a new {@link de.jplag.Token} to the current {@link TokenList}. 
+ * @param type the type of the new {@link de.jplag.Token} + * @param line the line of the Token in the current file + * @param start the start column of the Token in the line + * @param length the length of the Token + */ + /* package-private */ void addToken(int type, int line, int start, int length) { + tokens.addToken(new RToken(type, currentFile, line, start, length)); + + } +} diff --git a/jplag.frontend.rlang/src/main/java/de/jplag/R/RToken.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/RToken.java new file mode 100644 index 000000000..21089f19d --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/RToken.java @@ -0,0 +1,46 @@ +package de.jplag.R; + +/** + * This class represents the occurrence of an R Token in the source code. Based on an R frontend for JPlag v2.15 by Olmo + * Kramer, see their JPlag fork. + */ +public class RToken extends de.jplag.Token implements RTokenConstants { + + public RToken(int type, String file, int line, int column, int length) { + super(type, file, line, column, length); + } + + @Override + public String type2string() { + return switch (this.type) { + case FILE_END -> "<EOF>"; + case SEPARATOR_TOKEN -> "METHOD_SEPARATOR"; + case BEGIN_FUNCTION -> "FUNCTION{"; + case END_FUNCTION -> "}FUNCTION"; + case FUNCTION_CALL -> "FUNCTION()"; + case NUMBER -> "NUMBER"; + case STRING -> "STRING"; + case BOOL -> "BOOL"; + case ASSIGN -> "ASSIGN"; + case ASSIGN_FUNC -> "ASSIGN_FUNC"; + case ASSIGN_LIST -> "ASSIGN_LIST"; + case HELP -> "HELP"; + case INDEX -> "INDEX"; + case PACKAGE -> "PACKAGE"; + case IF_BEGIN -> "IF{"; + case IF_END -> "}IF-ELSE"; + case FOR_BEGIN -> "FOR{"; + case FOR_END -> "}FOR"; + case WHILE_BEGIN -> "WHILE{"; + case WHILE_END -> "}WHILE"; + case REPEAT_BEGIN -> "REPEAT{"; + case REPEAT_END -> "}REPEAT"; + case NEXT -> "NEXT"; + case BREAK -> "BREAK"; + case COMPOUND_BEGIN -> "COMPOUND{"; + case COMPOUND_END -> "}COMPOUND"; + default -> "<UNKNOWN%d>".formatted(type); + }; + } + +} diff --git 
a/jplag.frontend.rlang/src/main/java/de/jplag/R/RTokenConstants.java b/jplag.frontend.rlang/src/main/java/de/jplag/R/RTokenConstants.java new file mode 100644 index 000000000..7f368d132 --- /dev/null +++ b/jplag.frontend.rlang/src/main/java/de/jplag/R/RTokenConstants.java @@ -0,0 +1,39 @@ +package de.jplag.R; + +import de.jplag.TokenConstants; + +/** + * Tokens in R that are deemed important when comparing submissions for plagiarisms. Based on an R frontend for JPlag + * v2.15 by Olmo Kramer, see their JPlag + * fork. + * @author Robin Maisch + */ +public interface RTokenConstants extends TokenConstants { + + int BEGIN_FUNCTION = 2; + int END_FUNCTION = 3; + int FUNCTION_CALL = 4; + int NUMBER = 5; + int STRING = 6; + int BOOL = 7; + int ASSIGN = 8; + int ASSIGN_FUNC = 9; + int ASSIGN_LIST = 10; + int HELP = 11; + int INDEX = 12; + int PACKAGE = 13; + int IF_BEGIN = 14; + int IF_END = 15; + int FOR_BEGIN = 16; + int FOR_END = 17; + int WHILE_BEGIN = 18; + int WHILE_END = 19; + int REPEAT_BEGIN = 20; + int REPEAT_END = 21; + int NEXT = 22; + int BREAK = 23; + int COMPOUND_BEGIN = 24; + int COMPOUND_END = 25; + + int NUM_DIFF_TOKENS = 26; +} diff --git a/jplag.frontend.rlang/src/test/java/de/jplag/R/RFrontendTest.java b/jplag.frontend.rlang/src/test/java/de/jplag/R/RFrontendTest.java new file mode 100644 index 000000000..055ce5ad5 --- /dev/null +++ b/jplag.frontend.rlang/src/test/java/de/jplag/R/RFrontendTest.java @@ -0,0 +1,114 @@ +package de.jplag.R; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.jplag.Token; +import de.jplag.TokenConstants; +import 
de.jplag.TokenList; +import de.jplag.TokenPrinter; +import de.jplag.testutils.TestErrorConsumer; + +public class RFrontendTest { + + /** + * Regular expression for lines that contain no code. + */ + private static final String R_NO_CODE_LINE = "\\s*(?:#.*)?"; + + /** + * Test source file that is supposed to produce a complete set of tokens, i.e. all types of tokens. + */ + private static final String COMPLETE_TEST_FILE = "Complete.R"; + public static final int NOT_SET = -1; + + private final Logger logger = LoggerFactory.getLogger("R frontend test"); + private final String[] testFiles = new String[] {"Game.R", COMPLETE_TEST_FILE}; + private final File testFileLocation = Path.of("src", "test", "resources", "de", "jplag", "R").toFile(); + private Language language; + + @BeforeEach + void setup() { + TestErrorConsumer consumer = new TestErrorConsumer(); + language = new Language(consumer); + } + + @Test + void parseTestFiles() { + for (String fileName : testFiles) { + TokenList tokens = language.parse(testFileLocation, new String[] {fileName}); + String output = TokenPrinter.printTokens(tokens, testFileLocation, List.of(fileName)); + logger.info(output); + + testSourceCoverage(fileName, tokens); + if (fileName.equals(COMPLETE_TEST_FILE)) + testTokenCoverage(tokens, fileName); + } + } + + /** + * Confirms that the code is covered to a basic extent, i.e. each line of code contains at least one token. 
+ * @param fileName a code sample file name + * @param tokens the TokenList generated from the sample + */ + private void testSourceCoverage(String fileName, TokenList tokens) { + File testFile = new File(testFileLocation, fileName); + + try { + List lines = Files.readAllLines(testFile.toPath()); + String emptyLineExpression = getNoCodeLineExpression(); + + // All lines that contain code + var codeLines = IntStream.range(1, lines.size() + 1).filter(idx -> !lines.get(idx - 1).matches(emptyLineExpression)).toArray(); + // All lines that contain token + var tokenLines = IntStream.range(0, tokens.size()).mapToObj(tokens::getToken).mapToInt(Token::getLine).distinct().toArray(); + + if (codeLines.length > tokenLines.length) { + var diffLine = IntStream.range(0, codeLines.length) + .dropWhile(lineIndex -> lineIndex < tokenLines.length && codeLines[lineIndex] == tokenLines[lineIndex]).findFirst(); + diffLine.ifPresent( + lineIdx -> fail("Line %d of file '%s' is not represented in the token list.".formatted(codeLines[lineIdx], fileName))); + } + assertArrayEquals(codeLines, tokenLines); + } catch (IOException exception) { + logger.info("Error while reading test file %s".formatted(fileName), exception); + fail(); + } + } + + /** + * Confirms that all Token types are 'reachable' with a complete code example. 
+ * @param tokens TokenList which is supposed to contain all types of tokens + * @param fileName The file name of the complete code example + */ + private void testTokenCoverage(TokenList tokens, String fileName) { + var foundTokens = StreamSupport.stream(tokens.allTokens().spliterator(), true).mapToInt(Token::getType).sorted().distinct().toArray(); + // Exclude SEPARATOR_TOKEN, as it does not occur + var allTokens = IntStream.range(0, RTokenConstants.NUM_DIFF_TOKENS).filter(i -> i != TokenConstants.SEPARATOR_TOKEN).toArray(); + + if (allTokens.length > foundTokens.length) { + var diffLine = IntStream.range(0, allTokens.length) + .dropWhile(lineIndex -> lineIndex < foundTokens.length && allTokens[lineIndex] == foundTokens[lineIndex]).findFirst(); + diffLine.ifPresent(lineIdx -> fail("Token type %s was not found in the complete code example '%s'." + .formatted(new RToken(allTokens[lineIdx], fileName, NOT_SET, NOT_SET, NOT_SET).type2string(), fileName))); + } + assertArrayEquals(allTokens, foundTokens); + } + + private static String getNoCodeLineExpression() { + return R_NO_CODE_LINE; + } + +} diff --git a/jplag.frontend.rlang/src/test/resources/de/jplag/R/Complete.R b/jplag.frontend.rlang/src/test/resources/de/jplag/R/Complete.R new file mode 100644 index 000000000..2be42db33 --- /dev/null +++ b/jplag.frontend.rlang/src/test/resources/de/jplag/R/Complete.R @@ -0,0 +1,37 @@ +# This R code sample is supposed to contain the corresponding AST structure for each type of RToken. +# It is also working code. 
+# Author: Robin Maisch + +main <- function() { + sixteen <- square(4); + squareOkay <- ifelse(sixteen==16, TRUE, FALSE); + cat("Should be 16: ", sixteen, squareOkay); + ?cat; + + if (squareOkay) { + perfectSquare <- square + } else { + perfectSquare <- function (x) x*x + } + + for (i in 1:10) { + print(apply(arg=i, func=perfectSquare)) + } + + repeat { + if (FALSE) next + else break + } + + idx <- 0 + while (FALSE) { + idx <- idx + 1 + print((0:40)[idx=]) + } +} +oneMore <- function(var) base::'+'(var, 1) # weird way to access + operator +square <- function(x) x+x # works for x=2, so that's not bad +identity <- function(x) x +apply <- function(arg=0, func=identity) func(arg) + +main() diff --git a/jplag.frontend.rlang/src/test/resources/de/jplag/R/Game.R b/jplag.frontend.rlang/src/test/resources/de/jplag/R/Game.R new file mode 100644 index 000000000..3ff3201b0 --- /dev/null +++ b/jplag.frontend.rlang/src/test/resources/de/jplag/R/Game.R @@ -0,0 +1,92 @@ +# Sample R program +# Author: Robin Maisch + +readBool <- function() +{ + n <- readline(prompt="Yes or no? 
(y/n): ") + while (!grepl("[YyNn]",n)) + { + n <- readline() + } + return(ifelse(n=="y"||n=="Y", TRUE, FALSE)) +} + + +getFilterBalance <- function(f, elements) { + return(abs(length(Filter(f, elements)) - length(elements)/2)) +} + +divisibleByFilter <- function(i) { + res <- c(function(x) x %% i == 0, paste("a multiple of", i), NA); + return(res) +} + +greaterThanFilter <- function(i) { + res <- c(function(x) x > i, paste("greater than", i), NA); + return(res) +} + +smallerThanFilter <- function(i) { + res <- c(function(x) x < i, paste("smaller than", i), NA); + return(res) +} + +endsWithFilter <- function(i) { + res <- c(function(x) x %% 10 == i, paste("'s last digit a'", i), NA); + return(res) +} + +# real program start here + +main <- function() { + filters <- c(lapply(2:20, divisibleByFilter), + lapply(0:100, greaterThanFilter), + lapply(0:100, smallerThanFilter), + lapply(0:10, endsWithFilter)) + filters <- aperm(simplify2array(filters, higher=FALSE), c(2,1)) + activeFilters = list() + + cat("Think of a number between 0 and 100.\n") + count <- 1 + candidates <- c() + repeat { + cat("\n +++ Round",count,"+++\n\n") + candidates = 0:100 + for (f in activeFilters) { + filterFunc = ifelse(f[[3]], function(x) f[[1]](x), function(x) !f[[1]](x)) + candidates = Filter(filterFunc, candidates) + } + if (length(candidates) == 1) { + break + } + + balance <- sapply(1:(length(filters)/3), function(i) {return(getFilterBalance(filters[i,][[1]], candidates))}) + rank <- rank(balance, ties="first") + risk = max(20-2*count, 1) + lowerEnd = 1 + topFiveIdx = order(rank)[lowerEnd:risk] + # print(filters[topFiveIdx[1:3],2]) + winnerIdx = topFiveIdx[sample.int(risk-lowerEnd + 1,1)] + winnerFilter = filters[winnerIdx,] + + cat("Is your number", winnerFilter[[2]], "?\n") + winnerFilter[[3]] <- readBool() + + cat("Okay, now, let's see...\n") + activeFilters[[length(activeFilters)+1]] = winnerFilter + # cat("Now we have", length(activeFilters), "active filters.\n") + + filters[- 
winnerIdx,] # remove element + count <- count + 1 + } + + cat("Your number must be", candidates[1], ", right???\n") + answer <- readBool() + if (answer) { + cat("Hahaha, I knew it. I'm a genius.\n") + } else { + cat("You must be joking!\n") + } + +} +main() diff --git a/jplag/pom.xml b/jplag/pom.xml index 62bb59704..d01ca4128 100644 --- a/jplag/pom.xml +++ b/jplag/pom.xml @@ -45,6 +45,10 @@ de.jplag cpp + + de.jplag + rlang + de.jplag scheme diff --git a/jplag/src/main/java/de/jplag/options/LanguageOption.java b/jplag/src/main/java/de/jplag/options/LanguageOption.java index d3a313e83..fcde70724 100644 --- a/jplag/src/main/java/de/jplag/options/LanguageOption.java +++ b/jplag/src/main/java/de/jplag/options/LanguageOption.java @@ -13,6 +13,7 @@ public enum LanguageOption { PYTHON_3("python3", "de.jplag.python3.Language"), C_CPP("cpp", "de.jplag.cpp.Language"), C_SHARP("csharp", "de.jplag.csharp.Language"), + R_LANG("rlang", "de.jplag.rlang.Language"), CHAR("char", "de.jplag.chars.Language"), TEXT("text", "de.jplag.text.Language"), SCHEME("scheme", "de.jplag.scheme.Language"); @@ -34,7 +35,7 @@ public String getDisplayName() { } public static LanguageOption fromDisplayName(String displayName) { - return Arrays.stream(LanguageOption.values()).filter(languageOption -> languageOption.displayName.equals(displayName)).findFirst() + return Arrays.stream(LanguageOption.values()).filter(languageOption -> languageOption.displayName.equalsIgnoreCase(displayName)).findFirst() .orElse(getDefault()); } diff --git a/pom.xml b/pom.xml index d80c71a92..2f5925f0f 100644 --- a/pom.xml +++ b/pom.xml @@ -49,6 +49,7 @@ jplag.frontend.csharp-6 jplag.frontend.java jplag.frontend.python-3 + jplag.frontend.rlang jplag.frontend.scheme jplag.frontend.text jplag @@ -156,6 +157,11 @@ csharp-6 ${project.version} + + ${project.groupId} + rlang + ${project.version} + ${project.groupId} cpp