diff --git a/.gitignore b/.gitignore index 9cf6834..36df0fd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ target logs users.json sudoku.log +vol8/1.utext +vol8/2.utext +vol8/3.utext diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Pattern.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Pattern.java deleted file mode 100644 index cabc7af..0000000 --- a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Pattern.java +++ /dev/null @@ -1,481 +0,0 @@ -package ru.mifi.practice.vol8.regexp; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Deque; -import java.util.LinkedList; -import java.util.List; -import java.util.Optional; - -public final class Pattern { - private final Element root; - - private Pattern(Element root) { - this.root = root; - } - - @Override - public String toString() { - return root.toString().replaceAll("\\s+", ""); - } - - private static String toString(List elements) { - StringBuilder result = new StringBuilder(); - for (Element element : elements) { - result.append(element.toString()); - } - return result.toString(); - } - - public static Pattern compile(String text) { - return new Pattern(new Parser(new Lexer(text)).parse()); - } - - public interface Element { - - record Any() implements Element { - @Override - public String toString() { - return "."; - } - } - - record Symbol(char symbol) implements Element { - @Override - public String toString() { - return String.valueOf(symbol); - } - } - - record Star(Element element) implements Element { - @Override - public String toString() { - return element.toString() + "*"; - } - } - - record Question(Element element) implements Element { - @Override - public String toString() { - return element.toString() + "?"; - } - } - - record Plus(Element element) implements Element { - @Override - public String toString() { - return element.toString() + "+"; - } - } - } - - public interface Sequence extends Element { - - default Sequence addCharacter(char 
character) { - return add(new Symbol(character)); - } - - Sequence add(Element element); - - void addAny(); - - class Or extends Simple { - @Override - public String toString() { - StringBuilder result = new StringBuilder(); - Element[] el = elements.toArray(new Element[0]); - for (int i = 0; i < el.length; i++) { - if (i > 0) { - result.append(" | "); - } - result.append(el[i].toString()); - } - return result.toString(); - } - } - - class Group extends Simple { - private char append = 0; - - @Override - public String toString() { - return "(" + Pattern.toString(elements) + ")" + (append != 0 ? append : ""); - } - - @Override - public Sequence add(Element element) { - if (elements.isEmpty()) { - elements.add(element); - } else { - Element last = elements.getLast(); - if (last instanceof Or or) { - or.add(element); - } else { - elements.add(element); - } - } - return this; - } - - public void or() { - Element last = elements.removeLast(); - if (elements.isEmpty()) { - Or or = new Or(); - or.add(last); - last = or; - } else { - Element next = elements.removeLast(); - if (next instanceof Or or) { - or.add(last); - last = or; - } else { - throw new UnsupportedOperationException("unexpected element: " + next); - } - } - elements.add(last); - } - } - - class ClassCharacters extends Simple { - @Override - public Sequence add(Element element) { - if (element instanceof Symbol) { - super.add(element); - } else { - throw new UnsupportedOperationException(); - } - return this; - } - - @Override - public String toString() { - return "[" + Pattern.toString(elements) + "]"; - } - } - - @SuppressWarnings("PMD.LooseCoupling") - class Simple implements Sequence { - protected final LinkedList elements = new LinkedList<>(); - - @Override - public Sequence add(Element element) { - elements.add(element); - return this; - } - - @Override - public void addAny() { - elements.add(new Any()); - } - - @Override - public String toString() { - return Pattern.toString(elements); - } - - public 
Sequence applyQuestion() { - elements.add(new Element.Question(elements.removeLast())); - return this; - } - - public Sequence applyStar() { - elements.add(new Element.Star(elements.removeLast())); - return this; - } - - public Sequence applyPlus() { - elements.add(new Element.Plus(elements.removeLast())); - return this; - } - } - - class Unary extends Simple { - public Unary(Element element) { - elements.add(element); - } - - @Override - public Sequence add(Element element) { - throw new UnsupportedOperationException(); - } - - @Override - public void addAny() { - throw new UnsupportedOperationException(); - } - } - - class Plus extends Unary { - Plus(Element element) { - super(element); - } - - @Override - public String toString() { - return Pattern.toString(elements) + "+"; - } - } - - class Star extends Unary { - Star(Element element) { - super(element); - } - - @Override - public String toString() { - return Pattern.toString(elements) + "*"; - } - } - - class Question extends Unary { - public Question(Element element) { - super(element); - } - - @Override - public String toString() { - return Pattern.toString(elements) + "?"; - } - } - } - - private record Parser(Lexer lexer) { - - private Element parse() { - Deque stack = new ArrayDeque<>(); - stack.push(new Sequence.Simple()); - while (!lexer.eof()) { - Optional optional = lexer.token(); - if (optional.isEmpty()) { - break; - } - lexer.next(); - Token token = optional.get(); - switch (token.type) { - case SYMBOL, QUOTED_SYMBOL -> { - Sequence sequence = stack.peek(); - if (sequence instanceof Sequence.Unary) { - stack.push(new Sequence.Simple().addCharacter(token.ch)); - } else { - sequence.addCharacter(token.ch); - } - } - case GROUP_OPEN -> { - stack.push(new Sequence.Group()); - } - case CLASS_CLOSE, GROUP_CLOSE -> { - Sequence sequence = stack.pop(); - stack.peek().add(sequence); - } - case CLASS_OPEN -> { - stack.push(new Sequence.ClassCharacters()); - } - case QUESTION -> { - 
stack.push(applyQuestion(stack.pop())); - } - case PLUS -> { - stack.push(applyPlus(stack.pop())); - } - case STAR -> { - stack.push(applyStar(stack.pop())); - } - case ANY -> { - stack.peek().addAny(); - } - case OR -> { - Sequence element = stack.pop(); - if (stack.isEmpty()) { - Sequence.Or or = new Sequence.Or(); - or.add(element); - stack.push(or); - } else if (element instanceof Sequence.Group group) { - group.or(); - stack.push(group); - } else { - Sequence sequence = stack.pop(); - if (sequence instanceof Sequence.Or or) { - or.add(element); - stack.push(or); - } else if (sequence instanceof Sequence.Group group) { - group.add(element); - stack.push(group); - } else { - Sequence.Or or = new Sequence.Or(); - Sequence temp = new Sequence.Simple(); - temp.add(sequence); - temp.add(element); - or.add(temp); - stack.push(or); - } - } - stack.push(new Sequence.Simple()); - } - case EOS -> { - } - default -> { - - } - } - } - - Sequence sequence = stack.pop(); - if (stack.isEmpty()) { - return sequence; - } - Deque tail = new ArrayDeque<>(); - stack.push(sequence); - while (!stack.isEmpty()) { - Sequence pop = stack.pop(); - if (stack.isEmpty()) { - Sequence temp = new Sequence.Simple(); - while (!tail.isEmpty()) { - temp.add(tail.removeFirst()); - } - pop.add(temp); - return pop; - } else { - tail.push(pop); - } - } - throw new UnsupportedOperationException(); - } - - private static Sequence applyStar(Sequence sequence) { - if (sequence instanceof Sequence.Group || sequence instanceof Sequence.ClassCharacters) { - return new Sequence.Star(sequence); - } - return ((Sequence.Simple) sequence).applyStar(); - } - - private static Sequence applyPlus(Sequence sequence) { - if (sequence instanceof Sequence.Group || sequence instanceof Sequence.ClassCharacters) { - return new Sequence.Plus(sequence); - } - return ((Sequence.Simple) sequence).applyPlus(); - } - - private static Sequence applyQuestion(Sequence sequence) { - if (sequence instanceof Sequence.Group || 
sequence instanceof Sequence.ClassCharacters) { - return new Sequence.Question(sequence); - } - return ((Sequence.Simple) sequence).applyQuestion(); - } - } - - private static final class Lexer { - private final Token[] tokens; - private int it; - - private Lexer(String input) { - char[] chars = input.toCharArray(); - List tokens = new ArrayList<>(); - for (int it = 0; it < chars.length; it++) { - char ch = chars[it]; - switch (ch) { - case '(': { - tokens.add(new Token(ch, it, TokenType.GROUP_OPEN)); - break; - } - case ')': { - tokens.add(new Token(ch, it, TokenType.GROUP_CLOSE)); - break; - } - case '[': { - tokens.add(new Token(ch, it, TokenType.CLASS_OPEN)); - break; - } - case ']': { - tokens.add(new Token(ch, it, TokenType.CLASS_CLOSE)); - break; - } - case '\\': { - if (it + 1 < chars.length) { - ++it; - ch = chars[it]; - tokens.add(new Token(ch, it, TokenType.QUOTED_SYMBOL)); - } - break; - } - case '+': { - tokens.add(new Token(ch, it, TokenType.PLUS)); - break; - } - case '?': { - tokens.add(new Token(ch, it, TokenType.QUESTION)); - break; - } - case '*': { - tokens.add(new Token(ch, it, TokenType.STAR)); - break; - } - case '.': { - tokens.add(new Token(ch, it, TokenType.ANY)); - break; - } - case '|': { - tokens.add(new Token(ch, it, TokenType.OR)); - break; - } - case '$': { - tokens.add(new Token(ch, it, TokenType.EOS)); - break; - } - default: { - tokens.add(new Token(ch, it, TokenType.SYMBOL)); - } - } - } - this.tokens = tokens.toArray(new Token[0]); - this.it = 0; - } - - private boolean eof() { - return it >= tokens.length; - } - - private Optional token() { - return Optional.ofNullable(tokens[it]); - } - - private boolean next() { - if (it < tokens.length) { - ++it; - } - return eof(); - } - } - - record Token(char ch, int pos, TokenType type) { - } - - enum TokenType { - EOL, - SYMBOL, - /** - * \\, \(, \) - */ - QUOTED_SYMBOL, - /** - * ( - */ - GROUP_OPEN, - /** - * ) - */ - GROUP_CLOSE, - /** - * [ - */ - CLASS_OPEN, - /** - * ] - */ - 
CLASS_CLOSE,
-        QUESTION,
-        PLUS,
-        STAR,
-        ANY,
-        OR,
-        EOS
-    }
-}
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Tree.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Tree.java
new file mode 100644
index 0000000..dcd13ae
--- /dev/null
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Tree.java
@@ -0,0 +1,611 @@
+package ru.mifi.practice.vol8.regexp;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Syntax tree of a simplified regular expression.
+ *
+ * <p>{@code toString()} of a node renders it back as pattern text, while
+ * {@link Node#toText()} renders it as EBNF-style text.
+ */
+public interface Tree {
+
+    /**
+     * Returns the root node of the parsed pattern.
+     */
+    Node root();
+
+    /**
+     * Walks the tree with the given visitor.
+     */
+    void visit(Visitor visitor);
+
+    /**
+     * Postfix repetition operators; {@code toString()} yields the operator symbol.
+     */
+    enum Operator {
+        STAR {
+            @Override
+            public String toString() {
+                return "*";
+            }
+        },
+        PLUS {
+            @Override
+            public String toString() {
+                return "+";
+            }
+        },
+        QUESTION {
+            @Override
+            public String toString() {
+                return "?";
+            }
+        }
+    }
+
+    /**
+     * A node of the syntax tree.
+     */
+    interface Node {
+        /**
+         * Dispatches this node to the visitor; the default implementation does nothing.
+         */
+        default void visit(Visitor visitor) {
+            //Nothing
+        }
+
+        /**
+         * Tells whether this node is an empty placeholder; {@code false} by default.
+         */
+        default boolean isEmpty() {
+            return false;
+        }
+
+        /**
+         * Renders this node as EBNF-style text.
+         */
+        String toText();
+    }
+
+    /**
+     * Callbacks invoked while a tree is traversed.
+     */
+    interface Visitor {
+        void visit(Char ch);
+
+        void enter(And and);
+
+        void exit(And and);
+
+        void enter(Or or);
+
+        void exit(Or or);
+
+        void enter(Unary unary);
+
+        void exit(Unary unary);
+
+        void enter(Group group);
+
+        void exit(Group group);
+
+        void enter(Range range);
+
+        void exit(Range range);
+
+        void enter(Set set);
+
+        void exit(Set set);
+
+        void start();
+
+        void end();
+
+        void any();
+    }
+
+    /**
+     * Placeholder returned by the parser when nothing can be parsed at the current position.
+     */
+    record Empty() implements Node {
+        @Override
+        public boolean isEmpty() {
+            return true;
+        }
+
+        @Override
+        public String toText() {
+            return "";
+        }
+
+        @Override
+        public String toString() {
+            return "";
+        }
+    }
+
+    /**
+     * A literal character.
+     */
+    record Char(char ch) implements Node {
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.visit(this);
+        }
+
+        @Override
+        public String toText() {
+            return "" + ch;
+        }
+
+        @Override
+        public String toString() {
+            return "" + ch;
+        }
+    }
+
+    /**
+     * A backslash-escaped character; it inherits the no-op {@code visit}.
+     */
+    record Escape(char ch) implements Node {
+        @Override
+        public String toString() {
+            return "\\" + ch;
+        }
+
+        @Override
+        public String toText() {
+            return "\\" + ch;
+        }
+    }
+
+    /**
+     * Concatenation of two nodes.
+     */
+    record And(Node left, Node right) implements Node {
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            left.visit(visitor);
+            right.visit(visitor);
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            return left.toText() + "," + right.toText();
+        }
+
+        @Override
+        public String toString() {
+            return left + "" + right;
+        }
+    }
+
+    /**
+     * Alternation of nodes; the backing list is mutable and grows through {@code add}.
+     */
+    record Or(List<Node> nodes) implements Node {
+        private Or(Node node) {
+            this(new ArrayList<>(List.of(node)));
+        }
+
+        private Or add(Or or) {
+            this.nodes.addAll(or.nodes);
+            return this;
+        }
+
+        public Or add(Node next) {
+            this.nodes.add(next);
+            return this;
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            nodes.forEach(node -> node.visit(visitor));
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            return nodes.stream().map(Node::toText).collect(Collectors.joining("|"));
+        }
+
+        @Override
+        public String toString() {
+            return nodes.stream().map(Node::toString).collect(Collectors.joining("|"));
+        }
+    }
+
+    /**
+     * A node followed by a postfix repetition operator.
+     */
+    record Unary(Operator operator, Node node) implements Node {
+        @Override
+        public String toString() {
+            return node + operator.toString();
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            node.visit(visitor);
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            // EBNF: braces repeat zero or more times, square brackets mark an optional part.
+            return switch (operator) {
+                case STAR -> "{" + node.toText() + "}";
+                case PLUS -> node.toText() + ",{" + node.toText() + "}";
+                case QUESTION -> "[" + node.toText() + "]";
+            };
+        }
+    }
+
+    /**
+     * A parenthesised sub-pattern.
+     */
+    record Group(Node node) implements Node {
+        @Override
+        public String toString() {
+            return "(" + node + ")";
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            node.visit(visitor);
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            // Keep the parentheses: ',' binds tighter than '|' in the generated EBNF.
+            return "(" + node.toText() + ")";
+        }
+    }
+
+    /**
+     * A character range inside a set, e.g. {@code a-z}.
+     */
+    record Range(Node start, Node end) implements Node {
+        @Override
+        public String toString() {
+            return start.toString() + "-" + end.toString();
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            start.visit(visitor);
+            end.visit(visitor);
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            return start.toText() + "-" + end.toText();
+        }
+    }
+
+    /**
+     * A bracketed character set; {@code positive} is {@code false} for a negated set ({@code [^...]}).
+     */
+    record Set(boolean positive, List<Node> nodes) implements Node {
+        @Override
+        public String toString() {
+            return "[" + (positive ? "" : "^") + nodes.stream().map(Node::toString).collect(Collectors.joining()) + "]";
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.enter(this);
+            nodes.forEach(node -> node.visit(visitor));
+            visitor.exit(this);
+        }
+
+        @Override
+        public String toText() {
+            return "(" + nodes.stream().map(Node::toText).collect(Collectors.joining("|")) + ")";
+        }
+    }
+
+    /**
+     * The {@code .} wildcard.
+     */
+    record Any() implements Node {
+        @Override
+        public String toString() {
+            return ".";
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.any();
+        }
+
+        @Override
+        public String toText() {
+            return ".";
+        }
+    }
+
+    /**
+     * {@link Tree} implementation that parses the pattern text on construction.
+     */
+    final class Default implements Tree {
+        private final Node root;
+
+        /**
+         * @param text pattern text; leading and trailing whitespace is ignored
+         * @throws IllegalStateException if the pattern is malformed
+         */
+        public Default(String text) {
+            this.root = new Parser(text).parse();
+        }
+
+        @Override
+        public Node root() {
+            return root;
+        }
+
+        @Override
+        public void visit(Visitor visitor) {
+            visitor.start();
+            root.visit(visitor);
+            visitor.end();
+        }
+    }
+
+    /**
+     * Recursive-descent parser for the pattern grammar.
+     *
+     * <p>Cursor convention: {@code parseElementary} and {@code parseBasic} return with the cursor ON the
+     * last character they consumed; {@code parseSimple} and {@code parseSetElement} return with the cursor
+     * just past it.
+     */
+    final class Parser {
+        private final char[] chars;
+        private char current;
+        private int index = 0;
+
+        private Parser(String text) {
+            this.chars = text.trim().toCharArray();
+            // An empty or blank pattern has no first character to load.
+            this.current = this.chars.length > 0 ? this.chars[0] : 0;
+        }
+
+        /**
+         * Parses the whole pattern.
+         *
+         * @return the root node, or {@link Empty} for an empty pattern
+         * @throws IllegalStateException if the pattern is malformed or has characters left unparsed
+         */
+        private Node parse() {
+            Node node = parseAlternation();
+            if (!eof()) {
+                // A stray ')', ']' or '|' stops the descent; report it instead of dropping the rest.
+                throw new IllegalStateException("Unexpected character '" + peekChar() + "' at index " + index);
+            }
+            return node;
+        }
+
+        /**
+         * Parses alternatives separated by {@code |} and flattens them into a single {@link Or}.
+         */
+        private Node parseAlternation() {
+            Node simple = parseSimple();
+            if (simple.isEmpty()) {
+                return simple;
+            }
+            if (peekChar() == '|') {
+                next();
+                Node next = parseAlternation();
+                if (next instanceof Or or) {
+                    return new Or(simple).add(or);
+                }
+                return new Or(simple).add(next);
+            }
+            return simple;
+        }
+
+        /**
+         * Parses a concatenation of basic expressions into a right-nested {@link And} chain.
+         */
+        private Node parseSimple() {
+            Node basic = parseBasic();
+            if (basic.isEmpty()) {
+                return basic;
+            }
+            next();
+            if (eof()) {
+                return basic;
+            }
+            Node next = parseSimple();
+            if (next.isEmpty()) {
+                return basic;
+            }
+            return new And(basic, next);
+        }
+
+        /**
+         * Parses an elementary expression with an optional {@code *}, {@code +} or {@code ?} suffix.
+         */
+        private Node parseBasic() {
+            Node elementary = parseElementary();
+            if (elementary.isEmpty()) {
+                return elementary;
+            }
+            next();
+            if (peekChar() == '*') {
+                return new Unary(Operator.STAR, elementary);
+            } else if (peekChar() == '+') {
+                return new Unary(Operator.PLUS, elementary);
+            } else if (peekChar() == '?') {
+                return new Unary(Operator.QUESTION, elementary);
+            }
+            // No operator followed: step back onto the last character of the elementary expression.
+            prev();
+            return elementary;
+        }
+
+        /**
+         * Parses a wildcard, set, group or single character; returns {@link Empty} at {@code )},
+         * {@code ]}, {@code |} or the end of the pattern.
+         */
+        private Node parseElementary() {
+            if (peekChar() == '.') {
+                // Do not advance: parseBasic moves past the last character of every elementary node.
+                return new Any();
+            } else if (peekChar() == '[') {
+                return parseSet();
+            } else if (peekChar() == '(') {
+                return parseGroup();
+            } else if (peekChar() == ')') {
+                return new Empty();
+            } else if (peekChar() == ']') {
+                return new Empty();
+            } else if (peekChar() == '|') {
+                return new Empty();
+            }
+            return parseChar();
+        }
+
+        /**
+         * Parses a parenthesised group; the cursor is left on the closing {@code )}.
+         */
+        private Node parseGroup() {
+            if (peekChar() == '(') {
+                next();
+                Node element = parseAlternation();
+                expect(')');
+                return new Group(element);
+            }
+            return new Empty();
+        }
+
+        /**
+         * Parses a bracketed set, optionally negated by a leading {@code ^}; the cursor is left on {@code ]}.
+         */
+        private Node parseSet() {
+            if (peekChar() == '[') {
+                next();
+                boolean positive = true;
+                if (peekChar() == '^') {
+                    positive = false;
+                    next();
+                }
+                Node set = parseSet(positive);
+                // Like a group, a set must be closed; the cursor stays on ']' for parseBasic.
+                expect(']');
+                return set;
+            }
+            return new Empty();
+        }
+
+        /**
+         * Collects set members until {@code ]} or the end of the pattern is reached.
+         */
+        private Node parseSet(boolean positive) {
+            Set set = new Set(positive, new ArrayList<>());
+            while (!eof() && peekChar() != ']') {
+                set.nodes.add(parseSetElement());
+            }
+            return set;
+        }
+
+        /**
+         * Parses one set member: a single character or a range such as {@code a-z}.
+         * A {@code -} directly before the closing {@code ]} is kept as a literal character.
+         */
+        private Node parseSetElement() {
+            Node start = parseChar();
+            next();
+            if (peekChar() == '-' && hasRangeEnd()) {
+                next();
+                Node end = parseChar();
+                // Move past the range end so it is not parsed again as a separate member.
+                next();
+                return new Range(start, end);
+            }
+            return start;
+        }
+
+        /**
+         * Tells whether the character after the current one can close a range.
+         */
+        private boolean hasRangeEnd() {
+            return index + 1 < chars.length && chars[index + 1] != ']';
+        }
+
+        /**
+         * Reads the current character as a {@link Char} or, after a backslash, an {@link Escape}.
+         *
+         * @throws IllegalStateException if a backslash is the last character of the pattern
+         */
+        private Node parseChar() {
+            char c = peekChar();
+            if (c == 0) {
+                return new Empty();
+            } else if (c == '\\') {
+                next();
+                if (eof()) {
+                    throw new IllegalStateException("Dangling '\\' at the end of the pattern");
+                }
+                return new Escape(peekChar());
+            }
+            return new Char(c);
+        }
+
+        private void next() {
+            index++;
+            if (eof()) {
+                current = 0;
+            } else {
+                current = chars[index];
+            }
+        }
+
+        private void prev() {
+            index--;
+            current = chars[index];
+        }
+
+        /**
+         * Verifies that the cursor is on {@code c} without moving it.
+         *
+         * @throws IllegalStateException if the pattern ended or another character is found
+         */
+        private void expect(char c) {
+            if (eof()) {
+                throw new IllegalStateException("Expected '" + c + "' but reached the end of the pattern");
+            }
+            if (peekChar() != c) {
+                throw new IllegalStateException(
+                    "Expected '" + c + "' but found '" + peekChar() + "' at index " + index);
+            }
+        }
+
+        private char peekChar() {
+            if (eof()) {
+                return 0;
+            }
+            return current;
+        }
+
+        private boolean eof() {
+            return index >= chars.length;
+        }
+
+        @Override
+        public String toString() {
+            return "Char: " + current + ", Index:" + index;
+        }
+    }
+}
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java
new file mode 100644
index 0000000..ec4e627
--- /dev/null
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java
@@ -0,0 +1,108 @@
+package ru.mifi.practice.vol8.regexp.visitor;
+
+import ru.mifi.practice.vol8.regexp.Tree;
+
+public final class UTextVisitor implements Tree.Visitor {
+    private final StringBuilder buffer = new StringBuilder();
+    private Tree.Node lastNode;
+    @Override
+    public void visit(Tree.Char ch) {
+        if (!buffer.isEmpty() && buffer.charAt(buffer.length() - 1) != '|') {
+            buffer.append(',');
+        }
+        buffer.append(ch);
+    }
+
+    @Override
+    public void enter(Tree.And and) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.And and) {
+        lastNode = and;
+    }
+
+    @Override
+    public void enter(Tree.Or or) {
+
+    }
+
+    @Override
+    public void exit(Tree.Or or) {
+        lastNode = or;
+    }
+
+    @Override
+    public void enter(Tree.Unary unary) {
+
+    }
+
+    @Override
+    public void exit(Tree.Unary unary) {
+        switch (unary.operator()) {
+            case STAR -> {
+                buffer.append('{').append(lastNode).append('}');
+            }
+            case PLUS -> {
+                buffer.append(lastNode).append('{').append(lastNode).append('}');
+            }
+            case QUESTION -> {
+                buffer.append('[').append(lastNode).append(']');
+            }
+        }
+        lastNode = null;
+    }
+
+    @Override
+    public void enter(Tree.Group group) {
+        buffer.append('(');
+    
} + + @Override + public void exit(Tree.Group group) { + buffer.append(')'); + } + + @Override + public void enter(Tree.Range range) { + + } + + @Override + public void exit(Tree.Range range) { + + } + + @Override + public void enter(Tree.Set set) { + buffer.append('['); + } + + @Override + public void exit(Tree.Set set) { + buffer.append(']'); + } + + @Override + public void start() { + buffer.setLength(0); + buffer.append("@startebnf").append("\n").append("pattern = "); + } + + @Override + public void end() { + buffer.append(";").append("\n"); + buffer.append("@endebnf").append("\n"); + } + + @Override + public void any() { + + } + + @Override + public String toString() { + return buffer.toString(); + } +} diff --git a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/PatternTest.java b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/PatternTest.java deleted file mode 100644 index e2d0a8f..0000000 --- a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/PatternTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package ru.mifi.practice.vol8.regexp; - -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -@DisplayName("Pattern") -class PatternTest { - - private static Stream patternCompile() { - return Stream.of( - Arguments.of("abc*d?|abce|ab?ei|a(bcde[cei])+|d(c|e|i)?i"), - Arguments.of("abc*d?|abce|ab?e?i?|a(bcde[cei])+|d[cei]?i"), - Arguments.of("p(abc*d?|ab?e?i?|a(bcde[cei])+|d[cei]?i)ab") - ); - } - - @DisplayName("compile") - @ParameterizedTest - @MethodSource("patternCompile") - void compile(String text) { - Pattern pattern = Pattern.compile(text); - assertEquals(text, pattern.toString()); - } -} diff --git a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java 
b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java new file mode 100644 index 0000000..7c32cfa --- /dev/null +++ b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java @@ -0,0 +1,38 @@ +package ru.mifi.practice.vol8.regexp; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@DisplayName("Tree") +class TreeTest { + private static Stream patternText() { + return Stream.of( + Arguments.of("1.utext", "abc*d?|abce|ab?ei|a(bcde[cei])+|d(c|e|i)?i"), + Arguments.of("2.utext", "abc*d?|abce|ab?e?i?|a(bcde[cei])+|d[cei]?i"), + Arguments.of("3.utext", "p(abc*d?|ab?e?i?|a(bcde[cei])+|d[cei]?i)ab") + ); + } + + @DisplayName("parse") + @ParameterizedTest + @MethodSource("patternText") + void parse(String name, String text) throws IOException { + Tree.Default tree = new Tree.Default(text); + Tree.Node node = tree.root(); + assertEquals(text, node.toString()); + StringBuilder buffer = new StringBuilder(); + buffer.append("@startebnf").append('\n'); + buffer.append("pattern = ").append(node.toText()).append(";").append('\n'); + buffer.append("@endebnf").append('\n'); + Files.writeString(Path.of(name), buffer); + } +}