From 9e57ca856336bb2341b9c43a5d29a15f99b53426 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=A5=D0=BB=D0=B5=D0=B1=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2=20?=
 =?UTF-8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=90=D0=BB=D0=B5=D0=BA?=
 =?UTF-8?q?=D1=81=D0=B0=D0=BD=D0=B4=D1=80=D0=BE=D0=B2=D0=B8=D1=87?=
Date: Wed, 18 Dec 2024 20:02:10 +0300
Subject: [PATCH] FSM.Matched

---
Reviewer note (this area between "---" and the first "diff --git" is not part
of the commit message or of any hunk): this patch was recovered from a copy in
which line breaks and indentation were collapsed and angle-bracket spans were
dropped. Line structure was rebuilt and checked against the hunk header counts.
Generic type arguments were restored only where the surrounding code determines
them. Indentation and column alignment are conventional guesses. The sender
address in "From:" and the element types of the raw Deque declarations in
context lines could not be recovered. Verify against the repository before
applying.

 vol8/src/docs/RegExpr.utext                   |  25 +--
 .../ru/mifi/practice/vol8/regexp/Mach.java    | 195 ++++++++++++++++++
 .../regexp/visitor/AbstractStringVisitor.java |  92 +--------
 .../vol8/regexp/visitor/AbstractVisitor.java  | 105 ++++++++++
 .../vol8/regexp/visitor/MatchGenerator.java   |  16 ++
 ...isitor.java => OriginalTextGenerator.java} |   2 +-
 ...isitor.java => PlantUmlTextGenerator.java} |   2 +-
 .../vol8/regexp/AbstractPatternTest.java      |  16 ++
 .../mifi/practice/vol8/regexp/MachTest.java   |  21 ++
 .../mifi/practice/vol8/regexp/TreeTest.java   |  20 +-
 10 files changed, 374 insertions(+), 120 deletions(-)
 create mode 100644 vol8/src/main/java/ru/mifi/practice/vol8/regexp/Mach.java
 create mode 100644 vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractVisitor.java
 create mode 100644 vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/MatchGenerator.java
 rename vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/{TextVisitor.java => OriginalTextGenerator.java} (95%)
 rename vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/{UTextVisitor.java => PlantUmlTextGenerator.java} (97%)
 create mode 100644 vol8/src/test/java/ru/mifi/practice/vol8/regexp/AbstractPatternTest.java
 create mode 100644 vol8/src/test/java/ru/mifi/practice/vol8/regexp/MachTest.java

diff --git a/vol8/src/docs/RegExpr.utext b/vol8/src/docs/RegExpr.utext
index 48b4843..5017973 100644
--- a/vol8/src/docs/RegExpr.utext
+++ b/vol8/src/docs/RegExpr.utext
@@ -1,19 +1,20 @@
 //https://plantuml.com/ru/ebnf
 @startebnf
-RE = union | simple_re;
-union = RE, "|", simple_re;
-simple_re = concatenation | basic_re;
-concatenation = simple_re, basic_re;
+regexp = or | simple_re;
+or = regexp, "|", simple_re;
+simple_re = add | basic_re;
+add = simple_re, basic_re;
 basic_re = star | plus | elementary_re;
 star = elementary_re, "*";
 plus = elementary_re, "+";
 elementary_re = groups | "." | "$" | char | set;
-groups = "(", RE, ")";
-char = "characters" | "\", "characters";
-set = positive_set | negative_set;
-positive_set = "[", set_items, "]";
-negative_set = "[^", set_items, "]";
-set_items = set_item | set_item, set_items;
-set_item = range | char;
-range = char, "-", char;
+groups = "(", regexp, ")";
+char = characters | "\", characters;
+set = positive_set | negative_set;
+positive_set = "[", set_items, "]";
+negative_set = "[^", set_items, "]";
+set_items = set_item | set_item, set_items;
+set_item = range | char;
+range = char, "-", char;
+characters = (a-z|A-Z);
 @endebnf
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Mach.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Mach.java
new file mode 100644
index 0000000..3678cd0
--- /dev/null
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/Mach.java
@@ -0,0 +1,195 @@
+package ru.mifi.practice.vol8.regexp;
+
+import lombok.experimental.UtilityClass;
+import ru.mifi.practice.vol8.regexp.visitor.MatchGenerator;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static ru.mifi.practice.vol8.regexp.Mach.Input.StringInput;
+
+public interface Mach {
+
+    boolean match(String input);
+
+    static Mach of(State start) {
+        return new DefaultMach(start);
+    }
+
+    @UtilityClass
+    final class Compiler {
+        public Mach compile(String pattern) {
+            MatchGenerator generator = new MatchGenerator();
+            new Tree.Default(pattern).visit(generator);
+            return generator.getMach();
+        }
+    }
+
+    final class DefaultMach implements Mach {
+        private final State start;
+
+        private DefaultMach(State start) {
+            this.start = start;
+        }
+
+        @Override
+        public boolean match(String text) {
+            StringInput input = new StringInput(text);
+            return new Result(Match.MATCHED, start).match(input);
+        }
+    }
+
+    interface Input {
+
+        void mark();
+
+        void reset();
+
+        Optional<Character> peek();
+
+        void next();
+
+        final class StringInput implements Input {
+            private final char[] chars;
+            private int it;
+            private int mark;
+
+            public StringInput(String text) {
+                this.chars = text.toCharArray();
+            }
+
+            @Override
+            public void mark() {
+                mark = it;
+            }
+
+            @Override
+            public void reset() {
+                it = mark;
+                mark = 0;
+            }
+
+            @Override
+            public Optional<Character> peek() {
+                if (it >= chars.length) {
+                    return Optional.empty();
+                }
+                return Optional.of(chars[it]);
+            }
+
+            @Override
+            public void next() {
+                if (it < chars.length) {
+                    it++;
+                }
+            }
+        }
+    }
+
+    @SuppressWarnings("PMD.UnusedPrivateMethod")
+    abstract class State {
+        protected final Set<State> transitions = new HashSet<>();
+
+        private void add(State state) {
+            transitions.add(state);
+        }
+
+        protected abstract Match match(Input input);
+
+        protected abstract Result next(Input input);
+    }
+
+    final class Sequence extends State {
+        @Override
+        protected Match match(Input input) {
+            return Match.SKIPPED;
+        }
+
+        @Override
+        protected Result next(Input input) {
+            Iterator<State> it = transitions.iterator();
+            while (it.hasNext()) {
+                State state = it.next();
+                Result result = state.next(input);
+                if (result.match == Match.SKIPPED || result.match == Match.MATCHED) {
+                    it.remove();
+                } else {
+                    return new Result(Match.UNMATCHED);
+                }
+            }
+            return new Result(Match.MATCHED);
+        }
+    }
+
+    final class Epsilon extends State {
+        @Override
+        protected Match match(Input input) {
+            return Match.SKIPPED;
+        }
+
+        @Override
+        protected Result next(Input input) {
+            return new Result(Match.SKIPPED, transitions.toArray(new State[0]));
+        }
+    }
+
+    final class Matched extends State {
+        private final Character character;
+
+        private Matched(Character character) {
+            this.character = character;
+        }
+
+        @Override
+        protected Match match(Input input) {
+            return input.peek().filter(c -> c.equals(character))
+                .map(c -> Match.MATCHED).orElse(Match.UNMATCHED);
+        }
+
+        @Override
+        protected Result next(Input input) {
+            List<State> states = new ArrayList<>();
+            transitions.forEach(t -> {
+                Match match = t.match(input);
+                if (match == Match.MATCHED || match == Match.SKIPPED) {
+                    states.add(t);
+                }
+            });
+            return new Result(states.isEmpty() ? Match.UNMATCHED : Match.MATCHED, states.toArray(new State[0]));
+        }
+    }
+
+    record Result(Match match, State... next) {
+        boolean match(Input input) {
+            Set<State> states = new HashSet<>();
+            for (State state : next) {
+                Match matched = state.match(input);
+                if (matched == Match.MATCHED || matched == Match.SKIPPED) {
+                    states.add(state);
+                }
+            }
+            Set<Result> results = new HashSet<>();
+            for (State state : states) {
+                input.mark();
+                Result result = state.next(input);
+                input.reset();
+                if (result.match == Match.SKIPPED || result.match == Match.MATCHED) {
+                    results.add(result);
+                }
+            }
+            return results.stream().anyMatch(r -> r.match(input));
+        }
+    }
+
+    enum Match {
+        MATCHED, SKIPPED, UNMATCHED
+    }
+
+    enum Type {
+        SEQUENCE, PARALLELISM
+    }
+}
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractStringVisitor.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractStringVisitor.java
index 16fb041..734964d 100644
--- a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractStringVisitor.java
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractStringVisitor.java
@@ -7,7 +7,7 @@
 import java.nio.file.Path;

 @SuppressWarnings("PMD.AvoidStringBufferField")
-abstract class AbstractStringVisitor implements Tree.Visitor {
+abstract class AbstractStringVisitor extends AbstractVisitor {
     protected final StringBuilder buffer = new StringBuilder();

     public final void writeFile(String fileName) throws IOException {
@@ -24,96 +24,6 @@ public void start() {
         buffer.setLength(0);
     }

-    @Override
-    public void nextRange() {
-        //Nothing
-    }
-
-    @Override
-    public void nextSet() {
-        //Nothing
-    }
-
-    @Override
-    public void nextAnd() {
-        //Nothing
-    }
-
-    @Override
-    public void nextOr() {
-        //Nothing
-    }
-
-    @Override
-    public void any() {
-        //Nothing
-    }
-
-    @Override
-    public void end() {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.Set set) {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.Range range) {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.Group group) {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.Unary unary) {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.Or or) {
-        //Nothing
-    }
-
-    @Override
-    public void exit(Tree.And and) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.Set set) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.Range range) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.Group group) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.Unary unary) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.Or or) {
-        //Nothing
-    }
-
-    @Override
-    public void enter(Tree.And and) {
-        //Nothing
-    }
-
     @Override
     public String toString() {
         return buffer.toString();
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractVisitor.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractVisitor.java
new file mode 100644
index 0000000..b252536
--- /dev/null
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/AbstractVisitor.java
@@ -0,0 +1,105 @@
+package ru.mifi.practice.vol8.regexp.visitor;
+
+import ru.mifi.practice.vol8.regexp.Tree;
+
+abstract class AbstractVisitor implements Tree.Visitor {
+    @Override
+    public void visit(Tree.Char ch) {
+        //Nothing
+    }
+
+    @Override
+    public void start() {
+        //Nothing
+    }
+
+    @Override
+    public void nextRange() {
+        //Nothing
+    }
+
+    @Override
+    public void nextSet() {
+        //Nothing
+    }
+
+    @Override
+    public void nextAnd() {
+        //Nothing
+    }
+
+    @Override
+    public void nextOr() {
+        //Nothing
+    }
+
+    @Override
+    public void any() {
+        //Nothing
+    }
+
+    @Override
+    public void end() {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.Set set) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.Range range) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.Group group) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.Unary unary) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.Or or) {
+        //Nothing
+    }
+
+    @Override
+    public void exit(Tree.And and) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.Set set) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.Range range) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.Group group) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.Unary unary) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.Or or) {
+        //Nothing
+    }
+
+    @Override
+    public void enter(Tree.And and) {
+        //Nothing
+    }
+}
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/MatchGenerator.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/MatchGenerator.java
new file mode 100644
index 0000000..4934e46
--- /dev/null
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/MatchGenerator.java
@@ -0,0 +1,16 @@
+package ru.mifi.practice.vol8.regexp.visitor;
+
+import ru.mifi.practice.vol8.regexp.Mach;
+
+public final class MatchGenerator extends AbstractVisitor {
+    private Mach.State current;
+
+    @Override
+    public void start() {
+        current = new Mach.Sequence();
+    }
+
+    public Mach getMach() {
+        return Mach.of(current);
+    }
+}
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/TextVisitor.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/OriginalTextGenerator.java
similarity index 95%
rename from vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/TextVisitor.java
rename to vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/OriginalTextGenerator.java
index 7e95dd1..8dc9794 100644
--- a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/TextVisitor.java
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/OriginalTextGenerator.java
@@ -5,7 +5,7 @@
 import java.util.ArrayDeque;
 import java.util.Deque;

-public final class TextVisitor extends AbstractStringVisitor {
+public final class OriginalTextGenerator extends AbstractStringVisitor {
     private final Deque nextOr = new ArrayDeque<>();

     @Override
diff --git a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/PlantUmlTextGenerator.java
similarity index 97%
rename from vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java
rename to vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/PlantUmlTextGenerator.java
index b837501..5f7d2ae 100644
--- a/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/UTextVisitor.java
+++ b/vol8/src/main/java/ru/mifi/practice/vol8/regexp/visitor/PlantUmlTextGenerator.java
@@ -5,7 +5,7 @@
 import java.util.ArrayDeque;
 import java.util.Deque;

-public final class UTextVisitor extends AbstractStringVisitor {
+public final class PlantUmlTextGenerator extends AbstractStringVisitor {
     private final Deque nextOr = new ArrayDeque<>();
     private final Deque nextSet = new ArrayDeque<>();

diff --git a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/AbstractPatternTest.java b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/AbstractPatternTest.java
new file mode 100644
index 0000000..a3730bb
--- /dev/null
+++ b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/AbstractPatternTest.java
@@ -0,0 +1,16 @@
+package ru.mifi.practice.vol8.regexp;
+
+import org.junit.jupiter.params.provider.Arguments;
+
+import java.util.stream.Stream;
+
+abstract class AbstractPatternTest {
+    protected static Stream<Arguments> patternText() {
+        return Stream.of(
+            Arguments.of("1.utext", "abc*d?|abce|ab?ei|a(bcde[cei])+|d(c|e|i)?i"),
+            Arguments.of("2.utext", "abc*d?|abce|ab?e?i?|a(bcde[cei])+|d[cei]?i"),
+            Arguments.of("3.utext", "p(abc*d?|ab?e?i?|a(bcde[cei])+|d[cei]?i)ab"),
+            Arguments.of("4.utext", "(a|b*(c?d)+|e)|(of|pt)")
+        );
+    }
+}
diff --git a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/MachTest.java b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/MachTest.java
new file mode 100644
index 0000000..330e8fc
--- /dev/null
+++ b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/MachTest.java
@@ -0,0 +1,21 @@
+package ru.mifi.practice.vol8.regexp;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static ru.mifi.practice.vol8.regexp.Mach.Compiler.compile;
+
+@DisplayName("Mach")
+class MachTest extends AbstractPatternTest {
+    @DisplayName("compile")
+    @ParameterizedTest
+    @MethodSource("patternText")
+    void parse(String name, String text) throws IOException {
+        Mach mach = compile(text);
+        assertTrue(mach.match("blabla"));
+    }
+}
diff --git a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java
index 14918fb..f359536 100644
--- a/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java
+++ b/vol8/src/test/java/ru/mifi/practice/vol8/regexp/TreeTest.java
@@ -2,26 +2,16 @@

 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
-import ru.mifi.practice.vol8.regexp.visitor.TextVisitor;
-import ru.mifi.practice.vol8.regexp.visitor.UTextVisitor;
+import ru.mifi.practice.vol8.regexp.visitor.OriginalTextGenerator;
+import ru.mifi.practice.vol8.regexp.visitor.PlantUmlTextGenerator;

 import java.io.IOException;
-import java.util.stream.Stream;

 import static org.junit.jupiter.api.Assertions.assertEquals;

 @DisplayName("Tree")
-class TreeTest {
-    private static Stream<Arguments> patternText() {
-        return Stream.of(
-            Arguments.of("1.utext", "abc*d?|abce|ab?ei|a(bcde[cei])+|d(c|e|i)?i"),
-            Arguments.of("2.utext", "abc*d?|abce|ab?e?i?|a(bcde[cei])+|d[cei]?i"),
-            Arguments.of("3.utext", "p(abc*d?|ab?e?i?|a(bcde[cei])+|d[cei]?i)ab"),
-            Arguments.of("4.utext", "(a|b*(c?d)+|e)|(of|pt)")
-        );
-    }
+class TreeTest extends AbstractPatternTest {

     @DisplayName("parse")
     @ParameterizedTest
@@ -30,8 +20,8 @@ void parse(String name, String text) throws IOException {
         Tree.Default tree = new Tree.Default(text);
         Tree.Node node = tree.root();
         assertEquals(text, node.toString());
-        UTextVisitor uTextVisitor = new UTextVisitor();
-        TextVisitor textVisitor = new TextVisitor();
+        PlantUmlTextGenerator uTextVisitor = new PlantUmlTextGenerator();
+        OriginalTextGenerator textVisitor = new OriginalTextGenerator();
         tree.visit(uTextVisitor);
         uTextVisitor.writeFile(name);
         tree.visit(textVisitor);