Write ClusteringResult to Report

jplag · Jun 10, 2022 · ae68a2f · ae68a2f
1 parent 8503fbf
commit ae68a2f
Show file tree

Hide file tree

Showing 8 changed files with 92 additions and 32 deletions.
diff --git a/jplag/src/main/java/de/jplag/clustering/Cluster.java b/jplag/src/main/java/de/jplag/clustering/Cluster.java
@@ -4,11 +4,9 @@
 import java.util.Collection;
 import java.util.List;
 import java.util.function.BiFunction;
-import java.util.stream.Collectors;
 
 /**
  * Cluster part of a {@link ClusteringResult}.
- *
  * @param <T> type of the clusters members
  */
 public class Cluster<T> {
@@ -18,7 +16,6 @@ public class Cluster<T> {
     private ClusteringResult<T> clusteringResult = null;
     private final float averageSimilarity;
 
-
     public Cluster(Collection<T> members, float communityStrength, float averageSimilarity) {
         this.members = new ArrayList<>(members);
         this.communityStrength = communityStrength;
@@ -36,7 +33,6 @@ public float getAverageSimilarity() {
 
     /**
      * See {@link ClusteringResult#getCommunityStrength}
-     *
      * @return community strength of the cluster
      */
     public float getCommunityStrength() {
@@ -46,7 +42,6 @@ public float getCommunityStrength() {
     /**
      * Sets this clusters clustering result. Should only be called by classes extending {@link ClusteringResult} on their
      * own clusters.
-     *
      * @param clusteringResult the clustering result
      */
     public void setClusteringResult(ClusteringResult<T> clusteringResult) {
@@ -67,7 +62,6 @@ public float getCommunityStrengthPerConnection() {
      * Computes a normalized community strength per connection. Can be used as measure for strength of evidence in
      * comparison to other clusters in the same clustering. Guaranteed to be smaller than 1. Negative values indicate
      * non-clusters. This method may only be called on clusters that are part of a ClusteringResult.
-     *
      * @return normalized community strength per connection
      */
     public float getNormalizedCommunityStrengthPerConnection() {
@@ -94,7 +88,6 @@ public double getWorth(BiFunction<T, T, Float> similarity) {
 
     /**
      * Computes the average similarity inside the cluster.
-     *
      * @param similarity function that supplies the similarity of two cluster members.
      * @return average similarity
      */
@@ -119,7 +112,6 @@ private int connections() {
 
     /**
      * Whether this cluster is very uninformative or wrong and should be pruned as last step of the clustering process.
-     *
      * @return is bad
      */
     public boolean isBadCluster() {

diff --git a/jplag/src/main/java/de/jplag/clustering/ClusteringAdapter.java b/jplag/src/main/java/de/jplag/clustering/ClusteringAdapter.java
@@ -28,9 +28,8 @@ public class ClusteringAdapter {
     /**
      * Creates the clustering adapter. Only submissions that appear in those similarities might also appear in
      * {@link ClusteringResult}s obtained from this adapter.
-     *
      * @param comparisons that should be included in the process of clustering
-     * @param metric      function that assigns a similarity to each comparison
+     * @param metric function that assigns a similarity to each comparison
      */
     public ClusteringAdapter(Collection<JPlagComparison> comparisons, Function<JPlagComparison, Float> metric) {
         mapping = new IntegerMapping<>(comparisons.size());
@@ -53,7 +52,6 @@ public ClusteringAdapter(Collection<JPlagComparison> comparisons, Function<JPlag
     /**
      * Use a generic clustering algorithm to cluster the submissions, that were included in this {@link ClusteringAdapter}'s
      * comparison.
-     *
      * @param algorithm that is used for clustering
      * @return the clustered submissions
      */

diff --git a/jplag/src/main/java/de/jplag/clustering/ClusteringResult.java b/jplag/src/main/java/de/jplag/clustering/ClusteringResult.java
@@ -9,7 +9,6 @@
 
 /**
  * Set of clusters dividing a set of entities.
- *
  * @param <T> type of the clustered entities (e.g. Submission)
  */
 public class ClusteringResult<T> {
@@ -35,7 +34,6 @@ public Collection<Cluster<T>> getClusters() {
      * changed, a higher community strength denotes a better clustering. See: Finding and evaluating community structure in
      * networks, M. E. J. Newman and M. Girvan, Phys. Rev. E 69, 026113 – Published 26 February 2004, Doi:
      * 10.1103/PhysRevE.69.026113 It's called modularity in that paper.
-     *
      * @return community strength
      */
     public float getCommunityStrength() {
@@ -44,7 +42,6 @@ public float getCommunityStrength() {
 
     /**
      * How much this clustering result is worth during optimization.
-     *
      * @param similarity TODO DF: JAVADOC
      * @return worth
      */
@@ -106,15 +103,20 @@ private static float calculateAverageSimilarityFor(Collection<Integer> cluster,
                     sumOfSimilarities += similarityMatrix.getEntry(indexOfSubmission1, indexOfSubmission2);
                 }
             }
-            submissionIndicesWithoutIndicesAlreadyProcessed.remove(indexOfSubmission1); // remove indices we already processed from second list to avoid adding them anew unnecessary
+            submissionIndicesWithoutIndicesAlreadyProcessed.remove(indexOfSubmission1); // remove indices we already processed from second list to
+                                                                                        // avoid adding them again unnecessarily
         }
         int nMinusOne = cluster.size() - 1;
-        float numberOfComparisons = (nMinusOne * (nMinusOne + 1)) / 2f; /* Use Gauss sum to calculate number of comparisons in cluster: Given cluster of size n we need Gauss sum of n-1 comparisons:
-                                                                           compare first element of cluster to all other except itself: n-1 comparisons. compare second element two all other except itself and first element
-                                                                         (as these two were already compared when we processed the first element), n-2 comparisons. compare third element to all other but itself and all previously compared: n-3 comparisons and so on.
-                                                                         when we reach the second to last element we have n-(n-1)=1 comparisons left. when we reach the last element it has already been compared to all other.
-                                                                         adding up all comparisons we get: (n-1) + (n-2) + (n-3) + ... + (n-(n-1)) = Gauss sum of (n-1)
-         */
+        float numberOfComparisons = (nMinusOne * (nMinusOne + 1))
+                / 2f; /*
+                       * Use Gauss sum to calculate number of comparisons in cluster: Given cluster of size n we need Gauss sum of n-1
+                       * comparisons: compare first element of cluster to all other except itself: n-1 comparisons. compare second element to
+                       * all other except itself and first element (as these two were already compared when we processed the first element),
+                       * n-2 comparisons. compare third element to all other but itself and all previously compared: n-3 comparisons and so
+                       * on. when we reach the second to last element we have n-(n-1)=1 comparisons left. when we reach the last element it
+                       * has already been compared to all other. adding up all comparisons we get: (n-1) + (n-2) + (n-3) + ... + (n-(n-1)) =
+                       * Gauss sum of (n-1)
+                       */
         return sumOfSimilarities / numberOfComparisons;
     }
 

diff --git a/jplag/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java b/jplag/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java
@@ -12,6 +12,7 @@
 import org.slf4j.LoggerFactory;
 
 import de.jplag.*;
+import de.jplag.reporting.reportobject.mapper.ClusteringResultMapper;
 import de.jplag.reporting.reportobject.model.*;
 import de.jplag.reporting.reportobject.model.Match;
 
@@ -21,6 +22,7 @@
 public class ReportObjectFactory {
 
     private static final Logger logger = LoggerFactory.getLogger(ReportObjectFactory.class);
+    private static final ClusteringResultMapper clusteringResultMapper = new ClusteringResultMapper();
 
     /**
      * Converts a JPlagResult to a JPlagReport.
@@ -58,7 +60,7 @@ private static OverviewReport generateOverviewReport(JPlagResult result) {
         overviewReport.setExecutionTime(result.getDuration());
         overviewReport.setComparisonNames(getComparisonNames(comparisons));
         overviewReport.setMetrics(getMetrics(result));
-        overviewReport.setClusters(getClusters(result));
+        overviewReport.setClusters(clusteringResultMapper.map(result));
 
         return overviewReport;
     }
@@ -166,13 +168,6 @@ private static Match convertMatchToReportMatch(JPlagComparison comparison, de.jp
         return new Match(startTokenFirst.getFile(), startTokenSecond.getFile(), startFirst, endFirst, startSecond, endSecond, tokens);
     }
 
-    // TODO implement after PR Read clustering #281
-    private static List<Cluster> getClusters(JPlagResult result) {
-        // List<ClusteringResult<Submission>> clusters = result.getClusteringResult();
-        // return clusters.map( c -> new Cluster(getAvgSimilarity, getStrength, c.getMembers().map(Submission::getName)))
-        return List.of();
-    }
-
     private static List<String> readFileLines(File file) {
         List<String> lines = new ArrayList<>();
         try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {

diff --git a/jplag/src/main/java/de/jplag/reporting/reportobject/mapper/ClusteringResultMapper.java b/jplag/src/main/java/de/jplag/reporting/reportobject/mapper/ClusteringResultMapper.java
@@ -0,0 +1,26 @@
+package de.jplag.reporting.reportobject.mapper;
+
+import java.util.Collection;
+import java.util.List;
+
+import de.jplag.JPlagResult;
+import de.jplag.Submission;
+import de.jplag.clustering.ClusteringResult;
+import de.jplag.reporting.reportobject.model.Cluster;
+
+/**
+ * Converts the clustering results held by a {@link JPlagResult} into the {@link Cluster} DTOs used for the JSON report.
+ */
+public class ClusteringResultMapper {
+    /**
+     * Flattens the clusters of every clustering result into a single list of report clusters.
+     * @param result the JPlag result whose clustering results are read
+     * @return one report cluster for each cluster found in the result
+     */
+    public List<Cluster> map(JPlagResult result) {
+        var allClusters = result.getClusteringResult().stream().map(ClusteringResult::getClusters).flatMap(Collection::stream);
+        return allClusters.map(this::convertCluster).toList();
+    }
+
+    /**
+     * Builds the report representation of one cluster; members are represented by their submission names.
+     * @param from the cluster produced by the clustering
+     * @return report cluster carrying average similarity, community strength and member names
+     */
+    private Cluster convertCluster(de.jplag.clustering.Cluster<Submission> from) {
+        List<String> memberNames = from.getMembers().stream().map(Submission::getName).toList();
+        return new Cluster(from.getAverageSimilarity(), from.getCommunityStrength(), memberNames);
+    }
+}
diff --git a/jplag/src/main/java/de/jplag/reporting/reportobject/model/Cluster.java b/jplag/src/main/java/de/jplag/reporting/reportobject/model/Cluster.java
@@ -4,6 +4,6 @@
 
 import com.fasterxml.jackson.annotation.JsonProperty;
 
-public record Cluster(@JsonProperty("average_similarity") float averageSimilarity,
-                      @JsonProperty("strength") float strength, @JsonProperty("members") List<String> members) {
+public record Cluster(@JsonProperty("average_similarity") float averageSimilarity, @JsonProperty("strength") float strength,
+        @JsonProperty("members") List<String> members) {
 }
diff --git a/jplag/src/test/java/de/jplag/clustering/ClusteringResultTest.java b/jplag/src/test/java/de/jplag/clustering/ClusteringResultTest.java
@@ -83,7 +83,6 @@ void averageSimilarity2() {
         setEntries(similarity, 3, 4, 0.5);
         setEntries(similarity, 3, 5, 0.05);
 
-
         ClusteringResult<Integer> result = ClusteringResult.fromIntegerCollections(List.of(List.of(0, 1, 4), List.of(2, 3, 5)), similarity);
         var clusters = new ArrayList<>(result.getClusters());
 

diff --git a/jplag/src/test/java/de/jplag/reporting/reportobject/mapper/ClusteringResultMapperTest.java b/jplag/src/test/java/de/jplag/reporting/reportobject/mapper/ClusteringResultMapperTest.java
@@ -0,0 +1,48 @@
+package de.jplag.reporting.reportobject.mapper;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import de.jplag.JPlagResult;
+import de.jplag.Submission;
+import de.jplag.clustering.Cluster;
+import de.jplag.clustering.ClusteringResult;
+
+/**
+ * Checks that {@link ClusteringResultMapper} turns the clusters of a {@link JPlagResult} into the expected report clusters.
+ */
+public class ClusteringResultMapperTest {
+    private final ClusteringResultMapper clusteringResultMapper = new ClusteringResultMapper();
+
+    @Test
+    public void test() {
+        // given: a result with a single clustering that holds two clusters
+        Cluster<Submission> firstCluster = createClusterWith(0.2f, 0.4f, "1", "2");
+        Cluster<Submission> secondCluster = createClusterWith(0.3f, 0.6f, "3", "4", "5");
+        ClusteringResult<Submission> clustering = new ClusteringResult<>(List.of(firstCluster, secondCluster), 0.3f);
+        JPlagResult jplagResult = mock(JPlagResult.class);
+        when(jplagResult.getClusteringResult()).thenReturn(List.of(clustering));
+
+        // when
+        var mapped = clusteringResultMapper.map(jplagResult);
+
+        // then: the report cluster takes (averageSimilarity, strength, members), in that order
+        var expected = List.of(new de.jplag.reporting.reportobject.model.Cluster(0.4f, 0.2f, List.of("1", "2")),
+                new de.jplag.reporting.reportobject.model.Cluster(0.6f, 0.3f, List.of("3", "4", "5")));
+        assertEquals(expected, mapped);
+    }
+
+    /**
+     * Creates a cluster whose members are mocked submissions named after the given ids.
+     */
+    private Cluster<Submission> createClusterWith(Float communityStrength, Float averageSimilarity, String... ids) {
+        List<Submission> members = Arrays.stream(ids).map(this::submissionWithId).toList();
+        return new Cluster<>(members, communityStrength, averageSimilarity);
+    }
+
+    /**
+     * Creates a mocked submission that reports the given id as its name.
+     */
+    private Submission submissionWithId(String id) {
+        Submission mocked = mock(Submission.class);
+        when(mocked.getName()).thenReturn(id);
+        return mocked;
+    }
+}