Skip to content

Commit

Permalink
Revert "Remove delay between mining runs"
Browse files Browse the repository at this point in the history
  • Loading branch information
dabico authored Jun 9, 2024
1 parent 6203eb8 commit 20d2973
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ Here's a list of project-specific arguments supported by the application that yo
| `ghs.crawler.minimum-stars` | int | 10 | Inclusive lower bound for the number of stars a project needs to have in order to be picked up by the crawler. Must not be negative. |
| `ghs.crawler.languages` | List<String> | See [application.properties](src/main/resources/application.properties) | List of language names that will be targeted during crawling. Must not contain blank strings. To ensure proper operation, the names must match those specified in [linguist](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml). |
| `ghs.crawler.start-date` | Date | 2008-01-01T00:00:00Z | Default crawler start date: the earliest date for repository crawling in the absence of prior crawl jobs. Value format: `yyyy-MM-dd'T'HH:mm:ss'Z'`. |
| `ghs.crawler.delay-between-runs` | Duration | PT6H | Delay between successive crawler runs, expressed as an ISO-8601 duration string (e.g. `PT6H` for six hours). |
| `ghs.analysis.enabled` | Boolean | true | Specifies if the analysis job is enabled. |
| `ghs.analysis.delay-between-runs` | Duration | PT6H | Delay between successive analysis runs, expressed as an ISO-8601 duration string (e.g. `PT6H` for six hours). |
| `ghs.analysis.max-pool-threads` | int | 3 | Maximum number of live threads dedicated to concurrently analyzing repositories. Must be positive. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
import org.springframework.format.annotation.DateTimeFormat;

import javax.validation.constraints.NotBlank;
import javax.validation.constraints.NotNull;
import javax.validation.constraints.PastOrPresent;
import javax.validation.constraints.PositiveOrZero;
import java.time.Duration;
import java.util.Date;
import java.util.List;

Expand All @@ -28,17 +30,22 @@ public class CrawlerProperties {
@PastOrPresent
Date startDate;

@NotNull
Duration delayBetweenRuns;

@ConstructorBinding
public CrawlerProperties(
Boolean enabled,
int minimumStars,
List<String> languages,
@DateTimeFormat(pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'")
Date startDate
Date startDate,
Duration delayBetweenRuns
) {
this.enabled = enabled;
this.minimumStars = minimumStars;
this.languages = languages;
this.startDate = startDate;
this.delayBetweenRuns = delayBetweenRuns;
}
}
9 changes: 7 additions & 2 deletions src/main/java/ch/usi/si/seart/job/CrawlProjectsJob.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package ch.usi.si.seart.job;

import ch.usi.si.seart.config.properties.CrawlerProperties;
import ch.usi.si.seart.exception.MetadataCrawlingException;
import ch.usi.si.seart.exception.UnsplittableRangeException;
import ch.usi.si.seart.github.GitHubGraphQlConnector;
Expand Down Expand Up @@ -47,7 +48,6 @@
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
Expand All @@ -69,10 +69,12 @@ public class CrawlProjectsJob implements Runnable {
GitHubRestConnector gitHubRestConnector;
GitHubGraphQlConnector gitHubGraphQlConnector;

CrawlerProperties crawlerProperties;

Ranges.Printer<Date> rangePrinter;
Ranges.Splitter<Date> rangeSplitter;

@Scheduled(fixedDelay = 1, timeUnit = TimeUnit.SECONDS)
@Scheduled(fixedDelayString = "${ghs.crawler.delay-between-runs}")
public void run() {
log.info("Initializing language queue...");
Collection<Language> languages = languageService.getTargetedLanguages();
Expand All @@ -84,6 +86,9 @@ public void run() {
Language.Progress progress = languageService.getProgress(language);
new LanguageCrawler(language, progress).run();
}
Duration delay = crawlerProperties.getDelayBetweenRuns();
Instant instant = Instant.now().plus(delay);
log.info("Next crawl scheduled for: {}", Date.from(instant));
}

@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ ghs.crawler.languages[40]=F#
ghs.crawler.languages[41]=Elm
ghs.crawler.languages[42]=Zig
ghs.crawler.start-date=2008-01-01T00:00:00Z
ghs.crawler.delay-between-runs=PT6H

# Analysis Configuration
ghs.analysis.enabled=true
Expand Down

0 comments on commit 20d2973

Please sign in to comment.