Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version 2.6.1 #147

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import lombok.Getter;
import lombok.NoArgsConstructor;

import java.util.Collections;
import java.util.List;

@Getter
Expand All @@ -19,9 +18,4 @@ public class ComplexNoticeFormatDto {
public int getNormalNoticeSize() {
return normalNoticeList.size();
}

public void reverseEachNoticeList() {
Collections.reverse(importantNoticeList);
Collections.reverse(normalNoticeList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ public class ScrapingResultDto {

private Document document;

private String url;
private String viewUrl;
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ public List<ComplexNoticeFormatDto> scrap(DeptInfo deptInfo, Function<DeptInfo,
}
}


return noticeDtoList;
}

Expand All @@ -53,7 +52,7 @@ private List<ComplexNoticeFormatDto> htmlParsingFromScrapingResult(DeptInfo dept
List<ComplexNoticeFormatDto> noticeDtoList = new LinkedList<>();
for (ScrapingResultDto reqResult : requestResults) {
Document document = reqResult.getDocument();
String viewUrl = reqResult.getUrl();
String viewUrl = reqResult.getViewUrl();

RowsDto rowsDto = deptInfo.parse(document);
List<CommonNoticeFormatDto> importantNoticeFormatDtos = rowsDto.buildImportantRowList(viewUrl);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ private HttpHeaders createKuisNoticeRequestHeader(String sessionId) {
private List<CommonNoticeFormatDto> convertToCommonFormatDto(List<KuisNoticeDto> kuisNoticeDtoList) {
return kuisNoticeDtoList.stream()
.map(dto -> (CommonNoticeFormatDto) dtoConverter.convert(dto))
.toList();
.collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
package com.kustacks.kuring.worker.scrap.client.notice;

import com.kustacks.kuring.common.exception.code.ErrorCode;
import com.kustacks.kuring.common.exception.InternalLogicException;
import com.kustacks.kuring.common.exception.code.ErrorCode;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.client.JsoupClient;
import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import org.springframework.util.StopWatch;

import java.io.IOException;
import java.util.LinkedList;
import java.util.Collections;
import java.util.List;

@Slf4j
@Component
public class LatestPageNoticeApiClient implements NoticeApiClient<ScrapingResultDto, DeptInfo> {

private static final int PAGE_NUM = 1; // recentPage는 pageNum 인자가 1부터 시작
private static final int ARTICLE_NUMBERS_PER_PAGE = 12;
private static final int LATEST_SCRAP_TIMEOUT = 60000; // 1분
private static final int LATEST_SCRAP_ALL_TIMEOUT = 120000; // 2분
private static final int START_PAGE_NUM = 1; // page는 인자가 1부터 시작
private static final int ROW_NUMBERS_PER_PAGE = 20;
private static final int LATEST_SCRAP_TIMEOUT = 2000; // 2초
private static final int LATEST_SCRAP_ALL_TIMEOUT = 60000; // 1분

private final JsoupClient jsoupClient;

Expand All @@ -31,68 +32,61 @@ public LatestPageNoticeApiClient(JsoupClient normalJsoupClient) {

@Override
public List<ScrapingResultDto> request(DeptInfo deptInfo) throws InternalLogicException {
int size = getDeptInfoSize(deptInfo);

List<ScrapingResultDto> reqResults = new LinkedList<>();
for(int i = 0; i < size; i++) {
try {
ScrapingResultDto resultDto = getScrapingResultDto(i, deptInfo, ARTICLE_NUMBERS_PER_PAGE, LATEST_SCRAP_TIMEOUT);
reqResults.add(resultDto);
} catch (IOException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_SCRAP, e);
} catch (NullPointerException | IndexOutOfBoundsException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_PARSE, e);
}
try {
ScrapingResultDto resultDto = getScrapingResultDto(deptInfo, ROW_NUMBERS_PER_PAGE, LATEST_SCRAP_TIMEOUT);
return List.of(resultDto);
} catch (IOException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_SCRAP, e);
} catch (NullPointerException | IndexOutOfBoundsException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_PARSE, e);
}

return reqResults;
}

@Override
public List<ScrapingResultDto> requestAll(DeptInfo deptInfo) throws InternalLogicException {
int size = getDeptInfoSize(deptInfo);

List<ScrapingResultDto> reqResults = new LinkedList<>();
for (int i = 0; i < size; i++) {
try {
int totalNoticeSize = getTotalNoticeSize(i, deptInfo);

ScrapingResultDto resultDto = getScrapingResultDto(i, deptInfo, totalNoticeSize, LATEST_SCRAP_ALL_TIMEOUT);
reqResults.add(resultDto);
} catch (IOException e) {
log.info("Department Scrap all IOException: {}", e.getMessage());
} catch (NullPointerException | IndexOutOfBoundsException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_PARSE, e);
}
try {
String url = buildUrlForTotalNoticeCount(deptInfo);
int totalNoticeSize = getTotalNoticeSize(url);

ScrapingResultDto resultDto = getScrapingResultDto(deptInfo, totalNoticeSize, LATEST_SCRAP_ALL_TIMEOUT);
return List.of(resultDto);
} catch (IOException e) {
log.info("Department Scrap all IOException: {}", e.getMessage());
} catch (NullPointerException | IndexOutOfBoundsException e) {
throw new InternalLogicException(ErrorCode.NOTICE_SCRAPER_CANNOT_PARSE, e);
}

return reqResults;
return Collections.emptyList();
}

private int getDeptInfoSize(DeptInfo deptInfo) {
return deptInfo.getNoticeScrapInfo().getBoardSeqs().size();
}

private ScrapingResultDto getScrapingResultDto(int index, DeptInfo deptInfo, int totalNoticeSize, int timeout) throws IOException {
String requestUrl = deptInfo.createRequestUrl(index, totalNoticeSize, PAGE_NUM);
String viewUrl = deptInfo.createViewUrl(index);

Document document = jsoupClient.get(requestUrl, timeout);

return new ScrapingResultDto(document, viewUrl);
private String buildUrlForTotalNoticeCount(DeptInfo deptInfo) {
return deptInfo.createRequestUrl(1, 1);
}

private int getTotalNoticeSize(int index, DeptInfo deptInfo) throws IOException, IndexOutOfBoundsException, NullPointerException {
String url = deptInfo.createRequestUrl(index, 1, 1);

public int getTotalNoticeSize(String url) throws IOException, IndexOutOfBoundsException, NullPointerException {
Document document = jsoupClient.get(url, LATEST_SCRAP_TIMEOUT);

Element totalNoticeSizeElement = document.selectFirst(".pl15 > strong");
Element totalNoticeSizeElement = document.selectFirst(".util-search strong");
if (totalNoticeSizeElement == null) {
totalNoticeSizeElement = document.selectFirst(".total_count");
}

assert totalNoticeSizeElement != null;
return Integer.parseInt(totalNoticeSizeElement.ownText());
}

private ScrapingResultDto getScrapingResultDto(DeptInfo deptInfo, int rowSize, int timeout) throws IOException {
String requestUrl = deptInfo.createRequestUrl(START_PAGE_NUM, rowSize);
String viewUrl = deptInfo.createViewUrl();

StopWatch stopWatch = new StopWatch(deptInfo.getDeptName() + "Request");
stopWatch.start();

Document document = jsoupClient.get(requestUrl, timeout);

stopWatch.stop();
log.info("[{}] takes {}millis to respond", deptInfo.getDeptName(), stopWatch.getTotalTimeMillis());

return new ScrapingResultDto(document, viewUrl);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ public class DeptInfo {
protected StaffScrapInfo staffScrapInfo;
protected DepartmentName departmentName;
protected String collegeName;
protected String code;

public List<ScrapingResultDto> scrapLatestPageHtml() {
return noticeApiClient.request(this);
Expand All @@ -48,26 +47,22 @@ public List<String> getProfessorForumIds() {
return this.staffScrapInfo.getProfessorForumId();
}

public String createRequestUrl(int index, int curPage, int pageNum) {
return UriComponentsBuilder.fromUriString(latestPageNoticeProperties.getListUrl())
.queryParam("siteId", noticeScrapInfo.getSiteId())
.queryParam("boardSeq", noticeScrapInfo.getBoardSeqs().get(index))
.queryParam("menuSeq", noticeScrapInfo.getMenuSeqs().get(index))
.queryParam("curPage", curPage)
.queryParam("pageNum", pageNum)
.buildAndExpand(departmentName.getHostPrefix())
.toUriString();
public String createRequestUrl(int page, int row) {
return UriComponentsBuilder
.fromUriString(latestPageNoticeProperties.getListUrl())
.queryParam("page", page)
.queryParam("row", row)
.buildAndExpand(
noticeScrapInfo.getSiteName(),
noticeScrapInfo.getSiteName(),
noticeScrapInfo.getSiteId()
).toUriString();
}

public String createViewUrl(int index) {
return UriComponentsBuilder
.fromUriString(latestPageNoticeProperties.getViewUrl())
.queryParam("siteId", noticeScrapInfo.getSiteId())
.queryParam("boardSeq", noticeScrapInfo.getBoardSeqs().get(index))
.queryParam("menuSeq", noticeScrapInfo.getMenuSeqs().get(index))
.queryParam("seq", "")
.buildAndExpand(departmentName.getHostPrefix())
.toUriString();
public String createViewUrl() {
return latestPageNoticeProperties.getViewUrl()
.replaceAll("\\{department\\}", noticeScrapInfo.getSiteName())
.replace("{siteId}", String.valueOf(noticeScrapInfo.getSiteId()));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,14 @@

import lombok.Getter;

import java.util.List;

@Getter
public class NoticeScrapInfo {

private final List<String> forumIds;
private final String siteId;
private final List<String> boardSeqs;
private final List<String> menuSeqs;
private final String siteName;
private final int siteId;

public NoticeScrapInfo(List<String> forumIds, String siteId, List<String> boardSeqs, List<String> menuSeqs) {
this.forumIds = forumIds;
public NoticeScrapInfo(String siteName, int siteId) {
this.siteName = siteName;
this.siteId = siteId;
this.boardSeqs = boardSeqs;
this.menuSeqs = menuSeqs;
}
}
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package com.kustacks.kuring.worker.scrap.deptinfo.architecture;

import com.kustacks.kuring.notice.domain.DepartmentName;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.client.notice.NoticeApiClient;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.NoticeScrapInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.RegisterDepartmentMap;
import com.kustacks.kuring.worker.scrap.deptinfo.StaffScrapInfo;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.parser.notice.NoticeHtmlParserTemplate;

import java.util.Collections;
import java.util.List;

@RegisterDepartmentMap(key = DepartmentName.ARCHITECTURE)
import static com.kustacks.kuring.notice.domain.DepartmentName.ARCHITECTURE;

@RegisterDepartmentMap(key = ARCHITECTURE)
public class ArchitectureDept extends ArchitectureCollege {

public ArchitectureDept(NoticeApiClient<ScrapingResultDto, DeptInfo> latestPageNoticeApiClient,
Expand All @@ -23,14 +23,9 @@ public ArchitectureDept(NoticeApiClient<ScrapingResultDto, DeptInfo> latestPageN
this.htmlParser = latestPageNoticeHtmlParser;
this.latestPageNoticeProperties = latestPageNoticeProperties;

List<String> professorForumIds = List.of("11830", "17940");
List<String> forumIds = Collections.emptyList();
List<String> boardSeqs = List.of("700");
List<String> menuSeqs = List.of("5168");

List<String> professorForumIds = List.of("9815");
this.staffScrapInfo = new StaffScrapInfo(professorForumIds);
this.noticeScrapInfo = new NoticeScrapInfo(forumIds, "CAKU", boardSeqs, menuSeqs);
this.code = "127320";
this.departmentName = DepartmentName.ARCHITECTURE;
this.noticeScrapInfo = new NoticeScrapInfo(ARCHITECTURE.getHostPrefix(), 397);
this.departmentName = ARCHITECTURE;
}
}
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package com.kustacks.kuring.worker.scrap.deptinfo.art_design;

import com.kustacks.kuring.notice.domain.DepartmentName;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.client.notice.NoticeApiClient;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.NoticeScrapInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.RegisterDepartmentMap;
import com.kustacks.kuring.worker.scrap.deptinfo.StaffScrapInfo;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.parser.notice.NoticeHtmlParserTemplate;

import java.util.Collections;
import java.util.List;

@RegisterDepartmentMap(key = DepartmentName.APPAREL_DESIGN)
import static com.kustacks.kuring.notice.domain.DepartmentName.APPAREL_DESIGN;

@RegisterDepartmentMap(key = APPAREL_DESIGN)
public class ApparelDesignDept extends ArtDesignCollege {

public ApparelDesignDept(NoticeApiClient<ScrapingResultDto, DeptInfo> latestPageNoticeApiClient,
Expand All @@ -23,14 +23,9 @@ public ApparelDesignDept(NoticeApiClient<ScrapingResultDto, DeptInfo> latestPage
this.htmlParser = latestPageNoticeHtmlParser;
this.latestPageNoticeProperties = latestPageNoticeProperties;

List<String> professorForumIds = List.of("9723");
List<String> forumIds = Collections.emptyList();
List<String> boardSeqs = List.of("1007");
List<String> menuSeqs = List.of("6987");

List<String> professorForumIds = List.of("11194");
this.staffScrapInfo = new StaffScrapInfo(professorForumIds);
this.noticeScrapInfo = new NoticeScrapInfo(forumIds, "APPAREL", boardSeqs, menuSeqs);
this.code = "122404";
this.departmentName = DepartmentName.APPAREL_DESIGN;
this.noticeScrapInfo = new NoticeScrapInfo(APPAREL_DESIGN.getHostPrefix(), 956);
this.departmentName = APPAREL_DESIGN;
}
}
Original file line number Diff line number Diff line change
@@ -1,36 +1,32 @@
package com.kustacks.kuring.worker.scrap.deptinfo.art_design;

import com.kustacks.kuring.notice.domain.DepartmentName;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.client.notice.NoticeApiClient;
import com.kustacks.kuring.worker.scrap.client.notice.property.LatestPageNoticeProperties;
import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.NoticeScrapInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.RegisterDepartmentMap;
import com.kustacks.kuring.worker.scrap.deptinfo.StaffScrapInfo;
import com.kustacks.kuring.worker.dto.ScrapingResultDto;
import com.kustacks.kuring.worker.scrap.parser.notice.NoticeHtmlParserTemplate;

import java.util.Collections;
import java.util.List;

@RegisterDepartmentMap(key = DepartmentName.COMM_DESIGN)
import static com.kustacks.kuring.notice.domain.DepartmentName.COMM_DESIGN;

@RegisterDepartmentMap(key = COMM_DESIGN)
public class CommunicationDesignDept extends ArtDesignCollege {

public CommunicationDesignDept(NoticeApiClient<ScrapingResultDto, DeptInfo> latestPageNoticeApiClient,
NoticeHtmlParserTemplate latestPageNoticeHtmlParserTwo, LatestPageNoticeProperties latestPageNoticeProperties) {
NoticeHtmlParserTemplate latestPageNoticeHtmlParser, LatestPageNoticeProperties latestPageNoticeProperties) {
super();
this.noticeApiClient = latestPageNoticeApiClient;
this.htmlParser = latestPageNoticeHtmlParserTwo;
this.htmlParser = latestPageNoticeHtmlParser;
this.latestPageNoticeProperties = latestPageNoticeProperties;

List<String> professorForumIds = Collections.emptyList();
List<String> forumIds = Collections.emptyList();
List<String> boardSeqs = Collections.emptyList();
List<String> menuSeqs = Collections.emptyList();

this.staffScrapInfo = new StaffScrapInfo(professorForumIds);
this.noticeScrapInfo = new NoticeScrapInfo(forumIds, "COMMDESIGN", boardSeqs, menuSeqs);
this.code = "122402";
this.departmentName = DepartmentName.COMM_DESIGN;
this.noticeScrapInfo = new NoticeScrapInfo(COMM_DESIGN.getHostPrefix(), 0);
this.departmentName = COMM_DESIGN;
}
}
Loading
Loading