Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public interface ArticleKeywordRepository extends Repository<ArticleKeyword, Int

List<ArticleKeyword> findAll(Pageable pageable);

List<ArticleKeyword> findAll();

@Query("""
SELECT new in.koreatech.koin.domain.community.article.dto.ArticleKeywordResult(k.id, k.keyword, COUNT(u))
FROM ArticleKeywordUserMap u
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
import java.util.Map;
import java.util.stream.Collectors;

import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import in.koreatech.koin.common.event.ArticleKeywordEvent;
import in.koreatech.koin.domain.community.article.model.Article;
import in.koreatech.koin.domain.community.keyword.model.ArticleKeyword;
import in.koreatech.koin.domain.community.keyword.model.ArticleKeywordUserMap;
import in.koreatech.koin.common.event.ArticleKeywordEvent;
import in.koreatech.koin.domain.community.keyword.repository.ArticleKeywordRepository;
import in.koreatech.koin.domain.community.keyword.repository.ArticleKeywordUserMapRepository;
import lombok.RequiredArgsConstructor;
Expand All @@ -24,56 +22,49 @@
@Transactional(readOnly = true)
public class KeywordExtractor {

private static final int KEYWORD_BATCH_SIZE = 100;

private final ArticleKeywordRepository articleKeywordRepository;
private final ArticleKeywordUserMapRepository articleKeywordUserMapRepository;

public List<ArticleKeywordEvent> matchKeyword(List<Article> articles, Integer authorId) {
Map<Integer, Map<Integer, String>> matchedKeywordByUserIdByArticleId = new LinkedHashMap<>();
int offset = 0;
List<ArticleKeyword> keywords = articleKeywordRepository.findAll();

while (true) {
Pageable pageable = PageRequest.of(offset / KEYWORD_BATCH_SIZE, KEYWORD_BATCH_SIZE);
List<ArticleKeyword> keywords = articleKeywordRepository.findAll(pageable);
if (keywords.isEmpty()) {
return List.of();
}

if (keywords.isEmpty()) {
break;
}
List<Integer> keywordIds = keywords.stream()
.map(ArticleKeyword::getId)
.toList();
Map<Integer, List<ArticleKeywordUserMap>> userMapsByKeywordId = articleKeywordUserMapRepository
.findAllByArticleKeywordIdIn(keywordIds)
.stream()
.filter(keywordUserMap -> !keywordUserMap.getIsDeleted())
.collect(Collectors.groupingBy(
keywordUserMap -> keywordUserMap.getArticleKeyword().getId(),
LinkedHashMap::new,
Collectors.toList()
));
List<Integer> keywordIds = keywords.stream()
.map(ArticleKeyword::getId)
.toList();
Map<Integer, List<ArticleKeywordUserMap>> userMapsByKeywordId = articleKeywordUserMapRepository
.findAllByArticleKeywordIdIn(keywordIds)
.stream()
.filter(keywordUserMap -> !keywordUserMap.getIsDeleted())
.collect(Collectors.groupingBy(
keywordUserMap -> keywordUserMap.getArticleKeyword().getId(),
LinkedHashMap::new,
Collectors.toList()
));
Comment on lines +29 to +46
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Keep the paged scan on this request path.

This now loads the full `ArticleKeyword` table and then issues an unbounded `findAllByArticleKeywordIdIn(keywordIds)` call covering every keyword id, all before checking only a few titles. `matchKeyword()` is called during lost-item creation (see `src/main/java/in/koreatech/koin/domain/community/article/service/LostItemArticleService.java`, lines 173-186 and 254-261), so latency and memory now grow with the total keyword volume rather than with the size of the current `articles` input. Please keep the batched/paged scan here, or move the full scan behind a cached/background path if it is only needed for testing.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/main/java/in/koreatech/koin/domain/community/util/KeywordExtractor.java`
around lines 29-46: the current implementation in `KeywordExtractor` loads all
`ArticleKeyword` rows and then calls
`articleKeywordUserMapRepository.findAllByArticleKeywordIdIn` with every id,
causing unbounded memory use and latency when `matchKeyword()` is called during
lost-item creation. Change this to a paged/batched scan. Iterate over
`ArticleKeyword` pages (use the `ArticleKeywordRepository` pageable/streaming
API); for each page, collect its ids and call
`articleKeywordUserMapRepository.findAllByArticleKeywordIdIn` for just that
batch (or use a repository method that accepts a page/stream). Build
`userMapsByKeywordId` incrementally by merging the per-page results, and keep
filtering out deleted `ArticleKeywordUserMap` entries as before, so that memory
and latency scale with the input batch rather than with the whole table.


for (Article article : articles) {
String title = article.getTitle();
for (ArticleKeyword keyword : keywords) {
if (!title.contains(keyword.getKeyword())) {
continue;
}
Map<Integer, String> matchedKeywordByUserId = matchedKeywordByUserIdByArticleId
.computeIfAbsent(article.getId(), ignored -> new LinkedHashMap<>());
Map<Integer, Map<Integer, String>> matchedKeywordByUserIdByArticleId = new LinkedHashMap<>();
for (Article article : articles) {
String title = article.getTitle();
for (ArticleKeyword keyword : keywords) {
if (!title.contains(keyword.getKeyword())) {
continue;
}
Map<Integer, String> matchedKeywordByUserId = matchedKeywordByUserIdByArticleId
.computeIfAbsent(article.getId(), ignored -> new LinkedHashMap<>());

for (ArticleKeywordUserMap keywordUserMap :
userMapsByKeywordId.getOrDefault(keyword.getId(), List.of())) {
Integer userId = keywordUserMap.getUser().getId();
matchedKeywordByUserId.merge(
userId,
keyword.getKeyword(),
this::pickHigherPriorityKeyword
);
}
for (ArticleKeywordUserMap keywordUserMap :
userMapsByKeywordId.getOrDefault(keyword.getId(), List.of())) {
Integer userId = keywordUserMap.getUser().getId();
matchedKeywordByUserId.merge(
userId,
keyword.getKeyword(),
this::pickHigherPriorityKeyword
);
}
}
offset += KEYWORD_BATCH_SIZE;
}

List<ArticleKeywordEvent> keywordEvents = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.data.domain.Pageable;
import org.springframework.test.util.ReflectionTestUtils;

import in.koreatech.koin.common.event.ArticleKeywordEvent;
Expand Down Expand Up @@ -51,9 +50,8 @@ void matchKeyword_withMultipleMatchedKeywordsInSingleArticle_createsSingleEvent(
ArticleKeyword keywordA = createKeyword(1, "근로", subscriber);
ArticleKeyword keywordB = createKeyword(2, "근로장학", subscriber);

when(articleKeywordRepository.findAll(any(Pageable.class)))
.thenReturn(List.of(keywordA, keywordB))
.thenReturn(List.of());
when(articleKeywordRepository.findAll())
.thenReturn(List.of(keywordA, keywordB));
when(articleKeywordUserMapRepository.findAllByArticleKeywordIdIn(any()))
.thenReturn(List.of(
keywordA.getArticleKeywordUserMaps().get(0),
Expand All @@ -79,9 +77,8 @@ void matchKeyword_whenNoKeywordsMatch_returnsEmptyResult() {
User subscriber = UserFixture.id_설정_코인_유저(1);
ArticleKeyword keyword = createKeyword(1, "장학금", subscriber);

when(articleKeywordRepository.findAll(any(Pageable.class)))
.thenReturn(List.of(keyword))
.thenReturn(List.of());
when(articleKeywordRepository.findAll())
.thenReturn(List.of(keyword));
when(articleKeywordUserMapRepository.findAllByArticleKeywordIdIn(any()))
.thenReturn(List.of(keyword.getArticleKeywordUserMaps().get(0)));

Expand All @@ -106,9 +103,8 @@ void matchKeyword_withMultipleArticles_createsEventPerArticle() {
ArticleKeyword firstKeyword = createKeyword(1, "근로", firstSubscriber);
ArticleKeyword secondKeyword = createKeyword(2, "장학금", secondSubscriber);

when(articleKeywordRepository.findAll(any(Pageable.class)))
.thenReturn(List.of(firstKeyword, secondKeyword))
.thenReturn(List.of());
when(articleKeywordRepository.findAll())
.thenReturn(List.of(firstKeyword, secondKeyword));
when(articleKeywordUserMapRepository.findAllByArticleKeywordIdIn(any()))
.thenReturn(List.of(
firstKeyword.getArticleKeywordUserMaps().get(0),
Expand All @@ -128,8 +124,7 @@ void matchKeyword_withMultipleArticles_createsEventPerArticle() {
@DisplayName("등록된 키워드가 없으면 빈 결과를 반환한다.")
void matchKeyword_whenNoKeywordsExist_returnsEmptyResult() {
Article article = mock(Article.class);
when(article.getId()).thenReturn(1);
when(articleKeywordRepository.findAll(any(Pageable.class))).thenReturn(List.of());
when(articleKeywordRepository.findAll()).thenReturn(List.of());

List<ArticleKeywordEvent> result = keywordExtractor.matchKeyword(List.of(article), null);

Expand Down
Loading