-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
7-1 [BE] [메인 - 검색창 - 자동완성] Elasticsearch db 연결 #55
Changes from all commits
f7bcc87
f4f4a7e
d7c1b57
3629b26
ab2df0d
b967fd3
bbcbc76
8ebd96b
80e63e6
367a7e9
b86ec67
1f2067b
e7f0168
1323c1c
aa1c362
eaa6590
c9b9d92
7cc8e70
4729854
dbccdc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import { RedisModule } from '@liaoliaots/nestjs-redis'; | ||
import { ConfigModule } from '@nestjs/config'; | ||
import { Test, TestingModule } from '@nestjs/testing'; | ||
import { RankingController } from './ranking.controller'; | ||
import { RankingService } from './ranking.service'; | ||
|
||
// Spec for RankingController. The testing module registers ConfigModule (pointed at
// `.dev.env`) and RedisModule with host/port/password taken from process.env; no mock
// is installed here, so RankingService runs against whatever Redis those values name.
describe('RankingServiceTest', () => {
  let controller: RankingController;

  beforeEach(async () => {
    // ConfigModule.forRoot is evaluated before the Redis options read process.env,
    // exactly as in the inline form (presumably forRoot loads `.dev.env` — confirm).
    const configImport = ConfigModule.forRoot({ isGlobal: true, envFilePath: `.dev.env` });
    const redisImport = RedisModule.forRoot({
      config: {
        host: process.env.REDIS_HOST,
        // Falls back to 6379 when REDIS_PORT is missing or not a number.
        port: parseInt(process.env.REDIS_PORT, 10) || 6379,
        password: process.env.REDIS_PASSWORD,
      },
    });

    const testingModule: TestingModule = await Test.createTestingModule({
      imports: [configImport, redisImport],
      controllers: [RankingController],
      providers: [RankingService],
    }).compile();

    controller = testingModule.get<RankingController>(RankingController);
  });

  it('get controller', () => {
    expect(controller).toBeDefined();
  });

  it('get Top10 Redis Data', async () => {
    const ranking = await controller.getTen();
    expect(ranking.length).toBeLessThanOrEqual(10);
  });
});
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,16 @@ | ||
import { Controller, Get, Param } from '@nestjs/common'; | ||
import { Controller, Get, Param, Query } from '@nestjs/common'; | ||
import { RankingService } from './ranking.service'; | ||
|
||
// HTTP controller mounted at 'keyword-ranking'; each handler delegates to RankingService. | ||
// NOTE(review): this span is a rendered diff, so superseded and current rows appear together. | ||
// `getAll` and `@Get(':keyword')` look like the old forms of `getTen` and `@Get('/insert')` — confirm. | ||
@Controller('keyword-ranking') | ||
export class RankingController { | ||
constructor(private readonly rankingService: RankingService) {} | ||
// GET on the controller root: returns what RankingService.getTen() resolves to. | ||
@Get() | ||
async getAll() { | ||
return this.rankingService.getAll(); | ||
async getTen() { | ||
return this.rankingService.getTen(); | ||
} | ||
// TODO: when a search happens, call this.popularService.insertRedis(searchStr); | ||
@Get(':keyword') | ||
async insertCache(@Param('keyword') searchStr: string) { | ||
// GET 'insert': takes the `keyword` query-string value and hands it to RankingService.insertRedis. | ||
@Get('/insert') | ||
async insertCache(@Query('keyword') searchStr: string) { | ||
// The promise returned by insertRedis is neither awaited nor returned from this handler. | ||
this.rankingService.insertRedis(searchStr); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,15 +6,19 @@ import { Ranking } from './entities/ranking.entity'; | |
@Injectable() | ||
export class RankingService { | ||
constructor(@InjectRedis() private readonly redis: Redis) {} | ||
// Builds up to ten { keyword, count } entries from the Redis sorted set named by REDIS_POPULAR_KEY. | ||
// NOTE(review): rendered diff — `async getAll() {` and the first `topTen.map(...)` that only sets | ||
// `keyword` look like the superseded version of this method; confirm against the repository. | ||
async getAll() { | ||
async getTen() { | ||
// Reverse range by score from '+inf' down to 1; no LIMIT is passed, the cut to 10 happens in slice below. | ||
const redisSearchData = await this.redis.zrevrangebyscore(process.env.REDIS_POPULAR_KEY, '+inf', 1); | ||
const topTen = redisSearchData.slice(0, 10); | ||
const result: Ranking[] = []; | ||
topTen.map((v) => { | ||
const tmp: Ranking = { keyword: '', count: 0 }; | ||
tmp.keyword = v; | ||
result.push(tmp); | ||
}); | ||
// One ZSCORE lookup per keyword, started together; each entry is pushed when its own lookup | ||
// resolves, so the order of `result` follows completion order rather than the order of `topTen`. | ||
// NOTE(review): WITHSCORES/LIMIT on the range query may give members and scores in one call — confirm. | ||
await Promise.all( | ||
topTen.map(async (v) => { | ||
const tmp: Ranking = { keyword: '', count: 0 }; | ||
tmp.keyword = v; | ||
const score = await this.redis.zscore(process.env.REDIS_POPULAR_KEY, v); | ||
tmp.count = Number(score); | ||
result.push(tmp); | ||
}), | ||
); | ||
return result; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 부분은 result에 push하지 않고, Promise.all 자체를 return 해줘도 될 것 같은데요? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 부분은 추후에 수정해보록하겠습니다. 이 PR은 elastic search에 관심을 두어야 할 것 같습니다. |
||
} | ||
async insertRedis(data: string) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import { Injectable } from '@nestjs/common'; | ||
import { HttpService } from '@nestjs/axios'; | ||
|
||
// In-memory work queues for fetching paper data in batches.
// Holds searched keywords and the URLs still to be requested; `runBatch` takes URLs
// off the front of the URL queue and requests them through the injected HttpService.
@Injectable()
export class BatchService {
  private keywordQueue: string[] = [];
  private urlQueue: string[] = [];

  constructor(private readonly httpService: HttpService) {}

  // Appends a searched keyword to the keyword queue.
  pushKeyword(keyword: string) {
    // (If the keyword was already searched within a certain time window it probably
    // does not need to be pushed again; a Redis TTL could be used for that.)
    this.keywordQueue.push(keyword);
  }

  // Purpose of BatchService: pull as many papers as possible into our own DB.
  // keyword search -> keyword is added to BatchService
  // (If the keyword was already searched within a certain time window it probably
  // does not need to be pushed again; a Redis TTL could be used for that.)
  // Once a keyword has been pushed onto keywordQueue:
  // 1. Push every paginated URL for that keyword onto urlQueue (rows=2000? would that be OK?)
  // 2. When urlQueue receives data, runBatch has to run (setInterval? or Observer?)
  // 3. runBatch should send a reasonable slice of requests (e.g. 50) at a time
  //    (a dedicated axios instance could take care of automatic retry)
  // 4. The data may already be in Elasticsearch. The crossref API results would need to be
  //    sorted by creation date descending or similar... was it sort=created,DESC?
  //    Q. Allow duplicate pushes, or filter them out before enqueueing?
  // 5. Results obtained by the batch job must be indexed into Elasticsearch.

  // Appends a URL to the queue consumed by runBatch.
  pushUrl(url: string) {
    this.urlQueue.push(url);
  }

  // Removes up to `batchSize` URLs from the front of the queue and requests them concurrently.
  // Resolves once every request has settled; responses and failures are currently discarded.
  async runBatch(batchSize = 50) {
    // splice (not slice) so the taken URLs leave the queue; otherwise every run would
    // request the same leading URLs again and the queue would never drain.
    const batch = this.urlQueue.splice(0, batchSize);
    // TODO: retry failed urls
    await Promise.allSettled(batch.map((url) => this.httpService.axiosRef.get(url)));
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import { IsNotEmpty, IsInt, IsBoolean } from 'class-validator'; | ||
|
||
// Search request parameters, validated with class-validator decorators. | ||
export class SearchDto { | ||
// TODO: must go through SearchValidationPipe | ||
// Currently the crossref query can be manipulated by the user | ||
// Search term; only non-emptiness is checked. | ||
@IsNotEmpty() | ||
keyword: string; | ||
|
||
// Defaults to 20 when not supplied; must be an integer. | ||
@IsInt() | ||
rows = 20; | ||
|
||
// Defaults to 1 when not supplied; must be an integer. | ||
@IsInt() | ||
page = 1; | ||
|
||
// Defaults to true when not supplied; must be a boolean. | ||
@IsBoolean() | ||
hasDoi = true; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,29 @@ | ||
import { Module } from '@nestjs/common'; | ||
import { SearchController } from './search.controller'; | ||
import { SearchService } from './search.service'; | ||
import { BatchService } from './batch.service'; | ||
import { HttpModule } from '@nestjs/axios'; | ||
import { ElasticsearchModule } from '@nestjs/elasticsearch'; | ||
import { HttpConnection } from '@elastic/elasticsearch'; | ||
// Search feature module: wires SearchController with SearchService and BatchService, | ||
// and registers HttpModule plus an Elasticsearch client. | ||
// NOTE(review): rendered diff — the single-line `imports: [HttpModule],` and | ||
// `providers: [SearchService],` rows look like the superseded entries; confirm. | ||
@Module({ | ||
imports: [HttpModule], | ||
imports: [ | ||
HttpModule, | ||
// Client options are produced by the factory from process.env | ||
// (ELASTIC_HOST, ELASTIC_USER, ELASTIC_PASSWORD). | ||
ElasticsearchModule.registerAsync({ | ||
useFactory: () => ({ | ||
node: process.env.ELASTIC_HOST, | ||
headers: { | ||
Accept: 'application/json', | ||
'Content-Type': 'application/json', | ||
}, | ||
auth: { | ||
username: process.env.ELASTIC_USER, | ||
password: process.env.ELASTIC_PASSWORD, | ||
}, | ||
// Uses the HttpConnection class exported by @elastic/elasticsearch. | ||
Connection: HttpConnection, | ||
}), | ||
}), | ||
], | ||
controllers: [SearchController], | ||
providers: [SearchService], | ||
providers: [SearchService, BatchService], | ||
}) | ||
export class SearchModule {} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,18 @@ | ||
import { Injectable } from '@nestjs/common'; | ||
import { HttpService } from '@nestjs/axios'; | ||
import { CrossRefResponse, CrossRefItem, PaperInfoExtended } from './entities/crossRef.entity'; | ||
import { CrossRefResponse, CrossRefItem, PaperInfoExtended, PaperInfo } from './entities/crossRef.entity'; | ||
import { CROSSREF_API_URL } from '../util'; | ||
import { ElasticsearchService } from '@nestjs/elasticsearch'; | ||
import { SearchHit } from '@elastic/elasticsearch/lib/api/types'; | ||
|
||
@Injectable() | ||
export class SearchService { | ||
constructor(private readonly httpService: HttpService) {} | ||
constructor(private readonly httpService: HttpService, private readonly esService: ElasticsearchService) {} | ||
// Requests CROSSREF_API_URL(keyword) and reads `items` and `total-results` from the response message. | ||
// NOTE(review): rendered diff — `return items;` looks like the superseded return that the | ||
// `{ items, totalItems }` return replaces; confirm against the repository. | ||
async getCrossRefAutoCompleteData(keyword: string) { | ||
const crossRefdata = await this.httpService.axiosRef.get<CrossRefResponse>(CROSSREF_API_URL(keyword)); | ||
const items = crossRefdata.data.message.items; | ||
return items; | ||
const totalItems = crossRefdata.data.message['total-results']; | ||
return { items, totalItems }; | ||
} | ||
|
||
async getCrossRefData(keyword: string, rows: number, page: number, isDoiExist: boolean) { | ||
|
@@ -26,6 +29,27 @@ export class SearchService { | |
return { items, totalItems }; | ||
} | ||
|
||
// Requests every result page for `keyword` via getCrossRefData and indexes the parsed
// papers through putElasticSearch.
//   keyword    - search term passed to getCrossRefData
//   totalItems - total number of results; determines how many pages are requested
//   rows       - page size; must be greater than zero
// Resolves only after every page request and every index call has settled. Failed pages
// and failed index calls are logged and do not reject the returned promise.
// Throws RangeError when `rows` is not a positive number.
async crawlAllCrossRefData(keyword: string, totalItems: number, rows: number) {
  if (!(rows > 0)) {
    // Previously this surfaced as an opaque "Invalid array length" RangeError.
    throw new RangeError(`rows must be a positive number, got ${rows}`);
  }
  const pageCount = Math.ceil(totalItems / rows);
  // NOTE(review): all page requests are started at once; for large totals a concurrency
  // cap may be needed — confirm against the upstream API's rate limits.
  const pages = await Promise.allSettled(
    Array.from({ length: pageCount }, (_, i) => this.getCrossRefData(keyword, rows, i + 1, false)),
  );
  console.log(totalItems);
  console.log(pages.length);

  const indexing: Promise<unknown>[] = [];
  for (const page of pages) {
    if (page.status === 'fulfilled') {
      // Review note carried over from the PR discussion: the `papers` array could be sent
      // to Elasticsearch in one request instead of one call per paper.
      const papers = this.parseCrossRefData(page.value.items);
      for (const paper of papers) {
        indexing.push(this.putElasticSearch(paper));
      }
    } else {
      console.log(page.reason);
    }
  }

  // Wait for the index calls so callers see completion and no rejection is left unhandled.
  const outcomes = await Promise.allSettled(indexing);
  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      console.log(outcome.reason);
    }
  }
}
parseCrossRefData(items: CrossRefItem[]) { | ||
return items | ||
.map((item) => { | ||
|
@@ -44,4 +68,49 @@ export class SearchService { | |
}) | ||
.filter((info) => info.title || info.authors?.length > 0); | ||
} | ||
// Indexes a single paper into the index named by process.env.ELASTIC_INDEX, using the
// paper's `doi` as the document id and a shallow copy of the paper as the document body.
// NOTE(review): when `doi` is undefined no id is supplied — confirm that is intended.
async putElasticSearch(paper: PaperInfoExtended) {
  const targetIndex = process.env.ELASTIC_INDEX;
  const { doi } = paper;
  const document = { ...paper };
  return this.esService.index({ index: targetIndex, id: doi, document });
}
// Searches process.env.ELASTIC_INDEX for documents whose `title` or `author` field matches
// `keyword` with a match_bool_prefix query (bool/should over both fields), returning at
// most `size` hits. If the search rejects, an empty hits object is returned instead.
// NOTE(review): parseCrossRefData filters on `authors` (plural) while this queries
// `author` — confirm the field name against the index mapping.
async getElasticSearch(keyword: string, size = 5) {
  const prefixClauses = ['title', 'author'].map((field) => ({
    match_bool_prefix: {
      [field]: { query: keyword },
    },
  }));
  const emptyResult = { hits: { hits: [] as SearchHit<PaperInfo>[], total: 0 } };

  return this.esService
    .search<PaperInfo>({
      index: process.env.ELASTIC_INDEX,
      size,
      query: { bool: { should: prefixClauses } },
    })
    .catch(() => emptyResult);
}
// Runs a search against process.env.ELASTIC_INDEX with no query and no size.
// NOTE(review): with no size given, the server's default page size presumably applies,
// so this may not return every document despite the name — confirm.
async getAllElasticData() {
  const index = process.env.ELASTIC_INDEX;
  return this.esService.search({ index });
}
//match: title , author (상위5개의 fuzzi점수를 비교해서 큰쪽을 가져가는걸로) | ||
} | ||
//title, author |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
redis module에 대한 mockup을 만들어서 진행해도 좋을 것 같아요~