Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

7-1 [BE] [메인 - 검색창 - 자동완성] Elasticsearch db 연결 #55

Merged
merged 20 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f7bcc87
feat: 테스트코드 추가 및 유효성 파이프 추가
JunYupK Nov 16, 2022
f4f4a7e
fix: 프리티어 재설정
JunYupK Nov 16, 2022
d7c1b57
Merge branch 'dev' of https://github.com/boostcampwm-2022/web18-PRV i…
JunYupK Nov 17, 2022
3629b26
refactor: ranking system ranking 10개의 배열만 return하도록 재설정 && @Param 로직 변경
JunYupK Nov 17, 2022
ab2df0d
Merge branch 'dev' of https://github.com/boostcampwm-2022/web18-PRV i…
JunYupK Nov 17, 2022
b967fd3
fix: 500 error 수정
JunYupK Nov 17, 2022
bbcbc76
chore: console.log 제거
JunYupK Nov 17, 2022
8ebd96b
refactor: ranking-system return 데이터 타입 다시 회귀, 500error 해결 및 author fi…
JunYupK Nov 17, 2022
80e63e6
Merge branch 'dev' of https://github.com/boostcampwm-2022/web18-PRV i…
JunYupK Nov 21, 2022
367a7e9
feat: 검색 api 와 연관 검색어 api와 로직 통합
JunYupK Nov 21, 2022
b86ec67
feat: redis 테스트코드 일부 작성
JunYupK Nov 22, 2022
1f2067b
feat: elastic search 연결 및 index + search 테스트 추가
JunYupK Nov 22, 2022
e7f0168
feat: 엘라스틱 서치 캐싱 기능 완료 (5개) 이후 캐싱 자동화 예정 (n개) 이후 validationPipe 개선 예정…
JunYupK Nov 22, 2022
1323c1c
chore: merge conflict 해결
JunYupK Nov 22, 2022
aa1c362
fix: fE merge Conflict 해결
JunYupK Nov 22, 2022
eaa6590
chore: pR 피드백 사항 적용
JunYupK Nov 23, 2022
c9b9d92
fix: elastic search auth 로직 수정
leesungbin Nov 23, 2022
7cc8e70
fix: index 없을 때 빈 result 전달
leesungbin Nov 23, 2022
4729854
feat: elasticsearch 배포시 사용
leesungbin Nov 23, 2022
dbccdc9
chore: 미사용 test file 삭제
leesungbin Nov 23, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30,062 changes: 15,093 additions & 14,969 deletions backend/package-lock.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"test:e2e": "jest --config ./test/jest-e2e.json"
},
"dependencies": {
"@elastic/elasticsearch": "^8.5.0",
"@liaoliaots/nestjs-redis": "^9.0.4",
"@nestjs/axios": "^1.0.0",
"@nestjs/common": "^9.0.0",
Expand All @@ -38,6 +39,7 @@
},
"devDependencies": {
"@nestjs/cli": "^9.0.0",
"@nestjs/elasticsearch": "^9.0.0",
"@nestjs/schematics": "^9.0.0",
"@nestjs/testing": "^9.0.0",
"@types/express": "^4.17.13",
Expand Down
34 changes: 34 additions & 0 deletions backend/src/ranking/ranking.controller.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { RedisModule } from '@liaoliaots/nestjs-redis';
import { ConfigModule } from '@nestjs/config';
import { Test, TestingModule } from '@nestjs/testing';
import { RankingController } from './ranking.controller';
import { RankingService } from './ranking.service';

describe('RankingServiceTest', () => {
let controller: RankingController;

beforeEach(async () => {
const module: TestingModule = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({ isGlobal: true, envFilePath: `.dev.env` }),
RedisModule.forRoot({
config: {
host: process.env.REDIS_HOST,
port: parseInt(process.env.REDIS_PORT, 10) || 6379,
password: process.env.REDIS_PASSWORD,
},
}),
],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redis module에 대한 mockup을 만들어서 진행해도 좋을 것 같아요~

controllers: [RankingController],
providers: [RankingService],
}).compile();
controller = module.get<RankingController>(RankingController);
});
it('get controller', async () => {
expect(controller).toBeDefined();
});
it('get Top10 Redis Data', async () => {
const topTen = await controller.getTen();
expect(topTen.length).toBeLessThanOrEqual(10);
});
});
10 changes: 5 additions & 5 deletions backend/src/ranking/ranking.controller.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import { Controller, Get, Param } from '@nestjs/common';
import { Controller, Get, Param, Query } from '@nestjs/common';
import { RankingService } from './ranking.service';

@Controller('keyword-ranking')
export class RankingController {
constructor(private readonly rankingService: RankingService) {}
@Get()
async getAll() {
return this.rankingService.getAll();
async getTen() {
return this.rankingService.getTen();
}
// TODO: search 됐을 때, this.popularService.insertRedis(searchStr);
@Get(':keyword')
async insertCache(@Param('keyword') searchStr: string) {
@Get('/insert')
async insertCache(@Query('keyword') searchStr: string) {
this.rankingService.insertRedis(searchStr);
}
}
1 change: 1 addition & 0 deletions backend/src/ranking/ranking.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { RedisModule } from '@liaoliaots/nestjs-redis';
import { RankingController } from './ranking.controller';
import { RankingService } from './ranking.service';
import { ConfigModule } from '@nestjs/config';

@Module({
imports: [
ConfigModule.forRoot({ isGlobal: true, envFilePath: `.dev.env` }),
Expand Down
16 changes: 10 additions & 6 deletions backend/src/ranking/ranking.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@ import { Ranking } from './entities/ranking.entity';
@Injectable()
export class RankingService {
constructor(@InjectRedis() private readonly redis: Redis) {}
async getAll() {
async getTen() {
const redisSearchData = await this.redis.zrevrangebyscore(process.env.REDIS_POPULAR_KEY, '+inf', 1);
const topTen = redisSearchData.slice(0, 10);
const result: Ranking[] = [];
topTen.map((v) => {
const tmp: Ranking = { keyword: '', count: 0 };
tmp.keyword = v;
result.push(tmp);
});
await Promise.all(
topTen.map(async (v) => {
const tmp: Ranking = { keyword: '', count: 0 };
tmp.keyword = v;
const score = await this.redis.zscore(process.env.REDIS_POPULAR_KEY, v);
tmp.count = Number(score);
result.push(tmp);
}),
);
return result;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

이 부분은 result에 push하지 않고, Promise.all 자체를 return 해줘도 될 것 같은데요?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

이 부분은 추후에 수정해보도록 하겠습니다. 이 PR은 elastic search에 관심을 두어야 할 것 같습니다.

}
async insertRedis(data: string) {
Expand Down
35 changes: 35 additions & 0 deletions backend/src/search/batch.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { Injectable } from '@nestjs/common';
import { HttpService } from '@nestjs/axios';

/**
 * In-memory work queues for background fetching of paper data.
 *
 * Keywords and URLs are only held in process memory. Nothing in this class
 * schedules `runBatch`; a caller has to invoke it.
 */
@Injectable()
export class BatchService {
  private keywordQueue: string[] = [];
  private urlQueue: string[] = [];

  constructor(private readonly httpService: HttpService) {}

  /**
   * Appends a searched keyword to the keyword queue.
   *
   * @param keyword the keyword that was searched
   */
  pushKeyword(keyword: string) {
    // (If the keyword was already searched within a certain time window there is probably
    // no need to push it again; a Redis TTL could probably be used for that.)
    this.keywordQueue.push(keyword);
  }

  // Purpose of BatchService: bring as many papers as possible into our DB.
  // keyword search -> add the keyword to BatchService
  // (If the keyword was already searched within a certain time window there is probably
  // no need to push it again; a Redis TTL could probably be used for that.)
  // Planned flow once a keyword is pushed to keywordQueue:
  // 1. Put every paginated URL for that keyword into urlQueue (rows=2000? would that be okay?)
  // 2. When data arrives in urlQueue, runBatch has to run (setInterval? or Observer?)
  // 3. runBatch should cut off and send a reasonable number of requests (e.g. 50)
  //    (creating a dedicated axios instance for this would take care of automatic retry)
  // 4. The data may already be in elasticsearch. The crossref API results probably need to be
  //    sorted in reverse creation order or similar... was it sort=created,DESC?
  //    Q. Allow duplicate pushes, or filter them out before inserting?
  // 5. Results obtained by the batch job must be indexed into elasticsearch.

  /**
   * Appends a URL to the queue of requests that `runBatch` will send.
   *
   * @param url the URL to request later
   */
  pushUrl(url: string) {
    this.urlQueue.push(url);
  }

  /**
   * Removes up to 50 URLs from the front of the URL queue, sends a GET request
   * for each of them concurrently and waits until every request has settled.
   * Responses and failures are currently discarded.
   *
   * @returns a promise that resolves once all requests of this batch have settled
   */
  async runBatch() {
    const num = 50;
    // splice (not slice) so the URLs are actually dequeued; with slice every call
    // would re-request the same first `num` URLs and the queue would never drain.
    const batch = this.urlQueue.splice(0, num);
    // TODO: retry failed urls
    await Promise.allSettled(batch.map((url) => this.httpService.axiosRef.get(url)));
  }
}
17 changes: 17 additions & 0 deletions backend/src/search/pipe/search.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { IsNotEmpty, IsInt, IsBoolean } from 'class-validator';

/**
 * Shape of the query parameters for a paper search: keyword, page size,
 * page number and a DOI flag, each guarded by a class-validator decorator.
 *
 * NOTE(review): query-string values normally arrive as strings. Confirm that the
 * ValidationPipe using this DTO is configured to transform them, otherwise
 * `@IsInt()` / `@IsBoolean()` may reject supplied values and the defaults
 * below may not reach the handler.
 */
export class SearchDto {
// TODO: this must go through SearchValidationPipe.
// At the moment the crossref query can be manipulated by the user.
// Search keyword; must not be empty.
@IsNotEmpty()
keyword: string;

// Number of rows per page; must be an integer, initialised to 20.
@IsInt()
rows = 20;

// Page number; must be an integer, initialised to 1.
@IsInt()
page = 1;

// DOI flag; must be a boolean, initialised to true.
@IsBoolean()
hasDoi = true;
}
36 changes: 27 additions & 9 deletions backend/src/search/search.controller.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
import { Controller, Get, Query } from '@nestjs/common';
import { Controller, Get, Query, UsePipes, ValidationPipe } from '@nestjs/common';
import { SearchService } from './search.service';
import { PositiveIntegerValidationPipe, SearchValidationPipe } from './pipe/search.pipe';
import { SearchDto } from './pipe/search.dto';
import { SearchTotalHits } from '@elastic/elasticsearch/lib/api/types';
import { BatchService } from './batch.service';

@Controller('search')
export class SearchController {
constructor(private readonly searchService: SearchService) {}
constructor(private readonly searchService: SearchService, private readonly batchService: BatchService) {}
@Get('auto-complete')
async getAutoCompletePapers(@Query('keyword', SearchValidationPipe) keyword: string) {
const items = await this.searchService.getCrossRefAutoCompleteData(keyword);
this.batchService.pushKeyword(keyword);

const elastic = await this.searchService.getElasticSearch(keyword);
console.log(elastic.hits.hits);
const elasticDataCount = (elastic.hits.total as SearchTotalHits).value;
if (elasticDataCount > 0) {
return elastic.hits.hits.map((paper) => paper._source);
}

const { items } = await this.searchService.getCrossRefAutoCompleteData(keyword);
const papers = this.searchService.parseCrossRefData(items);
papers.map((paper) => {
this.searchService.putElasticSearch(paper);
});
// crossref에 있는거 다 갖고오기.
// this.searchService.crawlAllCrossRefData(keyword, totalItems, 1000);
return papers;
}

@Get()
async getPapers(
@Query('keyword', SearchValidationPipe) keyword: string,
@Query('rows', PositiveIntegerValidationPipe) rows = 20,
@Query('page', PositiveIntegerValidationPipe) page = 1,
@Query('hasDoi') hasDoi = true,
) {
@UsePipes(ValidationPipe)
async getPapers(@Query() query: SearchDto) {
const { keyword, rows, page, hasDoi } = query;
const { items, totalItems } = await this.searchService.getCrossRefData(keyword, rows, page, hasDoi);
const papers = this.searchService.parseCrossRefData(items);
return {
Expand All @@ -29,4 +43,8 @@ export class SearchController {
},
};
}
@Get('getAll')
async getAllElastic() {
return await this.searchService.getAllElasticData();
}
}
23 changes: 21 additions & 2 deletions backend/src/search/search.module.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
import { Module } from '@nestjs/common';
import { SearchController } from './search.controller';
import { SearchService } from './search.service';
import { BatchService } from './batch.service';
import { HttpModule } from '@nestjs/axios';
import { ElasticsearchModule } from '@nestjs/elasticsearch';
import { HttpConnection } from '@elastic/elasticsearch';
@Module({
imports: [HttpModule],
imports: [
HttpModule,
ElasticsearchModule.registerAsync({
useFactory: () => ({
node: process.env.ELASTIC_HOST,
headers: {
Accept: 'application/json',
'Content-Type': 'application/json',
},
auth: {
username: process.env.ELASTIC_USER,
password: process.env.ELASTIC_PASSWORD,
},
Connection: HttpConnection,
}),
}),
],
controllers: [SearchController],
providers: [SearchService],
providers: [SearchService, BatchService],
})
export class SearchModule {}
75 changes: 72 additions & 3 deletions backend/src/search/search.service.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import { Injectable } from '@nestjs/common';
import { HttpService } from '@nestjs/axios';
import { CrossRefResponse, CrossRefItem, PaperInfoExtended } from './entities/crossRef.entity';
import { CrossRefResponse, CrossRefItem, PaperInfoExtended, PaperInfo } from './entities/crossRef.entity';
import { CROSSREF_API_URL } from '../util';
import { ElasticsearchService } from '@nestjs/elasticsearch';
import { SearchHit } from '@elastic/elasticsearch/lib/api/types';

@Injectable()
export class SearchService {
constructor(private readonly httpService: HttpService) {}
constructor(private readonly httpService: HttpService, private readonly esService: ElasticsearchService) {}
async getCrossRefAutoCompleteData(keyword: string) {
const crossRefdata = await this.httpService.axiosRef.get<CrossRefResponse>(CROSSREF_API_URL(keyword));
const items = crossRefdata.data.message.items;
return items;
const totalItems = crossRefdata.data.message['total-results'];
return { items, totalItems };
}

async getCrossRefData(keyword: string, rows: number, page: number, isDoiExist: boolean) {
Expand All @@ -26,6 +29,27 @@ export class SearchService {
return { items, totalItems };
}

async crawlAllCrossRefData(keyword: string, totalItems: number, rows: number) {
const pages = await Promise.allSettled(
Array(Math.ceil(totalItems / rows))
.fill(0)
.map((_, i) => {
return this.getCrossRefData(keyword, rows, i + 1, false);
}),
);
console.log(totalItems);
console.log(pages.length);
pages.forEach((page) => {
if (page.status === 'fulfilled') {
const papers = this.parseCrossRefData(page.value.items);
papers.forEach((paper) => {
this.putElasticSearch(paper);
});
Comment on lines +45 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

papers 배열을 elasticSearch에 한번에 넣어도 될 것 같습니다.

} else {
console.log(page.reason);
}
});
}
parseCrossRefData(items: CrossRefItem[]) {
return items
.map((item) => {
Expand All @@ -44,4 +68,49 @@ export class SearchService {
})
.filter((info) => info.title || info.authors?.length > 0);
}
/**
 * Indexes a single paper into the index named by `ELASTIC_INDEX`,
 * using the paper's DOI as the document id.
 *
 * @param paper the parsed paper to store
 * @returns the response of the Elasticsearch index call
 */
async putElasticSearch(paper: PaperInfoExtended) {
  const { doi } = paper;
  const request = {
    index: process.env.ELASTIC_INDEX,
    id: doi,
    // Shallow copy so the stored document is a plain object of the paper's own fields.
    document: { ...paper },
  };
  return await this.esService.index(request);
}
/**
 * Searches the `ELASTIC_INDEX` index for papers whose `title` or `author`
 * field matches the keyword with a `match_bool_prefix` query.
 *
 * If the search call rejects for any reason, an empty result
 * (`hits.hits = []`, `hits.total = 0`) is returned instead of throwing.
 *
 * NOTE(review): the second clause targets `author`, while parseCrossRefData
 * filters on `info.authors` — confirm which field name the indexed documents use.
 *
 * @param keyword text to match as a prefix query
 * @param size maximum number of hits to return (default 5)
 * @returns the search response, or the empty fallback on failure
 */
async getElasticSearch(keyword: string, size = 5) {
  // Builds one `match_bool_prefix` clause for the given field.
  const prefixOn = (field: 'title' | 'author') => ({
    match_bool_prefix: { [field]: { query: keyword } },
  });
  // Fallback handed back when the search promise rejects.
  const emptyResult = () => ({ hits: { hits: [] as SearchHit<PaperInfo>[], total: 0 } });

  return await this.esService
    .search<PaperInfo>({
      index: process.env.ELASTIC_INDEX,
      size,
      query: { bool: { should: [prefixOn('title'), prefixOn('author')] } },
    })
    .catch(emptyResult);
}
/**
 * Runs a search with no query against the index named by `ELASTIC_INDEX`
 * and returns the raw search response.
 */
async getAllElasticData() {
  const index = process.env.ELASTIC_INDEX;
  return await this.esService.search({ index });
}
// match: title, author (compare the fuzzy scores of the top 5 hits and take whichever side is higher)
}
//title, author
13 changes: 13 additions & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ services:
links:
- redis
- mongo
- elastic
ports:
- 4000:4000
networks:
Expand All @@ -34,6 +35,18 @@ services:
ports:
- 3000:80

elastic:
image: elasticsearch:8.5.1
ports:
- 9200:9200
- 9300:9300
environment:
- discovery.type=single-node
- xpack.security.enabled=true
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
networks:
- viewpoint

networks:
viewpoint:
driver: bridge