Skip to content

Commit a2f3666

Browse files
committedSep 23, 2021
Create : app
1 parent faf358a commit a2f3666

File tree

17 files changed

+293
-0
lines changed

17 files changed

+293
-0
lines changed
 

‎.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
secrets.json
2+
13
# Byte-compiled / optimized / DLL files
24
__pycache__/
35
*.py[cod]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# https://www.crummy.com/software/BeautifulSoup/bs4/doc/
2+
# pip install beautifulsoup4
3+
4+
"""
5+
웹 크롤링 : 검색 엔진의 구축 등을 위하여 특정한 방법으로 웹 페이지를 수집하는 프로그램
6+
웹 스크래핑 : 웹에서 데이터를 수집하는 프로그램
7+
"""
8+
9+
from bs4 import BeautifulSoup
10+
11+
12+
# Small HTML sample used to exercise the parser below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

soup = BeautifulSoup(html_doc, "html.parser")

# Printed in order: the pretty-printed tree, the <title> tag, the first <p>
# tag, and the result of find("p", "title").
for output in (
    soup.prettify(),
    soup.title,
    soup.p,
    soup.find("p", "title"),
):
    print(output)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# https://www.crummy.com/software/BeautifulSoup/bs4/doc/
2+
# pip install beautifulsoup4
3+
4+
"""
5+
웹 크롤링 : 검색 엔진의 구축 등을 위하여 특정한 방법으로 웹 페이지를 수집하는 프로그램
6+
웹 스크래핑 : 웹에서 데이터를 수집하는 프로그램
7+
"""
8+
9+
from bs4 import BeautifulSoup

‎app/__init__.py

Whitespace-only changes.

‎app/config.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import json
2+
from pathlib import Path
3+
from typing import Optional
4+
5+
6+
# Directory that contains this module; used to locate the default secrets file.
BASE_DIR = Path(__file__).resolve().parent


def get_secret(
    key: str,
    default_value: Optional[str] = None,
    json_path: str = str(BASE_DIR / "secrets.json"),
):
    """Return the value stored under ``key`` in a JSON secrets file.

    Args:
        key: Top-level key to look up in the JSON object.
        default_value: Value returned when ``key`` is absent. ``None`` (the
            default) means "no fallback": a missing key raises instead.
        json_path: Path of the JSON file to read. Defaults to
            ``secrets.json`` next to this module.

    Returns:
        The value stored under ``key``, or ``default_value`` when the key is
        missing and a fallback was supplied.

    Raises:
        EnvironmentError: If ``key`` is missing and no fallback was supplied.
        OSError: If ``json_path`` cannot be opened (raised by ``open``).
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(json_path, encoding="utf-8") as f:
        secrets = json.load(f)
    try:
        return secrets[key]
    except KeyError:
        # Compare against None so that falsy fallbacks such as "" are honored.
        if default_value is not None:
            return default_value
        raise EnvironmentError(f"Set the {key} environment variable.") from None
22+
23+
24+
if __name__ == "__main__":
    # Manual smoke test: print the value stored under the "hello" key.
    print(get_secret("hello"))

‎app/main.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from fastapi import FastAPI, Request
2+
from fastapi.responses import HTMLResponse
3+
from fastapi.templating import Jinja2Templates
4+
from fastapi.staticfiles import StaticFiles
5+
from app.config import BASE_DIR
6+
from app.models import mongodb
7+
from app.scrapers import photo_scraper
8+
9+
10+
# Application object that the route and event decorators in this module attach to.
app = FastAPI(title="데이터 수집가", version="0.0.1")

# Files under <BASE_DIR>/statics are served at the "/statics" URL prefix; the
# mount is registered under the name "static".
app.mount(
    "/statics",
    StaticFiles(directory=BASE_DIR / "statics"),
    name="static",
)

# Template loader rooted at <BASE_DIR>/templates.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
15+
16+
17+
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """Render ``index.html`` for the site root."""
    return templates.TemplateResponse(
        "index.html",
        context={"request": request, "title": "데이터 수집가"},
    )
21+
22+
23+
@app.get("/search", response_class=HTMLResponse)
async def search_result(request: Request):
    """Render ``index.html`` with photo search results for the ``q`` parameter.

    The keyword is read from the ``q`` query-string parameter. When it is
    missing or blank, no search is performed and an empty result list is
    passed to the template instead.
    """
    keyword = request.query_params.get("q")
    if keyword is not None:
        keyword = keyword.strip()

    # Only query the scraper for a real keyword; otherwise a batch of
    # requests would be sent with no usable query.
    result = await photo_scraper.search(keyword, 50) if keyword else []

    context = {"request": request, "keyword": keyword, "result": result}
    return templates.TemplateResponse("index.html", context=context)
32+
33+
34+
@app.on_event("startup")
async def on_app_start():
    """Startup hook: await ``mongodb.connect()``."""
    await mongodb.connect()
40+
41+
42+
@app.on_event("shutdown")
async def on_app_shutdown():
    """Shutdown hook: await ``mongodb.close()``."""
    await mongodb.close()

‎app/models/__init__.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from motor.motor_asyncio import AsyncIOMotorClient
2+
from odmantic import AIOEngine
3+
from app.config import get_secret
4+
5+
6+
class __MongoDB:
    """Holder for the application's Motor client and ODMantic engine.

    Connection settings are read through ``get_secret`` when the class body
    executes. The client and engine are created by ``connect()``.
    """

    MONGO_URL = get_secret("MONGO_URL")
    MONGO_DB_NAME = get_secret("MONGO_DB_NAME")
    # Coerce to int so the pool sizes are numeric whether they come from the
    # secrets file or from the string fallbacks.
    MONGO_MAX_CONNECTIONS = int(get_secret("MONGO_MAX_CONNECTIONS", "10"))
    MONGO_MIN_CONNECTIONS = int(get_secret("MONGO_MIN_CONNECTIONS", "10"))

    def __init__(self) -> None:
        # Both stay None until connect() has been awaited.
        self.__client: AsyncIOMotorClient = None
        self.__engine: AIOEngine = None

    @property
    def client(self) -> AsyncIOMotorClient:
        """The Motor client, or ``None`` if ``connect()`` has not run yet."""
        return self.__client

    @property
    def engine(self) -> AIOEngine:
        """The ODMantic engine, or ``None`` if ``connect()`` has not run yet."""
        return self.__engine

    async def connect(self):
        """Create the Motor client and the ODMantic engine bound to it."""
        self.__client = AsyncIOMotorClient(
            self.MONGO_URL,
            maxPoolSize=self.MONGO_MAX_CONNECTIONS,
            minPoolSize=self.MONGO_MIN_CONNECTIONS,
        )
        self.__engine = AIOEngine(
            motor_client=self.__client, database=self.MONGO_DB_NAME
        )

    async def close(self):
        """Close the MongoDB client; do nothing if it was never created."""
        if self.__client is None:
            return
        self.__client.close()


# Module-level instance imported by the rest of the application.
mongodb = __MongoDB()

‎app/models/photo.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from odmantic import Model
2+
3+
4+
class PhotoModel(Model):
    # Both fields are declared as plain ``str`` with no default value.
    username: str
    message: str

    class Config:
        # Collection name used for this model.
        collection = "photos"

‎app/requirements.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
fastapi
2+
uvicorn
3+
aiofiles
4+
jinja2
5+
odmantic
6+
pymongo[srv]

‎app/scrapers/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .photo import PhotoScraper
2+
3+
4+
# Module-level scraper instance exposed by this package.
photo_scraper = PhotoScraper()

‎app/scrapers/book.py

Whitespace-only changes.

‎app/scrapers/photo.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import aiohttp
2+
import asyncio
3+
import time
4+
from app.config import get_secret
5+
6+
7+
class PhotoScraper:
    """Query the Naver and Kakao image-search HTTP APIs concurrently.

    API credentials are read through ``get_secret`` when the class body
    executes.
    """

    NAVER_API_PHOTO = "https://openapi.naver.com/v1/search/image"
    KAKAO_API_PHOTO = "https://dapi.kakao.com/v2/search/image"
    NAVER_API_ID = get_secret("NAVER_API_ID")
    NAVER_API_SECRET = get_secret("NAVER_API_SECRET")
    KAKAO_API_ID = get_secret("KAKAO_API_ID")

    async def fetch(self, session, url, headers):
        """GET ``url`` with ``headers`` and return the decoded JSON body.

        Returns:
            The parsed JSON payload, or ``None`` when ``response.ok`` is
            false.
        """
        async with session.get(url, headers=headers) as response:
            if response.ok:
                return await response.json()
            return None

    def unit_api(self, keyword: str, page: int, size: int):
        """Build the request descriptors for one result page of each API.

        Args:
            keyword: Search term; it is URL-encoded here.
            page: 1-based page number.
            size: Number of results requested per page.

        Returns:
            A list of two dicts (Naver first, then Kakao), each with the
            keys ``name``, ``url`` and ``headers``.
        """
        from urllib.parse import urlencode

        page = int(page)
        # Naver's ``start`` is the 1-based position of the first item rather
        # than a page number, so convert the page into an item offset.
        naver_query = urlencode(
            {"query": keyword, "display": size, "start": (page - 1) * size + 1}
        )
        kakao_query = urlencode({"query": keyword, "page": page, "size": size})
        return [
            {
                "name": "naver",
                "url": f"{self.NAVER_API_PHOTO}?{naver_query}",
                "headers": {
                    "X-Naver-Client-Id": self.NAVER_API_ID,
                    "X-Naver-Client-Secret": self.NAVER_API_SECRET,
                },
            },
            {
                "name": "kakao",
                "url": f"{self.KAKAO_API_PHOTO}?{kakao_query}",
                "headers": {"Authorization": f"KakaoAK {self.KAKAO_API_ID}"},
            },
        ]

    async def search(self, keyword: str, total_page: int = 10, size: int = 10):
        """Fetch pages ``1..total_page`` from both APIs concurrently.

        Args:
            keyword: Search term.
            total_page: Number of pages to request from each API.
            size: Number of results requested per page.

        Returns:
            A list with one entry per request, in request order (Naver and
            Kakao alternating for each page). An entry is ``None`` when
            that request's response was not OK.
        """
        apis = []
        # Inclusive upper bound so exactly ``total_page`` pages are requested.
        for page in range(1, total_page + 1):
            apis += self.unit_api(keyword, page, size)

        async with aiohttp.ClientSession() as session:
            tasks = [
                self.fetch(session, api["url"], api["headers"]) for api in apis
            ]
            return await asyncio.gather(*tasks)

    def run(self, keyword: str, total_page: int, size: int = 10):
        """Run ``search`` to completion with ``asyncio.run`` and return its result."""
        return asyncio.run(self.search(keyword, total_page, size))
58+
59+
60+
if __name__ == "__main__":
    # Manual timing run: print how many seconds one scraper run takes.
    began = time.time()
    PhotoScraper().run("joy", 100, 10)
    print(time.time() - began)

‎app/statics/css/styles.css

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/* body {
2+
background-color: darkgrey;
3+
} */

‎app/statics/js/scripts.js

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/** Log a greeting to the console. */
function init() {
  console.log("hello world!");
}

// Run once when the script is loaded.
init();

‎app/templates/index.html

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<!DOCTYPE html>
<html lang="en">
  <head>
    <title>콜렉터</title>
    <link href="https://unpkg.com/mvp.css" rel="stylesheet" />
    <link
      href="{{ url_for('static', path='css/styles.css') }}"
      rel="stylesheet"
    />
  </head>
  <body>
    <header>
      <h1>콜렉터</h1>
      <center>
        {# The form submits the keyword to /search as the "q" query parameter. #}
        <form id="search_form" action="/search">
          <input
            type="search"
            placeholder="keyword"
            id="search_input"
            name="q"
          />
          <button type="submit">검색</button>
        </form>
      </center>
    </header>
    <main>
      {# Show the raw result when there is one, otherwise a prompt. #}
      {% if result %} {{result}} {% else %}
      <center><h3 style="color: gray">Please Input Keyword...</h3></center>
      {% endif %}
    </main>
    <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
    <script src="{{ url_for('static', path='js/scripts.js') }}"></script>
  </body>
</html>

‎server.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import uvicorn
2+
3+
if __name__ == "__main__":
    # Development entry point: serve app.main:app on localhost:8080 with
    # auto-reload enabled.
    uvicorn.run(
        "app.main:app",
        host="localhost",
        port=8080,
        reload=True,
    )

0 commit comments

Comments
 (0)