crawler.py
import json
import logging
import os
import sys
import time
from typing import Any, Dict, List, Optional

import requests

logging.basicConfig(level=logging.INFO)


def merge_data(prev: List[Dict[str, Any]], curr: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    '''Merge curr into prev; every element of both lists must have a 'date' field.'''
    for c in curr:
        x: Optional[Dict[str, Any]] = None
        for p in prev:
            if p['date'] == c['date']:
                x = p
                break
        if x is None:
            prev.append(c)
        else:
            x.update(c)
    return sorted(prev, key=lambda d: d['date'])
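
# A quick illustration of merge_data's behavior (hypothetical values): entries with a
# matching 'date' are updated in place, new dates are appended, and the result comes
# back sorted by date.
#
#   prev = [{'date': '2024-01-01', 'value': 1}]
#   curr = [{'date': '2024-01-01', 'value': 2}, {'date': '2024-01-02', 'value': 3}]
#   merge_data(prev, curr)
#   # -> [{'date': '2024-01-01', 'value': 2}, {'date': '2024-01-02', 'value': 3}]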


def http_get(url: str, field_name: str) -> List[Dict[str, Any]]:
    '''GET `url` and return the list stored under `field_name`, or [] on failure.'''
    resp = requests.get(url=url)
    if resp.status_code == 200:
        data = resp.json()
        return data[field_name]
    else:
        logging.warning(f'{url} {resp.status_code} {resp.text}')
        if resp.status_code == 429:
            # Rate limited: stop the whole crawl gracefully.
            sys.exit(0)
        return []


def get_coins() -> List[str]:
    '''Return the sorted list of base currencies traded on Bitstamp.'''
    url = 'https://www.bitstamp.net/api/v2/trading-pairs-info/'
    resp = requests.get(url=url)
    if resp.status_code == 200:
        symbols = resp.json()
        coins = [s['name'].split('/')[0].lower() for s in symbols]
        coins = sorted(set(coins))
        return coins
    else:
        logging.warning(f'{url} {resp.status_code} {resp.text}')
        if resp.status_code == 429:
            sys.exit(0)
        return []
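
# Pair names from the trading-pairs endpoint are assumed to look like 'BTC/USD' (hence
# the split on '/'), so the resulting list contains lowercase base symbols such as 'btc'.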


def get_price(coin: str) -> None:
    '''Fetch the price history for `coin` and merge it into ./data/price-{coin}.json.'''
    url = f'https://www.bitstamp.net/api-internal/stats/v1/{coin}/financial/price'
    curr = http_get(url=url, field_name='price')
    file_path = f'./data/price-{coin}.json'
    if os.path.exists(file_path):
        with open(file_path, 'rt') as f_in:
            prev = json.loads(f_in.read())
    else:
        prev = []
    prev = merge_data(prev, curr)
    with open(file_path, 'wt') as f_out:
        json.dump(prev, f_out, indent=2)
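
# get_transactions, get_addresses, and get_large_transactions below follow the same
# fetch/merge/write pattern as get_price, differing only in the endpoint path, the
# field name in the JSON response, and the output file name.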


def get_transactions(coin: str) -> None:
    url = f'https://www.bitstamp.net/api-internal/stats/v1/{coin}/network/transactions'
    curr = http_get(url=url, field_name='txsStats')
    file_path = f'./data/transactions-{coin}.json'
    if os.path.exists(file_path):
        with open(file_path, 'rt') as f_in:
            prev = json.loads(f_in.read())
    else:
        prev = []
    prev = merge_data(prev, curr)
    with open(file_path, 'wt') as f_out:
        json.dump(prev, f_out, indent=2)


def get_addresses(coin: str) -> None:
    url = f'https://www.bitstamp.net/api-internal/stats/v1/{coin}/network/addresses'
    curr = http_get(url=url, field_name='addressesStats')
    file_path = f'./data/addresses-{coin}.json'
    if os.path.exists(file_path):
        with open(file_path, 'rt') as f_in:
            prev = json.loads(f_in.read())
    else:
        prev = []
    prev = merge_data(prev, curr)
    with open(file_path, 'wt') as f_out:
        json.dump(prev, f_out, indent=2)


def get_large_transactions(coin: str) -> None:
    url = f'https://www.bitstamp.net/api-internal/stats/v1/{coin}/financial/large_transactions'
    file_path = f'./data/large_transactions-{coin}.json'
    curr = http_get(url=url, field_name='largeTxs')
    if os.path.exists(file_path):
        with open(file_path, 'rt') as f_in:
            prev = json.loads(f_in.read())
    else:
        prev = []
    prev = merge_data(prev, curr)
    with open(file_path, 'wt') as f_out:
        json.dump(prev, f_out, indent=2)


if __name__ == "__main__":
    # 8000 requests per 10 minutes, see `REQUEST LIMITS` at https://www.bitstamp.net/api/
    # 600 s / 8000 requests = 0.075 s between requests.
    cooldown_time = 0.075
    os.makedirs('./data', exist_ok=True)  # make sure the output directory exists
    coins = get_coins()
    for coin in coins:
        logging.info(coin)
        get_price(coin)
        time.sleep(cooldown_time)
        get_transactions(coin)
        time.sleep(cooldown_time)
        get_addresses(coin)
        time.sleep(cooldown_time)
        get_large_transactions(coin)
        time.sleep(cooldown_time)
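
# Usage: `python crawler.py` crawls every coin returned by get_coins and writes one JSON
# file per coin and metric (price, transactions, addresses, large transactions) into ./data/.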