-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathschema.1.cql
68 lines (60 loc) · 1.64 KB
/
schema.1.cql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
-- SCRP keyspace bootstrap.
--
-- To run:
--   cqlsh --ssl -f schema.1.cql
--
-- To rebuild from scratch, drop the keyspace first (this removes its tables):
--   DROP KEYSPACE scrp;
--
-- For production, create the keyspace with two replicas in DC1 instead of
-- the single-replica statement below:
--   CREATE KEYSPACE scrp WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '2'} AND durable_writes = true;

-- Single replica in DC1.
-- IF NOT EXISTS makes the script safe to re-run; note that an already
-- existing keyspace keeps whatever replication settings it was created with.
CREATE KEYSPACE IF NOT EXISTS scrp
    WITH REPLICATION = { 'class': 'NetworkTopologyStrategy', 'DC1': '1' };

-- Every unqualified table and index name in this script resolves inside scrp.
USE scrp;
-- Queries, one row per query, keyed by a time-based UUID.
-- The qid columns of the other tables in this file are commented as
-- "query id"; presumably they hold values of queries.id -- TODO confirm.
-- IF NOT EXISTS keeps the script re-runnable (an existing table is left as is).
CREATE TABLE IF NOT EXISTS queries (
    id     timeuuid,
    domain text,   -- NOTE(review): presumably the domain the query targets; confirm
    filter text,   -- NOTE(review): filter format is not defined in this file; confirm
    PRIMARY KEY (id)
);
-- URLs, one row per url (url alone is the primary key).
-- IF NOT EXISTS keeps the script re-runnable (an existing table is left as is).
CREATE TABLE IF NOT EXISTS urls (
    url       text,
    seq       timeuuid,   -- sequence it came in / last updated
    status    int,        -- 0 = untouched, 200 = OK, etc.
    sched     timestamp,  -- scheduled for processing
    completed boolean,
    mid       text,       -- machine id
    qid       timeuuid,   -- query id
    attempt   timeuuid,   -- NOTE(review): presumably identifies the current/latest attempt; confirm
    attempts  int,        -- NOTE(review): presumably the number of attempts so far; confirm
    PRIMARY KEY (url)
);

-- Secondary indexes so rows can be selected by qid or by completed
-- without knowing the url.
CREATE INDEX IF NOT EXISTS urls_qid_idx ON urls ( qid );
-- NOTE(review): completed is a boolean, i.e. a very-low-cardinality column;
-- Cassandra guidance discourages secondary indexes on such columns. Kept
-- because existing queries may rely on it -- consider a dedicated table.
CREATE INDEX IF NOT EXISTS urls_completed_idx ON urls ( completed );
-- Failures, one row per url (url alone is the primary key).
-- Same columns as urls, minus completed and attempt.
-- IF NOT EXISTS keeps the script re-runnable (an existing table is left as is).
CREATE TABLE IF NOT EXISTS failures (
    url      text,
    seq      timeuuid,   -- sequence it came in / last updated
    status   int,        -- 0 = untouched, 200 = OK, etc.
    sched    timestamp,  -- scheduled for processing
    mid      text,       -- machine id
    qid      timeuuid,   -- query id
    attempts int,        -- NOTE(review): presumably the number of attempts made; confirm
    PRIMARY KEY (url)
);
-- Successes, one row per url (url alone is the primary key).
-- Same columns as failures, plus size.
-- IF NOT EXISTS keeps the script re-runnable (an existing table is left as is).
CREATE TABLE IF NOT EXISTS successes (
    url      text,
    seq      timeuuid,   -- sequence it came in / last updated
    status   int,        -- 0 = untouched, 200 = OK, etc.
    sched    timestamp,  -- scheduled for processing
    mid      text,       -- machine id
    qid      timeuuid,   -- query id
    size     bigint,     -- NOTE(review): presumably the payload size in bytes; unit not stated here
    attempts int,        -- NOTE(review): presumably the number of attempts made; confirm
    PRIMARY KEY (url)
);
-- Successful scrapes
-- One row per url (url alone is the primary key).
-- IF NOT EXISTS keeps the script re-runnable (an existing table is left as is).
CREATE TABLE IF NOT EXISTS content (
    url    text,
    seq    timeuuid,                 -- sequence it came in / last updated
    mid    text,                     -- machine id
    qid    timeuuid,                 -- query id
    type   text,                     -- type of data
    params frozen<map<text,text>>,   -- frozen: the map is written and read as a single value
    raw    text,                     -- NOTE(review): presumably the raw scraped payload; confirm
    PRIMARY KEY (url)
);

-- Secondary index so rows can be selected by type without knowing the url.
CREATE INDEX IF NOT EXISTS content_type_idx ON content ( type );