-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02-populate.R
128 lines (115 loc) · 2.78 KB
/
02-populate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
library(dplyr)
library(DBI)
library(duckdb)
duckdb_con <- dbConnect(
duckdb(),
dbdir = "formd.duckdb",
read_only = FALSE
)
# this script is meant for a first load run
# Populate ---------------------------------------------------------------------
q_ins_form_d <-
"INSERT OR REPLACE INTO form_d
SELECT
ACCESSIONNUMBER,
FILING_DATE,
SIC_CODE,
SUBMISSIONTYPE,
OVER100PERSONSFLAG,
OVER100ISSUERFLAG
FROM 'formd-unzipped/*/FORMDSUBMISSION.tsv';
"
dbExecute(duckdb_con, q_ins_form_d)
# or use dplyr::rows_insert() which is supported on DuckDB despite the docs not mentioning it
q_ins_issuers <-
"INSERT OR REPLACE INTO issuers
SELECT
ACCESSIONNUMBER,
IS_PRIMARYISSUER_FLAG,
ISSUER_SEQ_KEY,
ENTITYNAME,
CITY,
STATEORCOUNTRY,
STATEORCOUNTRYDESCRIPTION,
JURISDICTIONOFINC,
ENTITYTYPE,
ENTITYTYPEOTHERDESC,
YEAROFINC_TIMESPAN_CHOICE,
YEAROFINC_VALUE_ENTERED
FROM 'formd-unzipped/*/ISSUERS.tsv';
"
dbExecute(duckdb_con, q_ins_issuers)
q_ins_offering <-
"INSERT OR REPLACE INTO offering
SELECT
ACCESSIONNUMBER,
INDUSTRYGROUPTYPE,
INVESTMENTFUNDTYPE,
IS40ACT,
REVENUERANGE,
AGGREGATENETASSETVALUERANGE,
FEDERALEXEMPTIONS_ITEMS_LIST,
ISAMENDMENT,
PREVIOUSACCESSIONNUMBER,
SALE_DATE,
YETTOOCCUR,
MORETHANONEYEAR,
ISEQUITYTYPE,
ISDEBTTYPE,
ISOPTIONTOACQUIRETYPE,
ISSECURITYTOBEACQUIREDTYPE,
ISPOOLEDINVESTMENTFUNDTYPE,
ISTENANTINCOMMONTYPE,
ISMINERALPROPERTYTYPE,
ISOTHERTYPE,
DESCRIPTIONOFOTHERTYPE,
ISBUSINESSCOMBINATIONTRANS,
BUSCOMBCLARIFICATIONOFRESP,
MINIMUMINVESTMENTACCEPTED,
OVER100RECIPIENTFLAG,
TOTALOFFERINGAMOUNT,
TOTALAMOUNTSOLD,
TOTALREMAINING,
SALESAMTCLARIFICATIONOFRESP,
HASNONACCREDITEDINVESTORS,
NUMBERNONACCREDITEDINVESTORS,
TOTALNUMBERALREADYINVESTED,
SALESCOMM_DOLLARAMOUNT,
SALESCOMM_ISESTIMATE,
FINDERSFEE_DOLLARAMOUNT,
FINDERSFEE_ISESTIMATE,
FINDERFEECLARIFICATIONOFRESP,
GROSSPROCEEDSUSED_DOLLARAMOUNT,
GROSSPROCEEDSUSED_ISESTIMATE,
GROSSPROCEEDSUSED_CLAROFRESP,
AUTHORIZEDREPRESENTATIVE
FROM 'formd-unzipped/*/OFFERING.tsv';
"
dbExecute(duckdb_con, q_ins_offering)
q_ins_recipients <-
"INSERT OR REPLACE INTO recipients
SELECT
ACCESSIONNUMBER,
RECIPIENT_SEQ_KEY,
RECIPIENTNAME,
RECIPIENTCRDNUMBER,
ASSOCIATEDBDNAME,
ASSOCIATEDBDCRDNUMBER,
STREET1,
STREET2,
CITY,
STATEORCOUNTRY,
STATEORCOUNTRYDESCRIPTION,
ZIPCODE,
STATES_OR_VALUE_LIST,
DESCRIPTIONS_LIST,
FOREIGNSOLICITATION
FROM 'formd-unzipped/*/RECIPIENTS.tsv';
"
dbExecute(duckdb_con, q_ins_recipients)
dbGetQuery(duckdb_con, "PRAGMA show_tables_expanded;") |>
as_tibble() |>
filter(!temporary) |>
tidyr::unnest_longer(c(column_names, column_types)) |>
print(n = Inf)
dbDisconnect(duckdb_con)