@@ -4,9 +4,9 @@
 to stop exporting. This end ledger is determined by when the Airflow DAG is run. This DAG should be triggered manually
 when initializing the tables in order to catch up to the current state in the network, but should not be scheduled to run constantly.
 """
-import ast
-import datetime
-import json
+from ast import literal_eval
+from datetime import datetime
+from json import loads

 from airflow import DAG
 from airflow.models import Variable
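The docstring above describes a manual catch-up run. A minimal sketch of how such a run might be kicked off through Airflow's stable REST API; the webserver URL and credentials are placeholders and not part of this change, and the UI or the `airflow dags trigger` CLI would work just as well:

    # Hypothetical sketch: trigger the one-time catch-up run of this DAG.
    # The endpoint is Airflow 2.x's stable REST API; host and auth are assumptions.
    import requests

    resp = requests.post(
        "http://localhost:8080/api/v1/dags/bucket_list_export/dagRuns",
        json={"conf": {}},              # no extra conf needed for this DAG
        auth=("airflow", "airflow"),    # placeholder credentials
    )
    resp.raise_for_status()
    print(resp.json()["dag_run_id"])    # id of the manually triggered run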
@@ -23,29 +23,31 @@
 dag = DAG(
     "bucket_list_export",
     default_args=get_default_dag_args(),
-    start_date=datetime.datetime(2021, 10, 15),
-    end_date=datetime.datetime(2021, 10, 15),
+    start_date=datetime(2021, 10, 15),
+    end_date=datetime(2021, 10, 15),
     description="This DAG loads a point forward view of state tables. Caution: Does not capture historical changes!",
     schedule_interval="@daily",
     params={
         "alias": "bucket",
     },
-    user_defined_filters={"fromjson": lambda s: json.loads(s)},
+    user_defined_filters={
+        "fromjson": lambda s: loads(s),
+        "literal_eval": lambda e: literal_eval(e),
+    },
     user_defined_macros={
         "subtract_data_interval": macros.subtract_data_interval,
         "batch_run_date_as_datetime_string": macros.batch_run_date_as_datetime_string,
     },
 )

-file_names = Variable.get("output_file_names", deserialize_json=True)
 table_names = Variable.get("table_ids", deserialize_json=True)
-internal_project = Variable.get("bq_project")
-internal_dataset = Variable.get("bq_dataset")
-public_project = Variable.get("public_project")
-public_dataset = Variable.get("public_dataset")
-use_testnet = ast.literal_eval(Variable.get("use_testnet"))
-use_futurenet = ast.literal_eval(Variable.get("use_futurenet"))
-
+internal_project = "{{ var.value.bq_project }}"
+internal_dataset = "{{ var.value.bq_dataset }}"
+public_project = "{{ var.value.public_project }}"
+public_dataset = "{{ var.value.public_dataset }}"
+public_dataset_new = "{{ var.value.public_dataset_new }}"
+use_testnet = "{{ var.value.use_testnet | literal_eval }}"
+use_futurenet = "{{ var.value.use_futurenet | literal_eval }}"
 """
 The time task reads in the execution time of the current run, as well as the next
 execution time. It converts these two times into ledger ranges.
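A minimal sketch, outside Airflow, of what the new `literal_eval` user-defined filter does: Airflow Variables render as strings, and the filter turns the rendered string back into the Python literal that the removed `ast.literal_eval(Variable.get(...))` call used to produce. Note that the Jinja expression in `use_testnet`/`use_futurenet` is only evaluated where the string ends up in a templated operator field.

    # Sketch only; the Variable value shown is an assumed example.
    from ast import literal_eval

    rendered = "False"                  # what {{ var.value.use_testnet }} renders to (Variables are strings)
    use_testnet = literal_eval(rendered)
    assert use_testnet is False         # the | literal_eval filter restores the boolean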
@@ -60,7 +62,7 @@
     dag,
     "bucket",
     "export_accounts",
-    file_names["accounts"],
+    "{{ var.json.output_file_names.accounts }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
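For reference, a sketch of the equivalence between the removed parse-time lookup and the `var.json` template used above; it assumes an Airflow environment where the `output_file_names` Variable is set, and the values are illustrative:

    # Both forms read the same Airflow Variable; the Jinja form defers the
    # metadata-DB lookup from DAG parse time to task render time.
    from airflow.models import Variable

    # Old style: executed on every parse of the DAG file.
    file_names = Variable.get("output_file_names", deserialize_json=True)
    old_value = file_names["accounts"]

    # New style: rendered per task instance, provided the argument lands in a
    # templated field of the underlying operator.
    new_value_template = "{{ var.json.output_file_names.accounts }}"

Deferring the lookup also means the scheduler no longer hits the metadata database every time it parses the DAG file.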
@@ -69,7 +71,7 @@
     dag,
     "bucket",
     "export_claimable_balances",
-    file_names["claimable_balances"],
+    "{{ var.json.output_file_names.claimable_balances }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
@@ -78,7 +80,7 @@
     dag,
     "bucket",
     "export_offers",
-    file_names["offers"],
+    "{{ var.json.output_file_names.offers }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
@@ -87,7 +89,7 @@
     dag,
     "bucket",
     "export_pools",
-    file_names["liquidity_pools"],
+    "{{ var.json.output_file_names.liquidity_pools }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
@@ -96,7 +98,7 @@
     dag,
     "bucket",
     "export_signers",
-    file_names["signers"],
+    "{{ var.json.output_file_names.signers }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
@@ -105,7 +107,7 @@
     dag,
     "bucket",
     "export_trustlines",
-    file_names["trustlines"],
+    "{{ var.json.output_file_names.trustlines }}",
     use_testnet=use_testnet,
     use_futurenet=use_futurenet,
     use_gcs=True,
@@ -131,37 +133,37 @@
     dag, internal_project, internal_dataset, table_names["accounts"]
 )
 delete_acc_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["accounts"]
+    dag, public_project, public_dataset, table_names["accounts"], "pub"
 )
 delete_bal_task = build_delete_data_task(
     dag, internal_project, internal_dataset, table_names["claimable_balances"]
 )
 delete_bal_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["claimable_balances"]
+    dag, public_project, public_dataset, table_names["claimable_balances"], "pub"
 )
 delete_off_task = build_delete_data_task(
     dag, internal_project, internal_dataset, table_names["offers"]
 )
 delete_off_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["offers"]
+    dag, public_project, public_dataset, table_names["offers"], "pub"
 )
 delete_pool_task = build_delete_data_task(
     dag, internal_project, internal_dataset, table_names["liquidity_pools"]
 )
 delete_pool_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["liquidity_pools"]
+    dag, public_project, public_dataset, table_names["liquidity_pools"], "pub"
 )
 delete_sign_task = build_delete_data_task(
     dag, internal_project, internal_dataset, table_names["signers"]
 )
 delete_sign_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["signers"]
+    dag, public_project, public_dataset, table_names["signers"], "pub"
 )
 delete_trust_task = build_delete_data_task(
     dag, internal_project, internal_dataset, table_names["trustlines"]
 )
 delete_trust_pub_task = build_delete_data_task(
-    dag, public_project, public_dataset, table_names["trustlines"]
+    dag, public_project, public_dataset, table_names["trustlines"], "pub"
 )

 """
@@ -244,6 +246,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )
 send_bal_to_pub_task = build_gcs_to_bq_task(
     dag,
@@ -254,6 +257,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )
 send_off_to_pub_task = build_gcs_to_bq_task(
     dag,
@@ -264,6 +268,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )
 send_pool_to_pub_task = build_gcs_to_bq_task(
     dag,
@@ -274,6 +279,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )
 send_sign_to_pub_task = build_gcs_to_bq_task(
     dag,
@@ -284,6 +290,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )
 send_trust_to_pub_task = build_gcs_to_bq_task(
     dag,
@@ -294,6 +301,7 @@
     "",
     partition=True,
     cluster=True,
+    dataset_type="pub",
 )

 (