-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_openiterable.py
153 lines (133 loc) · 4.41 KB
/
test_openiterable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: utf-8 -*-
import pytest
from iterable.helpers.detect import open_iterable
from fixdata import FIXTURES, FIXTURES_TYPES
class TestOpenIterable:
    """Checks that ``open_iterable`` yields the expected rows for each fixture.

    Every test opens one file from ``fixtures/`` through ``open_iterable``,
    walks the rows in order and compares each against a reference list.
    The shared logic lives in two private helpers so the iterable is always
    closed, even when an assertion fails part-way through the file.
    """

    @staticmethod
    def _check_rows(filename, expected, *, equal=True, **open_kwargs):
        """Iterate ``filename`` and compare each row with ``expected``.

        Args:
            filename: Path handed to ``open_iterable``.
            expected: Sequence indexed by row position holding the reference
                row for that position.
            equal: When true every row must equal its reference row; when
                false every row must differ from it.
            **open_kwargs: Extra keyword arguments forwarded unchanged to
                ``open_iterable`` (nothing is passed when none are given).
        """
        iterable = open_iterable(filename, **open_kwargs)
        try:
            # NOTE(review): only rows actually yielded are compared; the row
            # count is never checked, so a short or empty iterable passes.
            # Consider asserting the count once fixture sizes are confirmed.
            for n, row in enumerate(iterable):
                if equal:
                    assert row == expected[n]
                else:
                    assert row != expected[n]
        finally:
            # Release the underlying handle even if a comparison failed.
            iterable.close()

    @staticmethod
    def _check_book_years(filename):
        """Iterate ``book`` elements of an XML fixture and compare years.

        Args:
            filename: Path handed to ``open_iterable`` together with
                ``iterableargs={'tagname': 'book'}``.
        """
        years = [2005, 2005, 2003]
        iterable = open_iterable(filename, iterableargs={'tagname': 'book'})
        try:
            for n, row in enumerate(iterable):
                assert int(row['year']) == years[n]
        finally:
            # Release the underlying handle even if a comparison failed.
            iterable.close()

    def test_iterate_plain_csv(self):
        self._check_rows('fixtures/2cols6rows.csv', FIXTURES)

    def test_iterate_plain_delimiter_notmatch_csv(self):
        # With ';' as the delimiter no row may equal its reference row.
        self._check_rows(
            'fixtures/2cols6rows.csv',
            FIXTURES,
            equal=False,
            iterableargs={'delimiter': ';'},
        )

    def test_iterate_gzip_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.gz', FIXTURES)

    def test_iterate_lzma_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.xz', FIXTURES)

    def test_iterate_zstd_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.zst', FIXTURES)

    def test_iterate_brotli_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.br', FIXTURES)

    def test_iterate_bzip2_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.bz2', FIXTURES)

    def test_iterate_lz4_csv(self):
        self._check_rows('fixtures/2cols6rows.csv.lz4', FIXTURES)

    def test_iterate_plain_xls(self):
        # Compared against FIXTURES_TYPES rather than FIXTURES.
        self._check_rows('fixtures/2cols6rows.xls', FIXTURES_TYPES)

    def test_iterate_plain_xlsx(self):
        self._check_rows('fixtures/2cols6rows.xlsx', FIXTURES)

    def test_iterate_plain_parquet(self):
        # Compared against FIXTURES_TYPES rather than FIXTURES.
        self._check_rows('fixtures/2cols6rows.parquet', FIXTURES_TYPES)

    def test_iterate_plain_orc(self):
        self._check_rows('fixtures/2cols6rows.orc', FIXTURES)

    def test_iterate_plain_avro(self):
        self._check_rows('fixtures/2cols6rows.avro', FIXTURES)

    def test_iterate_plain_bson(self):
        self._check_rows('fixtures/2cols6rows_flat.bson', FIXTURES)

    def test_iterate_plain_json(self):
        self._check_rows('fixtures/2cols6rows_array.json', FIXTURES)

    def test_iterate_plain_jsonl(self):
        self._check_rows('fixtures/2cols6rows_flat.jsonl', FIXTURES)

    def test_iterate_plain_xml(self):
        self._check_book_years('fixtures/books.xml')

    def test_iterate_lz4_xml(self):
        self._check_book_years('fixtures/books.xml.lz4')