-
Notifications
You must be signed in to change notification settings - Fork 933
/
Copy pathcsv_gpu.hpp
236 lines (216 loc) · 9.98 KB
/
csv_gpu.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <io/utilities/parsing_utils.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <rmm/cuda_stream_view.hpp>
using cudf::device_span;
namespace cudf {
namespace io {
namespace csv {
namespace gpu {
/**
* CSV row parsing context
* NONE: No special context (normal parsing)
* QUOTE: Within a quoted field
* COMMENT: Within a comment line (discard every character until terminator)
* EOF: End state (EOF reached)
*/
enum { ROW_CTX_NONE = 0, ROW_CTX_QUOTE = 1, ROW_CTX_COMMENT = 2, ROW_CTX_EOF = 3 };
constexpr uint32_t rowofs_block_dim = 512;
/// Character block size for gather_row_offsets
constexpr uint32_t rowofs_block_bytes = rowofs_block_dim * 32; // 16KB/threadblock
/**
* Row parsing context with row count
* Format: row_count * 4 + id, where `row_count` is the number of rows
* in a character block, and `id` is the row parser state at the end of the block.
*/
using rowctx32_t = uint32_t;
using rowctx64_t = uint64_t;
/**
* Packed row context format
*
* The 64-bit packed row context format represents the four possible output row context states
* from each of the four possible input row context states.
* Each rowctx32_t value is truncated to 20-bit (limiting the max number of rows
* to 18-bit) and concatenated to form a 80-bit value, whose upper 16 bits are
* always zero (EOF input state implies a zero row count) and therefore
* stored as 64-bit.
*/
using packed_rowctx_t = uint64_t;
/**
* @brief return a row context from a {count, id} pair
*
* The 32-bit row context consists of the 2-bit parser state stored in the lower 2-bits
* and a 30-bit row count in the upper 30 bits.
*/
inline __host__ __device__ rowctx32_t make_row_context(uint32_t row_count, uint32_t out_ctx)
{
return (row_count << 2) + out_ctx;
}
/**
* @brief pack multiple row contexts together
*
* Pack four rowctx32_t values, where each value represents the output row context
* for one of four possible input contexts when parsing a character block.
* Each output state consists of the 2-bit row context state along with a 18-bit row count
* value (row count is assumed to be a local count that fits in 18-bit)
* The four 20-bit values are concatenated to form a 80-bit value, truncated to 64-bit
* since a block starting in a EOF state can only have a zero row count (and the output
* state corresponding to an EOF input state can only be EOF, so only the first 3 output
* states are included as parameters, and the EOF->EOF state transition is hardcoded)
*/
constexpr __host__ __device__ packed_rowctx_t pack_row_contexts(rowctx32_t ctx0,
rowctx32_t ctx1,
rowctx32_t ctx2)
{
return (ctx0) | (static_cast<uint64_t>(ctx1) << 20) | (static_cast<uint64_t>(ctx2) << 40) |
(static_cast<uint64_t>(ROW_CTX_EOF) << 60);
}
/**
* @brief Unpack a row context (select one of the 4 contexts in packed form)
*/
inline __host__ __device__ rowctx32_t get_row_context(packed_rowctx_t packed_ctx, uint32_t ctxid)
{
return static_cast<rowctx32_t>((packed_ctx >> (ctxid * 20)) & ((1 << 20) - 1));
}
/**
* @brief Select the output row context from a given input context and a packed row
* context corresponding to a block of characters, and return the new output context with
* updated total row count.
* The input context is a 64-bit version of the 32-bit single row context as returned
* by make_row_context(), so the maximum row count here is a 62-bit value.
*
* @param sel_ctx input context (2-bit context id, 62-bit row count)
* @param packed_ctx row context of character block
* @return total_row_count * 4 + output context id
*/
inline __host__ __device__ rowctx64_t select_row_context(rowctx64_t sel_ctx,
packed_rowctx_t packed_ctx)
{
auto ctxid = static_cast<uint32_t>(sel_ctx & 3);
rowctx32_t ctx = get_row_context(packed_ctx, ctxid);
return (sel_ctx & ~3) + ctx;
}
/**
* @brief Launches kernel to gather row offsets
*
* This is done in two phases: the first phase returns the possible row counts
* per 16K character block for each possible parsing context at the start of the block,
* along with the resulting parsing context at the end of the block.
* The caller can then compute the actual parsing context at the beginning of each
* individual block and total row count.
* The second phase outputs the location of each row in the block, using the parsing
* context and initial row counter resulting from the previous phase.
* Row parsing context will be updated after phase 2 such that the value contains
* the number of rows starting at byte_range_end or beyond.
*
* @param options Options that control parsing of individual fields
* @param row_ctx Row parsing context (output of phase 1 or input to phase 2)
* @param offsets_out Row offsets (nullptr for phase1, non-null indicates phase 2)
* @param data Base pointer of character data (all row offsets are relative to this)
* @param chunk_size Total number of characters to parse
* @param parse_pos Current parsing position in the file
* @param start_offset Position of the start of the character buffer in the file
* @param data_size CSV file size
* @param byte_range_start Ignore rows starting before this position in the file
* @param byte_range_end In phase 2, store the number of rows beyond range in row_ctx
* @param skip_rows Number of rows to skip (ignored in phase 1)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @return Number of row contexts
*/
uint32_t gather_row_offsets(cudf::io::parse_options_view const& options,
uint64_t* row_ctx,
device_span<uint64_t> offsets_out,
device_span<char const> data,
size_t chunk_size,
size_t parse_pos,
size_t start_offset,
size_t data_size,
size_t byte_range_start,
size_t byte_range_end,
size_t skip_rows,
rmm::cuda_stream_view stream);
/**
* Count the number of blank rows in the given row offset array
*
* @param options Options that control parsing of individual fields
* @param data Character data buffer
* @param row_offsets Row offsets in the character data buffer
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
size_t count_blank_rows(cudf::io::parse_options_view const& options,
device_span<char const> data,
device_span<uint64_t const> row_offsets,
rmm::cuda_stream_view stream);
/**
* Remove blank rows in the given row offset array
*
* @param options Options that control parsing of individual fields
* @param data Character data buffer
* @param row_offsets Row offsets in the character data buffer
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
device_span<uint64_t> remove_blank_rows(cudf::io::parse_options_view const& options,
device_span<char const> data,
device_span<uint64_t> row_offsets,
rmm::cuda_stream_view stream);
/**
* @brief Launches kernel for detecting possible dtype of each column of data
*
* @param[in] options Options that control individual field data conversion
* @param[in] data The row-column data
* @param[in] column_flags Flags that control individual column parsing
* @param[in] row_offsets List of row data start positions (offsets)
* @param[in] stream CUDA stream to use
*
* @return stats Histogram of each dtypes' occurrence for each column
*/
std::vector<column_type_histogram> detect_column_types(
cudf::io::parse_options_view const& options,
device_span<char const> data,
device_span<column_parse::flags const> column_flags,
device_span<uint64_t const> row_offsets,
size_t const num_active_columns,
rmm::cuda_stream_view stream);
/**
* @brief Launches kernel for decoding row-column data
*
* @param[in] options Options that control individual field data conversion
* @param[in] data The row-column data
* @param[in] column_flags Flags that control individual column parsing
* @param[in] row_offsets List of row data start positions (offsets)
* @param[in] dtypes List of dtype corresponding to each column
* @param[out] columns Device memory output of column data
* @param[out] valids Device memory output of column valids bitmap data
* @param[out] valid_counts Device memory output of the number of valid fields in each column
* @param[in] stream CUDA stream to use
*/
void decode_row_column_data(cudf::io::parse_options_view const& options,
device_span<char const> data,
device_span<column_parse::flags const> column_flags,
device_span<uint64_t const> row_offsets,
device_span<cudf::data_type const> dtypes,
device_span<void* const> columns,
device_span<cudf::bitmask_type* const> valids,
device_span<size_type> valid_counts,
rmm::cuda_stream_view stream);
} // namespace gpu
} // namespace csv
} // namespace io
} // namespace cudf