@@ -131,8 +131,11 @@ func (bi *cloudBatchIterator) Next() (SparkArrowBatch, error) {
131
131
link .StartRowOffset ,
132
132
link .RowCount ,
133
133
)
134
+
135
+ cancelCtx , cancelFn := context .WithCancel (bi .ctx )
134
136
task := & cloudFetchDownloadTask {
135
- ctx : bi .ctx ,
137
+ ctx : cancelCtx ,
138
+ cancel : cancelFn ,
136
139
useLz4Compression : bi .cfg .UseLz4Compression ,
137
140
link : link ,
138
141
resultChan : make (chan cloudFetchDownloadTaskResult ),
@@ -147,17 +150,29 @@ func (bi *cloudBatchIterator) Next() (SparkArrowBatch, error) {
147
150
return nil , io .EOF
148
151
}
149
152
150
- return task .GetResult ()
153
+ batch , err := task .GetResult ()
154
+
155
+ // once we've got an errored out task - cancel the remaining ones
156
+ if err != nil {
157
+ bi .Close ()
158
+ return nil , err
159
+ }
160
+
161
+ // explicitly call cancel function on successfully completed task to avoid context leak
162
+ task .cancel ()
163
+ return batch , nil
151
164
}
152
165
153
166
// HasNext reports whether the iterator still holds anything to hand out:
// it is true when pendingLinks is non-empty or, failing that, when
// downloadTasks is non-empty.
func (bi *cloudBatchIterator) HasNext() bool {
	// Checked first so downloadTasks is only consulted when no links remain,
	// matching short-circuit evaluation of an || expression.
	if bi.pendingLinks.Len() > 0 {
		return true
	}
	return bi.downloadTasks.Len() > 0
}
156
169
157
170
func (bi * cloudBatchIterator ) Close () {
158
- bi .pendingLinks .Clear () // Clear the list
159
- // TODO: Cancel all download tasks
160
- bi .downloadTasks .Clear () // Clear the list
171
+ bi .pendingLinks .Clear ()
172
+ for bi .downloadTasks .Len () > 0 {
173
+ task := bi .downloadTasks .Dequeue ()
174
+ task .cancel ()
175
+ }
161
176
}
162
177
163
178
type cloudFetchDownloadTaskResult struct {
@@ -167,6 +182,7 @@ type cloudFetchDownloadTaskResult struct {
167
182
168
183
type cloudFetchDownloadTask struct {
169
184
ctx context.Context
185
+ cancel context.CancelFunc
170
186
useLz4Compression bool
171
187
minTimeToExpiry time.Duration
172
188
link * cli_service.TSparkArrowResultLink
@@ -180,9 +196,10 @@ func (cft *cloudFetchDownloadTask) GetResult() (SparkArrowBatch, error) {
180
196
if ok {
181
197
if result .err != nil {
182
198
logger .Debug ().Msgf (
183
- "CloudFetch: failed to download link at offset %d row count %d" ,
199
+ "CloudFetch: failed to download link at offset %d row count %d, reason: %s " ,
184
200
link .StartRowOffset ,
185
201
link .RowCount ,
202
+ result .err .Error (),
186
203
)
187
204
return nil , result .err
188
205
}
0 commit comments