-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathdx_describe.go
211 lines (186 loc) · 5.21 KB
/
dx_describe.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
package dxda
import (
"context"
"encoding/json"
"net/http"
"strings"
)
// Limits used when issuing bulk-describe requests.
const (
	// Limit on the number of objects that the bulk-describe API can take
	// in a single request. DxDescribeBulkObjects uses it as its batch size.
	maxNumObjectsInDescribe = 1000

	// Value handed to DxAPI on every describe request (presumably the
	// number of retries, going by the name -- confirm against DxAPI).
	numRetriesDefault = 10
)
// DxDescribeDataObject is the description of a DNAx data object, as
// returned (keyed by object id) from DxDescribeBulkObjects.
type DxDescribeDataObject struct {
	// Object id; also the key under which the description is stored in
	// the result map.
	Id string
	// Id of the project the object was reported in.
	ProjId string
	Name string
	State string
	ArchivalState string
	// NOTE(review): submit does not request or fill in the folder, so
	// this is left as the empty string by the code in this file.
	Folder string
	Size int64
	// a list of parts for a DNAx file, keyed by a string taken from the
	// JSON reply. Nil when the reply carried no "parts".
	Parts map[string]DXPart
	// Non-nil only when the reply had no parts but did name a drive;
	// such objects are treated as symbolic links.
	Symlink *DXSymlink
}
// DXPart is the description of one part of a file.
type DXPart struct {
	// we add the part-id in a post-processing step
	// (that step is not part of this section of the file).
	Id int
	// these fields are in the input JSON
	MD5 string `json:"md5"`
	Size int `json:"size"`
}
// DXSymlink describes a symbolic link: its drive, and the MD5 checksum
// for the entire file.
//
// NOTE(review): an earlier comment spoke of "a full URL for symbolic
// links", but this struct holds no URL; the raw URL is decoded into
// DxSymlinkRaw and is not copied here.
type DXSymlink struct {
	// Drive named in the describe reply.
	Drive string
	// MD5 checksum named in the describe reply.
	MD5 string
}
// Request is the JSON body that submit sends when it is not given a
// project or container id to scope the query with.
type Request struct {
	// Ids of the objects to describe; encoded under the "objects" key.
	Objects []string `json:"objects"`
	// Describe options (submit passes a "fields" selection); encoded
	// under the "describe" key.
	DescribeOptions map[string]map[string]bool `json:"describe"`
}
// RequestWithScope is the JSON body that submit sends when the caller's
// projectId starts with "project-" or "container-".
type RequestWithScope struct {
	// Ids of the objects to describe. Note that, unlike Request, these
	// are encoded under the "id" key.
	Objects []string `json:"id"`
	// Scope of the query; submit fills it with {"project": projectId}.
	Scope map[string]string `json:"scope"`
	// Describe options (submit passes a "fields" selection); encoded
	// under the "describe" key.
	DescribeOptions map[string]map[string]bool `json:"describe"`
}
// Reply is the top level of the JSON response that submit decodes.
type Reply struct {
	// One entry per object reported back, taken from the "results" key.
	Results []DxDescribeRawTop `json:"results"`
}
// DxDescribeRawTop is a single element of Reply.Results. Only its
// "describe" member is decoded.
type DxDescribeRawTop struct {
	Describe DxDescribeRaw `json:"describe"`
}
// DxSymlinkRaw is the decoded form of the "symlinkPath" member of a
// describe reply.
type DxSymlinkRaw struct {
	// Taken from the "object" key of the symlinkPath value.
	Url string `json:"object"`
}
// DxDescribeRaw is the description of one object exactly as it is decoded
// from the JSON reply, before submit converts it to a
// DxDescribeDataObject.
type DxDescribeRaw struct {
	Id string `json:"id"`
	ProjId string `json:"project"`
	Name string `json:"name"`
	State string `json:"state"`
	ArchivalState string `json:"archivalState"`
	Size int64 `json:"size"`
	// Nil when the reply has no "parts" member.
	Parts map[string]DXPart `json:"parts"`
	// The pointer fields below stay nil when the corresponding key is
	// missing from the reply.
	Symlink *DxSymlinkRaw `json:"symlinkPath,omitempty"`
	MD5 *string `json:"md5,omitempty"`
	Drive *string `json:"drive,omitempty"`
}
// submit describes a large number of file-ids in one API call.
//
// The request asks only for the fields needed to fill in a
// DxDescribeDataObject. When projectId starts with "project-" or
// "container-", the request additionally carries a scope restricted to it.
//
// It returns a map from object id to its description. An error from
// encoding the request, from DxAPI, or from decoding the reply is returned
// unchanged, together with a nil map.
func submit(
	ctx context.Context,
	httpClient *http.Client,
	dxEnv *DXEnvironment,
	projectId string,
	fileIds []string) (map[string]DxDescribeDataObject, error) {
	// Limit the number of fields returned, because by default we
	// get too much information, which is a burden on the server side.
	describeOptions := map[string]map[string]bool{
		"fields": {
			"id":            true,
			"project":       true,
			"name":          true,
			"state":         true,
			"archivalState": true,
			"size":          true,
			"parts":         true,
			"symlinkPath":   true,
			"drive":         true,
			"md5":           true,
		},
	}

	// Pick the request shape first, then encode it in one place so that
	// the error is a properly declared local variable.
	var request interface{}
	if strings.HasPrefix(projectId, "project-") || strings.HasPrefix(projectId, "container-") {
		// If given a valid project or container provide the scope
		// parameter to reduce load on the backend.
		request = RequestWithScope{
			Objects:         fileIds,
			Scope:           map[string]string{"project": projectId},
			DescribeOptions: describeOptions,
		}
	} else {
		// NOTE(review): this body names the ids under "objects", whereas
		// the scoped body uses "id", yet both go to the same
		// system/findDataObjects route below. Confirm that the route
		// honours "objects"; otherwise the unscoped query is not
		// restricted to fileIds.
		request = Request{
			Objects:         fileIds,
			DescribeOptions: describeOptions,
		}
	}
	payload, err := json.Marshal(request)
	if err != nil {
		return nil, err
	}

	repJs, err := DxAPI(ctx, httpClient, numRetriesDefault, dxEnv, "system/findDataObjects", string(payload))
	if err != nil {
		return nil, err
	}
	var reply Reply
	if err := json.Unmarshal(repJs, &reply); err != nil {
		return nil, err
	}

	files := make(map[string]DxDescribeDataObject, len(reply.Results))
	for _, descRawTop := range reply.Results {
		descRaw := descRawTop.Describe

		// If this is a symlink (no parts, but a drive), create a
		// structure with all the relevant information.
		var symlink *DXSymlink
		if descRaw.Parts == nil && descRaw.Drive != nil {
			symlink = &DXSymlink{Drive: *descRaw.Drive}
			// The reply may lack "md5"; leave the checksum empty in that
			// case instead of dereferencing a nil pointer.
			if descRaw.MD5 != nil {
				symlink.MD5 = *descRaw.MD5
			}
		}

		files[descRaw.Id] = DxDescribeDataObject{
			Id:            descRaw.Id,
			ProjId:        descRaw.ProjId,
			Name:          descRaw.Name,
			State:         descRaw.State,
			ArchivalState: descRaw.ArchivalState,
			Size:          descRaw.Size,
			Parts:         descRaw.Parts,
			Symlink:       symlink,
		}
	}
	return files, nil
}
// DxDescribeBulkObjects describes an arbitrary number of objects.
//
// The ids are cut into consecutive batches of at most
// maxNumObjectsInDescribe entries; each batch is handed to submit in
// order, and the per-batch results are merged into one map keyed by
// object id. An empty objIds yields an empty, non-nil map without any
// request being made. The first error from submit aborts the whole call
// and is returned with a nil map.
func DxDescribeBulkObjects(
	ctx context.Context,
	httpClient *http.Client,
	dxEnv *DXEnvironment,
	projectId string,
	objIds []string) (map[string]DxDescribeDataObject, error) {
	merged := map[string]DxDescribeDataObject{}

	total := len(objIds)
	// Walk the ids one batch-sized window at a time; the last window is
	// simply whatever is left over.
	for lo := 0; lo < total; lo += maxNumObjectsInDescribe {
		hi := lo + maxNumObjectsInDescribe
		if hi > total {
			hi = total
		}

		// The capacity is capped so the window cannot reach into the
		// ids that follow it.
		described, err := submit(ctx, httpClient, dxEnv, projectId, objIds[lo:hi:hi])
		if err != nil {
			return nil, err
		}

		// Fold this batch into the overall result.
		for id, desc := range described {
			merged[id] = desc
		}
	}
	return merged, nil
}