-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathfile_factory.py
159 lines (135 loc) · 5.52 KB
/
file_factory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
__author__ = "shreyassharma"
"""
Copyright 2023 The aiXplain SDK authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Author: aiXplain team
Date: March 20th 2023
Description:
File Factory Class
"""
import os
import validators
import filetype
from aixplain.enums.storage_type import StorageType
from aixplain.enums.license import License
from aixplain.utils.file_utils import upload_data
from typing import Any, Dict, Text, Union, Optional, List
# Size thresholds in bytes, each written as a multiple of one mebibyte.
MB_1 = 1024 * 1024
MB_25 = 25 * MB_1
MB_50 = 50 * MB_1
MB_300 = 300 * MB_1
MB_500 = 500 * MB_1
class FileFactory:
    """Class-method helpers to upload local files and classify input locations."""

    @classmethod
    def upload(
        cls, local_path: Text, tags: Optional[List[Text]] = None, license: Optional[License] = None, is_temp: bool = True
    ) -> Text:
        """
        Uploads a file to an S3 bucket after checking it against a per-category size limit.

        The category is "database" for paths ending in ".db"; otherwise it is the major part
        of the MIME type guessed by `filetype` when that part has a configured limit, and
        "other" in every remaining case (including when no MIME type could be guessed).

        Args:
            local_path (Text): The local path of the file to upload.
            tags (List[Text], optional): tags of the file. Only forwarded when `is_temp` is False.
            license (License, optional): the license for the file. Only forwarded when `is_temp` is False.
            is_temp (bool): specify if the file that will be upload is a temporary file

        Returns:
            Text: The S3 path where the file was uploaded (the value returned by `upload_data`).

        Raises:
            FileNotFoundError: If the local file is not found.
            Exception: If the file size exceeds the maximum allowed size.
        """
        if os.path.exists(local_path) is False:
            raise FileNotFoundError(f'File Upload Error: local file "{local_path}" not found.')

        # mime type format: {type}/{extension}
        mime_type = filetype.guess_mime(local_path)
        # Fall back to CSV when the type cannot be guessed from the file content.
        content_type = "text/csv" if mime_type is None else mime_type

        type_to_max_size = {
            "audio": MB_50,
            "application": MB_25,
            "video": MB_300,
            "image": MB_25,
            "other": MB_50,
            "database": MB_500,
        }

        # Major MIME part ("audio", "image", ...), or None when nothing was guessed.
        major_type = None if mime_type is None else mime_type.split("/")[0]
        if local_path.endswith(".db"):
            # The ".db" extension takes precedence over whatever MIME type was guessed.
            ftype = "database"
        elif major_type is None or major_type not in type_to_max_size:
            ftype = "other"
        else:
            ftype = major_type

        max_size = type_to_max_size[ftype]
        if os.path.getsize(local_path) > max_size:
            raise Exception(
                f'File Upload Error: local file "{local_path}" of type "{mime_type}" exceeds {max_size / MB_1} MB.'
            )

        if is_temp is False:
            s3_path = upload_data(file_name=local_path, tags=tags, license=license, is_temp=is_temp, content_type=content_type)
        else:
            # Temporary uploads rely on the defaults of `upload_data` for everything but the path.
            s3_path = upload_data(file_name=local_path)
        return s3_path

    @classmethod
    def check_storage_type(cls, input_link: Any) -> StorageType:
        """Check whether a path is a URL (s3 link or HTTP link), a file or a textual content

        Args:
            input_link (Any): path to be checked

        Returns:
            StorageType: FILE when the value is an existing regular file on disk; URL when it
                starts with "s3://", "http://" or "https://" or is accepted by `validators.url`;
                TEXT otherwise.
        """
        # The filesystem check comes first, so an existing local file always wins.
        if os.path.exists(input_link) is True and os.path.isfile(input_link) is True:
            return StorageType.FILE
        if input_link.startswith(("s3://", "http://", "https://")) or validators.url(input_link):
            return StorageType.URL
        return StorageType.TEXT

    @classmethod
    def to_link(cls, data: Union[Text, Dict], **kwargs) -> Union[Text, Dict]:
        """If user input data is a local file, upload to aiXplain platform

        For a dictionary, every string value that points to a local file is replaced by the
        result of `upload`; the dictionary is updated in place and also returned. Values of
        any other type, and inputs that are neither `dict` nor `str`, are returned untouched.

        Args:
            data (Union[Text, Dict]): input data
            **kwargs: extra keyword arguments forwarded to `upload`.

        Returns:
            Union[Text, Dict]: input links/texts
        """
        if isinstance(data, dict):
            # Only values of existing keys are reassigned, which is safe while iterating.
            for key in data:
                value = data[key]
                if isinstance(value, str) and cls.check_storage_type(value) == StorageType.FILE:
                    data[key] = cls.upload(local_path=value, **kwargs)
        elif isinstance(data, str):
            if cls.check_storage_type(data) == StorageType.FILE:
                data = cls.upload(local_path=data, **kwargs)
        return data

    @classmethod
    def create(
        cls, local_path: Text, tags: Optional[List[Text]] = None, license: Optional[License] = None, is_temp: bool = False
    ) -> Text:
        """
        Uploads a file to an S3 bucket, requiring a license for non-temporary files.

        Args:
            local_path (Text): The local path of the file to upload.
            tags (List[Text], optional): tags of the file
            license (License, optional): the license for the file. Mandatory when `is_temp` is False.
            is_temp (bool): specify if the file that will be upload is a temporary file

        Returns:
            Text: The S3 path where the file was uploaded.

        Raises:
            AssertionError: If `is_temp` is False and no `license` is given.
            FileNotFoundError: If the local file is not found.
            Exception: If the file size exceeds the maximum allowed size.
        """
        # Raised explicitly rather than through an `assert` statement so the check is not
        # stripped when Python runs with -O; the exception type stays AssertionError.
        if is_temp is False and license is None:
            raise AssertionError(
                "File Asset Creation Error: To upload a non-temporary file, you need to specify the `license`."
            )
        return cls.upload(local_path=local_path, tags=tags, license=license, is_temp=is_temp)