AUTO_ALIAS #26

Merged
merged 3 commits on May 2, 2022
1 change: 1 addition & 0 deletions README.md
@@ -244,6 +244,7 @@ Here's a summary of the options available.
| PREVIEW_ROWS | 1000 | The number of rows to insert to the data store. Set to 0 to insert all rows |
| QSV_DEDUP | `True` | Automatically deduplicate rows? |
| DEFAULT_EXCEL_SHEET | 0 | The zero-based index of the Excel sheet to export to CSV and insert into the Datastore. Negative values are accepted, i.e. -1 is the last sheet, -2 is 2nd to the last, etc. |
| AUTO_ALIAS | `True` | Automatically create a resource alias (RESOURCE_NAME-PACKAGE_NAME-OWNER_ORG) that's easier to use in API calls and with the scheming datastore_choices helper; see the usage sketch after this table |
| WRITE_ENGINE_URL | | The Postgres connection string to use to write to the Datastore using Postgres COPY. This should be **similar** to your `ckan.datastore.write_url`, except you'll need to specify a new role with SUPERUSER privileges, |
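
As a usage illustration (not part of this diff): an alias created by AUTO_ALIAS can be used in Datastore API calls wherever a resource_id is expected. A minimal sketch, where the CKAN URL and the alias are hypothetical placeholders:

```python
# Minimal sketch: querying the Datastore through an AUTO_ALIAS-style alias
# instead of the resource UUID. The URL and alias below are placeholders.
import json
import requests

CKAN_URL = 'https://ckan.example.org'
ALIAS = 'my-resource-my-dataset-my-org'  # RESOURCE_NAME-PACKAGE_NAME-OWNER_ORG

r = requests.post(
    f'{CKAN_URL}/api/3/action/datastore_search',
    data=json.dumps({'resource_id': ALIAS, 'limit': 5}),
    headers={'Content-Type': 'application/json'},
)
r.raise_for_status()
print(r.json()['result']['records'])
```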

> NOTE: To do native PostgreSQL operations like TRUNCATE, VACUUM and COPY, a new
50 changes: 46 additions & 4 deletions datapusher/jobs.py
@@ -54,6 +54,11 @@
else:
    QSV_DEDUP = True

if web.app.config.get('AUTO_ALIAS') in ['False', 'FALSE', '0', False, 0]:
    AUTO_ALIAS = False
else:
    AUTO_ALIAS = True

if web.app.config.get('SSL_VERIFY') in ['False', 'FALSE', '0', False, 0]:
    SSL_VERIFY = False
else:
@@ -119,7 +124,7 @@ def as_dict(self):

"""
if self.response and len(self.response) > 200:
response = self.response[:200] + str.encode('...')
response = str.encode(self.response[:200] + '...')
else:
response = self.response
return {
@@ -236,15 +241,16 @@ def datastore_resource_exists(resource_id, api_key, ckan_url):


def send_resource_to_datastore(resource, headers, api_key, ckan_url,
                               records, is_it_the_last_chunk, ):
                               records, aliases, calculate_record_count, ):
    """
    Stores records in CKAN datastore
    """
    request = {'resource_id': resource['id'],
               'fields': headers,
               'force': True,
               'records': records,
               'calculate_record_count': is_it_the_last_chunk}
               'aliases': aliases,
               'calculate_record_count': calculate_record_count}

    url = get_url('datastore_create', ckan_url)
    r = requests.post(url,
@@ -291,6 +297,22 @@ def get_resource(resource_id, ckan_url, api_key):
    return r.json()['result']


def get_package(package_id, ckan_url, api_key):
    """
    Gets available information about a package from CKAN
    """
    url = get_url('package_show', ckan_url)
    r = requests.post(url,
                      verify=SSL_VERIFY,
                      data=json.dumps({'id': package_id}),
                      headers={'Content-Type': 'application/json',
                               'Authorization': api_key}
                      )
    check_response(r, url, 'CKAN')

    return r.json()['result']


def validate_input(input):
    # Especially validate metadata which is provided by the user
    if 'metadata' not in input:
@@ -598,7 +620,7 @@ def push_to_datastore(task_id, input, dry_run=False):

    # first, let's create an empty datastore table w/ guessed types
    send_resource_to_datastore(resource, headers_dicts, api_key, ckan_url,
                               records=None, is_it_the_last_chunk=False)
                               records=None, aliases=None, calculate_record_count=False)

    # Guess the delimiter used in the file for copy
    with open(tmp.name, 'rb') as f:
@@ -656,6 +678,26 @@
    resource['datastore_active'] = True
    update_resource(resource, api_key, ckan_url)

    # default to no alias; only set one when AUTO_ALIAS builds it below
    alias = None
    if AUTO_ALIAS:
        # get package info, so we can construct the alias
        package = get_package(resource['package_id'], ckan_url, api_key)

        resource_name = resource.get('name')
        package_name = package.get('name')
        owner_org = package.get('organization')
        # guard against packages without an organization
        owner_org_name = None
        if owner_org:
            owner_org_name = owner_org.get('name')
        if resource_name and package_name and owner_org_name:
            alias = f"{resource_name}-{package_name}-{owner_org_name}"
        else:
            alias = None

    # tell CKAN to calculate_record_count and set alias if set
    send_resource_to_datastore(resource, headers_dicts, api_key, ckan_url,
                               records=None, aliases=alias, calculate_record_count=True)
    if alias:
        logger.info('Created alias: {}'.format(alias))

    # cleanup temporary files
    if os.path.exists(tmp.name + ".idx"):
        os.remove(tmp.name + ".idx")
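
For context on the datastore_create payload that send_resource_to_datastore() now builds, here is a sketch (not part of the diff); the resource id, field list, and alias are hypothetical placeholders:

```python
# Sketch of the JSON body posted to CKAN's datastore_create action when the
# table is touched again with an alias and a record-count refresh.
# All values below are illustrative placeholders.
import json

payload = {
    'resource_id': '0bf4b0a6-615d-4ceb-82d7-1f1b91ecaca5',   # hypothetical resource UUID
    'fields': [{'id': 'fips', 'type': 'text'},               # hypothetical guessed types
               {'id': 'population', 'type': 'numeric'}],
    'force': True,
    'records': None,
    'aliases': 'my-resource-my-dataset-my-org',              # RESOURCE_NAME-PACKAGE_NAME-OWNER_ORG
    'calculate_record_count': True,
}
print(json.dumps(payload, indent=2))
```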
2 changes: 2 additions & 0 deletions datapusher/settings.py
@@ -21,6 +21,8 @@
# set this to the same value as your ckan.datastore.write_url
WRITE_ENGINE_URL = os.environ.get('DATAPUSHER_WRITE_ENGINE_URL', 'postgresql://datapusher:THEPASSWORD@localhost/datastore_default')

AUTO_ALIAS = bool(int(os.environ.get('DATAPUSHER_AUTO_ALIAS', '1')))

# qsv settings
QSV_BIN = os.environ.get('DATAPUSHER_QSV_BIN', '/usr/local/bin/qsvlite')
QSV_DEDUP = bool(int(os.environ.get('DATAPUSHER_QSV_DEDUP', '1')))
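
As a side note on the two AUTO_ALIAS checks (the `bool(int(...))` default above and the string check in jobs.py), here is a small standalone sketch of how they interact; the dict stands in for web.app.config and is an assumption of this illustration:

```python
# Standalone sketch: DATAPUSHER_AUTO_ALIAS only accepts numeric strings here
# ('0' disables, '1' enables); a value like 'False' would raise ValueError.
# The string check in jobs.py additionally covers settings files that supply
# 'False'/'FALSE'/'0' as strings.
import os

os.environ['DATAPUSHER_AUTO_ALIAS'] = '0'
AUTO_ALIAS_DEFAULT = bool(int(os.environ.get('DATAPUSHER_AUTO_ALIAS', '1')))

config = {'AUTO_ALIAS': AUTO_ALIAS_DEFAULT}   # stand-in for web.app.config
AUTO_ALIAS = config.get('AUTO_ALIAS') not in ['False', 'FALSE', '0', False, 0]

print(AUTO_ALIAS)   # False, because the env var was set to '0'
```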