diff --git a/README.md b/README.md
index 5eede72..9391560 100644
--- a/README.md
+++ b/README.md
@@ -244,6 +244,7 @@ Here's a summary of the options available.
 | PREVIEW_ROWS | 1000 | The number of rows to insert to the data store. Set to 0 to insert all rows |
 | QSV_DEDUP | `True` | Automatically deduplicate rows? |
 | DEFAULT_EXCEL_SHEET | 0 | The zero-based index of the Excel sheet to export to CSV and insert into the Datastore. Negative values are accepted, i.e. -1 is the last sheet, -2 is 2nd to the last, etc. |
+| AUTO_ALIAS | `True` | Automatically create a resource alias (`RESOURCE_NAME-PACKAGE_NAME-OWNER_ORG`) that is easier to use in API calls and with the scheming `datastore_choices` helper |
 | WRITE_ENGINE_URL | | The Postgres connection string to use to write to the Datastore using Postgres COPY. This should be **similar** to your `ckan.datastore.write_url`, except you'll need to specify a new role with SUPERUSER privileges, |
 
 > NOTE: To do native PostgreSQL operations like TRUNCATE, VACUUM and COPY, a new
diff --git a/datapusher/jobs.py b/datapusher/jobs.py
index 8fb8e5b..c2f68eb 100644
--- a/datapusher/jobs.py
+++ b/datapusher/jobs.py
@@ -54,6 +54,11 @@
 else:
     QSV_DEDUP = True
 
+if web.app.config.get('AUTO_ALIAS') in ['False', 'FALSE', '0', False, 0]:
+    AUTO_ALIAS = False
+else:
+    AUTO_ALIAS = True
+
 if web.app.config.get('SSL_VERIFY') in ['False', 'FALSE', '0', False, 0]:
     SSL_VERIFY = False
 else:
@@ -119,7 +124,7 @@ def as_dict(self):
 
         """
         if self.response and len(self.response) > 200:
-            response = self.response[:200] + str.encode('...')
+            response = str.encode(self.response[:200] + '...')
         else:
             response = self.response
         return {
@@ -236,7 +241,7 @@ def datastore_resource_exists(resource_id, api_key, ckan_url):
 
 
 def send_resource_to_datastore(resource, headers, api_key, ckan_url,
-                               records, is_it_the_last_chunk, ):
+                               records, aliases, calculate_record_count, ):
     """
     Stores records in CKAN datastore
     """
@@ -244,7 +249,8 @@ def send_resource_to_datastore(resource, headers, api_key, ckan_url,
                'fields': headers,
                'force': True,
                'records': records,
-               'calculate_record_count': is_it_the_last_chunk}
+               'aliases': aliases,
+               'calculate_record_count': calculate_record_count}
 
     url = get_url('datastore_create', ckan_url)
     r = requests.post(url,
@@ -291,6 +297,22 @@ def get_resource(resource_id, ckan_url, api_key):
     return r.json()['result']
 
 
+def get_package(package_id, ckan_url, api_key):
+    """
+    Gets available information about a package from CKAN
+    """
+    url = get_url('package_show', ckan_url)
+    r = requests.post(url,
+                      verify=SSL_VERIFY,
+                      data=json.dumps({'id': package_id}),
+                      headers={'Content-Type': 'application/json',
+                               'Authorization': api_key}
+                      )
+    check_response(r, url, 'CKAN')
+
+    return r.json()['result']
+
+
 def validate_input(input):
     # Especially validate metadata which is provided by the user
     if 'metadata' not in input:
@@ -598,7 +620,7 @@ def push_to_datastore(task_id, input, dry_run=False):
 
     # first, let's create an empty datastore table w/ guessed types
     send_resource_to_datastore(resource, headers_dicts, api_key, ckan_url,
-                               records=None, is_it_the_last_chunk=False)
+                               records=None, aliases=None, calculate_record_count=False)
 
     # Guess the delimiter used in the file for copy
     with open(tmp.name, 'rb') as f:
@@ -656,6 +678,25 @@ def push_to_datastore(task_id, input, dry_run=False):
 
     resource['datastore_active'] = True
     update_resource(resource, api_key, ckan_url)
+    if AUTO_ALIAS:
+        # get package info, so we can construct the alias
+        package = get_package(resource['package_id'], ckan_url, api_key)
+
+        resource_name = resource.get('name')
+        package_name = package.get('name')
+        owner_org = package.get('organization')
+        owner_org_name = owner_org.get('name') if owner_org else None
+        if resource_name and package_name and owner_org_name:
+            alias = f"{resource_name}-{package_name}-{owner_org_name}"
+        else:
+            alias = None
+
+        # tell CKAN to calculate_record_count and set the alias, if we have one
+        send_resource_to_datastore(resource, headers_dicts, api_key, ckan_url,
+                                   records=None, aliases=alias, calculate_record_count=True)
+        if alias:
+            logger.info('Created alias: {}'.format(alias))
+
     # cleanup temporary files
     if os.path.exists(tmp.name + ".idx"):
         os.remove(tmp.name + ".idx")
diff --git a/datapusher/settings.py b/datapusher/settings.py
index f98d7f0..2739a6a 100644
--- a/datapusher/settings.py
+++ b/datapusher/settings.py
@@ -21,6 +21,8 @@
 # set this to the same value as your ckan.datastore.write_url
 WRITE_ENGINE_URL = os.environ.get('DATAPUSHER_WRITE_ENGINE_URL', 'postgresql://datapusher:THEPASSWORD@localhost/datastore_default')
 
+AUTO_ALIAS = bool(int(os.environ.get('DATAPUSHER_AUTO_ALIAS', '1')))
+
 # qsv settings
 QSV_BIN = os.environ.get('DATAPUSHER_QSV_BIN', '/usr/local/bin/qsvlite')
 QSV_DEDUP = bool(int(os.environ.get('DATAPUSHER_QSV_DEDUP', '1')))
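
Reviewer note on why the alias is useful: CKAN's `datastore_search` action accepts a Datastore alias in place of the resource UUID, so API consumers can rely on the predictable `RESOURCE_NAME-PACKAGE_NAME-OWNER_ORG` name this patch generates. The sketch below is illustrative only; the CKAN URL, alias value, and API token are placeholders, not values taken from this change.

```python
# Minimal sketch: query the Datastore through the auto-created alias
# instead of the resource UUID. CKAN_URL, ALIAS, and API_KEY are
# hypothetical placeholders for your own instance.
import json
import requests

CKAN_URL = 'http://ckan.example.com'                    # hypothetical CKAN instance
ALIAS = 'air-quality-2023-environment-data-city-gov'    # hypothetical RESOURCE-PACKAGE-ORG alias
API_KEY = 'YOUR-API-TOKEN'                              # only needed for private datasets

r = requests.post(
    f'{CKAN_URL}/api/3/action/datastore_search',
    data=json.dumps({'resource_id': ALIAS, 'limit': 5}),  # alias stands in for the resource id
    headers={'Content-Type': 'application/json',
             'Authorization': API_KEY},
)
r.raise_for_status()
print(json.dumps(r.json()['result']['records'], indent=2))
```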