
Commit becfc4f

Merge pull request #44 from spectriclabs/dev
Add tests for current filter types and refactor filter creation
2 parents 8933791 + bf00330

12 files changed: +139 -230 lines


Diff for: .github/workflows/test.yml (+1 -1)

@@ -2,7 +2,7 @@ name: "Build, Test, and Publish Test Docker Image"
 
 on:
   push:
-    branches: [ master ]
+    branches: [ master, dev ]
 
 env:
   REGISTRY: ghcr.io

Diff for: Dockerfile (+1 -1)

@@ -38,7 +38,7 @@ ENTRYPOINT [ "gunicorn", \
     "--ciphers","ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES256-GCM-SHA384", \
     "--chdir", "/opt/elastic_datashader", \
     "-c", "/opt/elastic_datashader/gunicorn_config.py", \
-    "--max-requests", "40", \
+    "--max-requests", "400", \
     "--workers", "30", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "elastic_datashader:app" \

Diff for: elastic_datashader/cache.py (+11)

@@ -2,6 +2,8 @@
 from collections import OrderedDict
 from datetime import datetime, timedelta, timezone
 from os import scandir
+import os
+from contextlib import suppress
 from pathlib import Path
 from shutil import rmtree
 from time import time

@@ -162,6 +164,15 @@ def age_off_cache(cache_path: Path, idx_name: str, max_age: timedelta) -> None:
         # set missing_ok=True in case another process deleted the same file
         file_path.unlink(missing_ok=True)
 
+    # clear all empty dirs and dirs that contain empty dirs to prevent build-up of param hash directories
+    remove_empty_dirs(cache_path / idx_name)
+
+def remove_empty_dirs(path: Path):
+    for root, dirs, _ in os.walk(path, topdown=False):
+        for d in dirs:
+            with suppress(OSError):
+                os.rmdir(Path(root, d))
+
 def get_idx_names(cache_path: Path) -> Iterable[str]:
     for path in cache_path.glob("*"):
         if path.is_dir():
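
The bottom-up walk is what lets remove_empty_dirs clear a whole chain in a single pass: with topdown=False, os.walk yields the deepest directories first, so removing an empty leaf makes its parent removable later in the same walk, while suppress(OSError) skips directories that still hold files. A self-contained sketch of the same logic (the directory names are hypothetical):

import os
from contextlib import suppress
from pathlib import Path
from tempfile import mkdtemp

def remove_empty_dirs(path: Path):
    # deepest-first, so emptied children make their parents removable
    for root, dirs, _ in os.walk(path, topdown=False):
        for d in dirs:
            with suppress(OSError):  # raised when a directory is not empty
                os.rmdir(Path(root, d))

cache = Path(mkdtemp())
(cache / "fooindex/somehash/3/1").mkdir(parents=True)    # stale param-hash chain
(cache / "fooindex/otherhash").mkdir(parents=True)
(cache / "fooindex/otherhash/2.png").write_bytes(b"")    # still-cached tile

remove_empty_dirs(cache / "fooindex")
print((cache / "fooindex/somehash").exists())   # False: whole empty chain removed
print((cache / "fooindex/otherhash").exists())  # True: non-empty dir survives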

Diff for: elastic_datashader/config.py (-6)

@@ -18,7 +18,6 @@ class Config:
     cache_cleanup_interval: timedelta
     cache_path: Path
     cache_timeout: timedelta
-    csrf_secret_key: str
     datashader_headers: Dict[Any, Any]
     elastic_hosts: str
     ellipse_render_mode: str

@@ -29,8 +28,6 @@ class Config:
     max_ellipses_per_tile: int
     max_legend_items_per_tile: int
     num_ellipse_points: int
-    proxy_host: Optional[str]
-    proxy_prefix: str
     query_timeout_seconds: int
     render_timeout: timedelta
     tms_key: Optional[str]

@@ -93,7 +90,6 @@ def config_from_env(env) -> Config:
         cache_cleanup_interval=timedelta(seconds=int(env.get("DATASHADER_CACHE_CLEANUP_INTERVAL", 5*60))),
         cache_path=Path(env.get("DATASHADER_CACHE_DIRECTORY", "tms-cache")),
         cache_timeout=timedelta(seconds=int(env.get("DATASHADER_CACHE_TIMEOUT", 60*60))),
-        csrf_secret_key=env.get("DATASHADER_CSRF_SECRET_KEY", "CSRFProtectionKey"),
         datashader_headers=load_datashader_headers(env.get("DATASHADER_HEADER_FILE", "headers.yaml")),
         elastic_hosts=env.get("DATASHADER_ELASTIC", "http://localhost:9200"),
         ellipse_render_mode=env.get("DATASHADER_ELLIPSE_RENDER_MODE", "matrix"),

@@ -104,8 +100,6 @@ def config_from_env(env) -> Config:
         max_ellipses_per_tile=int(env.get("DATASHADER_MAX_ELLIPSES_PER_TILE", 100_000)),
         max_legend_items_per_tile=int(env.get("MAX_LEGEND_ITEMS_PER_TILE", 20)),
         num_ellipse_points=int(env.get("DATASHADER_NUM_ELLIPSE_POINTS", 100)),
-        proxy_host=env.get("DATASHADER_PROXY_HOST", None),
-        proxy_prefix=env.get("DATASHADER_PROXY_PREFIX", ""),
         query_timeout_seconds=int(env.get("DATASHADER_QUERY_TIMEOUT", 0)),
         render_timeout=timedelta(seconds=int(env.get("DATASHADER_RENDER_TIMEOUT", 30))),
         tms_key=env.get("DATASHADER_TMS_KEY", None),
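
Because config_from_env takes the environment as a mapping rather than reading os.environ directly, the defaults above are easy to exercise in tests. A usage sketch with arbitrary example values (assuming the remaining defaults resolve in your environment):

from os import environ
from elastic_datashader.config import config_from_env

config = config_from_env(environ)  # production: the real environment

# tests: a plain dict overrides just what the test cares about
test_config = config_from_env({
    "DATASHADER_CACHE_TIMEOUT": "120",       # becomes timedelta(seconds=120)
    "DATASHADER_ELASTIC": "http://es:9200",
})
assert test_config.cache_timeout.seconds == 120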

Diff for: elastic_datashader/elastic.py (+13 -141)

@@ -158,26 +158,6 @@ def convert_nm_to_ellipse_units(distance: float, units: str) -> float:
     # NB. assume "majmin_m" if any others
     return distance * 1852
 
-def get_field_type(elastic_hosts: str, headers: Optional[str], params: Dict[str, Any], field: str, idx: str) -> str:
-    user = params.get("user")
-    x_opaque_id = params.get("x-opaque-id")
-    es = Elasticsearch(
-        elastic_hosts.split(","),
-        verify_certs=False,
-        timeout=900,
-        headers=get_es_headers(headers, user, x_opaque_id),
-    )
-    if idx.find("*:") != -1:
-        idx = idx[idx.find("*:")+2:]  # when you query for mappings if it is cross cluster you don't get a mapping
-    mappings = es.indices.get_field_mapping(fields=field, index=idx)
-    # {'foot_prints': {'mappings': {'foot_print': {'full_name': 'foot_print', 'mapping': {'foot_print': {'type': 'geo_shape'}}}}}}
-    index = list(mappings.keys())[0]  # if index is my_index* it comes back as my_index
-    field_parts = field.split(".")
-    try:
-        return mappings[index]['mappings'][field]['mapping'][field_parts[-1]]['type']  # handles 'geo_center' or a nested object {signal:{geo:{location:{}}}}
-    except AttributeError:
-        return mappings[index]['mappings'][field]['mapping'][field]['type']  # handles literal string with periods 'signal.geo.location'
-
 def get_search_base(
     elastic_hosts: str,
     headers: Optional[str],

@@ -271,21 +251,6 @@ def get_search_base(
 
     return base_s
 
-def handle_range_or_exists_filters(filter_input: Dict[Any, Any]) -> Dict[str, Any]:
-    """
-    `range` and `exists` filters can appear either directly under
-    `filter[]` or under `filter[].query` depending on the version
-    of Kibana, the former being the old way, so they need special
-    handling for backward compatibility.
-    """
-    filter_type = filter_input.get("meta").get("type")  # "range" or "exists"
-
-    # Handle old query structure for backward compatibility
-    if filter_input.get(filter_type) is not None:
-        return {filter_type: filter_input.get(filter_type)}
-
-    return filter_input.get("query")
-
 def build_dsl_filter(filter_inputs) -> Optional[Dict[str, Any]]:
     """

@@ -309,78 +274,25 @@ def build_dsl_filter(filter_inputs) -> Optional[Dict[str, Any]]:
             f.get("geo_shape") or
             f.get("geo_distance")
         )
-
-        # Handle spatial filters
-        if is_spatial_filter:
-            if f.get("geo_polygon"):
-                geo_polygon_dict = {"geo_polygon": f.get("geo_polygon")}
-                if f.get("meta").get("negate"):
-                    filter_dict["must_not"].append(geo_polygon_dict)
-                else:
-                    filter_dict["filter"].append(geo_polygon_dict)
-            elif f.get("geo_bounding_box"):
-                geo_bbox_dict = {"geo_bounding_box": f.get("geo_bounding_box")}
-                if f.get("meta").get("negate"):
-                    filter_dict["must_not"].append(geo_bbox_dict)
-                else:
-                    filter_dict["filter"].append(geo_bbox_dict)
-            elif f.get("geo_shape"):
-                geo_bbox_dict = {"geo_shape": f.get("geo_shape")}
-                if f.get("meta").get("negate"):
-                    filter_dict["must_not"].append(geo_bbox_dict)
-                else:
-                    filter_dict["filter"].append(geo_bbox_dict)
-            elif f.get("geo_distance"):
-                geo_bbox_dict = {"geo_distance": f.get("geo_distance")}
-                if f.get("meta").get("negate"):
-                    filter_dict["must_not"].append(geo_bbox_dict)
-                else:
-                    filter_dict["filter"].append(geo_bbox_dict)
-            elif f.get("query"):
-                if f.get("meta").get("negate"):
-                    filter_dict["must_not"].append(f.get("query"))
-                else:
-                    filter_dict["filter"].append(f.get("query"))
-            else:
-                raise ValueError("unsupported spatial_filter {}".format(f))  # pylint: disable=C0209
-
-        # Handle phrase matching
-        elif f.get("meta").get("type") in ("phrase", "phrases", "bool"):
+        if f.get("query", None):
             if f.get("meta").get("negate"):
                 filter_dict["must_not"].append(f.get("query"))
             else:
                 filter_dict["filter"].append(f.get("query"))
-
-        elif f.get("meta").get("type") in ("range", "exists"):
-            if f.get("meta").get("negate"):
-                filter_dict["must_not"].append(handle_range_or_exists_filters(f))
-            else:
-                filter_dict["filter"].append(handle_range_or_exists_filters(f))
-
-        elif f.get("meta", {}).get("type") == "custom" and f.get("meta", {}).get("key") is not None:
-            filter_key = f.get("meta", {}).get("key")
-            if f.get("meta", {}).get("negate"):
-                if filter_key == "query":
-                    filt_index = list(f.get(filter_key))[0]
-                    filter_dict["must_not"].append({filt_index: f.get(filter_key).get(filt_index)})
-                else:
-                    filter_dict["must_not"].append({filter_key: f.get(filter_key)})
-            else:
-                if filter_key == "query":
-                    filt_index = list(f.get(filter_key))[0]
-                    filter_dict["must_not"].append({filt_index: f.get(filter_key).get(filt_index)})
-                else:
-                    filter_dict["filter"].append({filter_key: f.get(filter_key)})
-
         else:
-            # Here we handle filters that don't send a type (this happens when controls send filters)
-            # example filters[{"meta":{"index":"11503c28-7d88-4f9a-946b-2997a5ea64cf","key":"name"},"query":{"match_phrase":{"name":"word_5"}}}]
-            if f.get("meta", {}).get("negate"):
-                filter_dict["must_not"].append(f.get("query"))
+            if not is_spatial_filter:
+                filt_type = f.get("meta").get("type")
+                if f.get("meta").get("negate"):
+                    filter_dict["must_not"].append({filt_type: f.get(filt_type)})
+                else:
+                    filter_dict["filter"].append({filt_type: f.get(filt_type)})
             else:
-                filter_dict["filter"].append(f.get("query"))
-                # raise ValueError("unsupported filter type {}".format(f.get("meta").get("type")))  # pylint: disable=C0209
-
+                for geo_type in ["geo_polygon", "geo_bounding_box", "geo_shape", "geo_distance"]:
+                    if f.get(geo_type, None):
+                        if f.get("meta").get("negate"):
+                            filter_dict["must_not"].append({geo_type: f.get(geo_type)})
+                        else:
+                            filter_dict["filter"].append({geo_type: f.get(geo_type)})
     logger.info("Filter output %s", filter_dict)
     return filter_dict
 

@@ -449,32 +361,6 @@ def parse_duration_interval(interval):
         kwargs[key] = int(interval[0:len(interval)-1])
     return relativedelta(**kwargs)
 
-def convert(response, category_formatter=str):
-    """
-
-    :param response:
-    :return:
-    """
-    if hasattr(response.aggregations, "categories"):
-        for category in response.aggregations.categories:
-            for bucket in category.grids:
-                x, y = lnglat_to_meters(
-                    bucket.centroid.location.lon, bucket.centroid.location.lat
-                )
-                yield {
-                    "lon": bucket.centroid.location.lon,
-                    "lat": bucket.centroid.location.lat,
-                    "x": x,
-                    "y": y,
-                    "c": bucket.centroid.count,
-                    "t": category_formatter(category.key),
-                }
-    else:
-        for bucket in response.aggregations.grids:
-            lon = bucket.centroid.location.lon
-            lat = bucket.centroid.location.lat
-            x, y = lnglat_to_meters(lon, lat)
-            yield {"lon": lon, "lat": lat, "x": x, "y": y, "c": bucket.centroid.count}
 
 def convert_composite(response, categorical, filter_buckets, histogram_interval, category_type, category_format):
     if categorical and filter_buckets is False:

@@ -586,20 +472,6 @@ def get_nested_field_from_hit(hit, field_parts: List[str], default=None):
 
     raise ValueError("field must be provided")
 
-def chunk_iter(iterable, chunk_size):
-    chunks = [None] * chunk_size
-    i = -1
-    for i, v in enumerate(iterable):
-        idx = i % chunk_size
-        if idx == 0 and i > 0:
-            i = -1
-            yield (True, chunks)
-        chunks[idx] = v
-
-    if i >= 0:
-        last_written_idx = i % chunk_size
-        yield (False, chunks[0:last_written_idx+1])
-
 def bucket_noop(bucket, search):
     # pylint: disable=unused-argument
     return bucket
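
The build_dsl_filter refactor collapses the per-type branches into one rule: a filter that carries a query object contributes that clause verbatim; otherwise the clause is rebuilt from meta.type for non-spatial filters, or from whichever geo_* key is present for spatial ones. The special-purpose paths (handle_range_or_exists_filters, the custom-key branch) disappear because the query-first rule covers them. A standalone sketch of the new dispatch against two hypothetical Kibana-style filters:

from typing import Any, Dict, List

GEO_TYPES = ["geo_polygon", "geo_bounding_box", "geo_shape", "geo_distance"]

def dispatch(filters: List[Dict[str, Any]]) -> Dict[str, Any]:
    filter_dict: Dict[str, Any] = {"filter": [], "must_not": []}
    for f in filters:
        target = "must_not" if f["meta"].get("negate") else "filter"
        if f.get("query"):                       # newer Kibana: clause under "query"
            filter_dict[target].append(f["query"])
        elif any(f.get(g) for g in GEO_TYPES):   # spatial: clause under its geo key
            for g in GEO_TYPES:
                if f.get(g):
                    filter_dict[target].append({g: f[g]})
        else:                                    # older style: clause under meta.type
            filt_type = f["meta"]["type"]
            filter_dict[target].append({filt_type: f[filt_type]})
    return filter_dict

print(dispatch([
    {"meta": {"negate": False}, "query": {"match_phrase": {"name": "word_5"}}},
    {"meta": {"negate": True, "type": "range"}, "range": {"speed": {"gte": 10}}},
]))
# {'filter': [{'match_phrase': {'name': 'word_5'}}],
#  'must_not': [{'range': {'speed': {'gte': 10}}}]}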

Diff for: elastic_datashader/parameters.py (+3 -5)

@@ -1,7 +1,6 @@
 from datetime import datetime, timedelta, timezone
 from hashlib import sha256
 from json import loads
-from socket import gethostname
 from time import sleep
 from typing import Any, Dict, Optional, Tuple
 from urllib.parse import unquote

@@ -351,7 +350,6 @@ def generate_global_params(headers, params, idx):
     if category_type == "number":
         bounds_s.aggs.metric("field_stats", "stats", field=category_field)
 
-    # field_type = get_field_type(config.elastic_hosts, headers, params, geopoint_field, idx)
     field_type = params["geofield_type"]  # CCS you cannot get mappings so we needed to push the field type from the client side
     # Execute and process search
     if len(list(bounds_s.aggs)) > 0 and field_type != "geo_shape":

@@ -470,7 +468,7 @@ def generate_global_params(headers, params, idx):
 
 
 def merge_generated_parameters(headers, params, idx, param_hash):
-    layer_id = f"{param_hash}_{gethostname()}"
+    layer_id = f"{param_hash}_{config.hostname}"
     es = Elasticsearch(
         config.elastic_hosts.split(","),
         verify_certs=False,

@@ -488,7 +486,7 @@ def merge_generated_parameters(headers, params, idx, param_hash):
     try:
         doc = Document(
             _id=layer_id,
-            creating_host=gethostname(),
+            creating_host=config.hostname,
             creating_pid=os.getpid(),
             creating_timestamp=datetime.now(timezone.utc),
             generated_params=None,

@@ -533,7 +531,7 @@ def merge_generated_parameters(headers, params, idx, param_hash):
     generated_params = {
         "complete": False,
         "generation_start_time": datetime.now(timezone.utc),
-        "generating_host": gethostname(),
+        "generating_host": config.hostname,
         "generating_pid": os.getpid(),
     }

Diff for: elastic_datashader/routers/tms.py (+7 -4)

@@ -1,6 +1,5 @@
 from datetime import datetime, timezone
 from os import getpid
-from socket import gethostname
 from typing import Optional
 import time
 import uuid

@@ -25,7 +24,7 @@
 )
 from ..config import config
 from ..drawing import generate_x_tile
-from ..elastic import get_es_headers
+from ..elastic import get_es_headers, get_search_base
 from ..logger import logger
 from ..parameters import extract_parameters, merge_generated_parameters
 from ..tilegen import (

@@ -111,7 +110,7 @@ def create_datashader_tiles_entry(es, **kwargs) -> None:
     '''
     doc_info = {
         **kwargs,
-        'host': gethostname(),
+        'host': config.hostname,
         'pid': getpid(),
         'timestamp': datetime.now(timezone.utc),
     }

@@ -157,7 +156,7 @@ def cached_response(es, idx, x, y, z, params, parameter_hash) -> Optional[Response]:
         except NotFoundError:
             logger.warning("Unable to find cached tile entry in .datashader_tiles")
 
-        return make_image_response(img, params.get("user") or "", parameter_hash, 60)
+        return make_image_response(img, params.get("user") or "", parameter_hash, config.cache_timeout.seconds)
 
     logger.debug("Did not find image in cache: %s", tile_name(idx, x, y, z, parameter_hash))
     return None

@@ -294,6 +293,10 @@ async def fetch_or_render_tile(already_waited: int, idx: str, x: int, y: int, z:
     # Get hash and parameters
     try:
         parameter_hash, params = extract_parameters(request.headers, request.query_params)
+        # Try building the DSL object up front: bad filters raise here, where they can be handled,
+        # instead of during rendering, where failures are retried. elasticsearch_dsl doesn't support
+        # the Elasticsearch 8 API yet, so a request it cannot build will never complete; serve the X tile.
+        get_search_base(config.elastic_hosts, request.headers, params, idx)
     except Exception as ex:  # pylint: disable=W0703
         logger.exception("Error while extracting parameters")
         params = {"user": request.headers.get("es-security-runas-user", None)}

Diff for: elastic_datashader/tilegen.py (-1)

@@ -1099,7 +1099,6 @@ def generate_tile(idx, x, y, z, headers, params, tile_width_px=256, tile_height_
 
     # the composite needs one bin for 'after_key'
     composite_agg_size = int(max_bins / inner_agg_size) - 1
-    # field_type = get_field_type(config.elastic_hosts, headers, params, geopoint_field, idx)
     field_type = params["geofield_type"]  # CCS you cannot get mappings so we needed to push the field type from the client side
     partial_data = False  # TODO can we get partial data?
     span = None

Diff for: tests/test_cache.py (+5 -1)

@@ -75,7 +75,7 @@ def test_clear_hash_cache(tmp_path):
 
 
 def test_age_off_cache(tmp_path):
-    xdir = tmp_path / "fooindex/somehash/3/1"
+    xdir = tmp_path / "fooindex/some_new_hash/3/1"
     xdir.mkdir(parents=True)
 
     yfile = xdir / "2.png"

@@ -90,6 +90,10 @@ def test_age_off_cache(tmp_path):
 
     assert not yfile.exists()
     assert yfile_after.exists()
+    sleep(2)
+    # aging off again should now remove all files and the empty folders
+    cache.age_off_cache(tmp_path, "fooindex", timedelta(seconds=1))
+    assert not xdir.exists()
 
 
 def test_build_layer_info(tmp_path):
