Skip to content

Commit 96524ca

Browse files
[add] tests refactoring per backend (#296) * [add] tests refactoring per backend * [add] extended testing * [add] added specific modelget and scriptget tests on slaves. added tests for ai.info on all backends * [fix] disabled SCRIPTGET new test since it's hanging CI ( further investigation tbd ) * [fix] disabled SCRIPTGET new test since it's hanging CI ( further investigation tbd ) * [fix] fixing encoding issue on string comparison * [add] refactored tests_common to test for tensorset and tensorget across all input formats and reply formats * [add] added modelrun scriptrun disconnect test cases (test for client disconnect during work on background thread) * [add] increased the GPU tests timeout since we added more tests * [add] added valgrind options to RLTest. pruned testing. * [add] added valgrind options to RLTest. pruned testing. * [fix] fixed leak on RedisAI_ReplicateTensorSet * [fix] fixed leak on ret->devicestr in RAI_ModelCreateTorch * [fix] skipping modelrun and scriptrun disconnect on gpu test * [add] tests pruning for ci * [fix] fixing gpu tests for CI * [add] hardened ensureSlaveSynced * [fix] fixed Makefile in accordance to PR review, [add] split tf model tests into normal and error behaviour handling -- prioritize leaks * Delete MakefileCopy
1 parent 96078a9 commit 96524ca

12 files changed

+1776
-15
lines changed

.circleci/config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ jobs:
126126
command: |
127127
mkdir -p ~/workspace/tests
128128
docker run --gpus all -v $HOME/workspace/tests:/build/test/logs -it --rm redisai-gpu:latest-x64-bionic-test
129+
no_output_timeout: 30m
129130
- store_test_results:
130131
path: ~/workspace/tests
131132
deploy_package:

opt/Makefile

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ BINDIR=$(BINROOT)/src
5555
# INSTALL_DIR=$(ROOT)/install-$(DEVICE)
5656
DEPS_DIR=$(ROOT)/deps/$(OS)-$(ARCH)-$(DEVICE)
5757
INSTALL_DIR=$(ROOT)/bin/$(OS)-$(ARCH)-$(DEVICE)/install
58-
REDIS_VALGRID_SUPRESS=./redis_valgrind.sup
58+
REDIS_VALGRID_SUPRESS=$(ROOT)/opt/redis_valgrind.sup
5959
TARGET=$(BINDIR)/redisai.so
6060

6161
BACKENDS_PATH ?= $(INSTALL_DIR)/backends
@@ -147,22 +147,23 @@ ifeq ($(VERBOSE),1)
147147
TEST_ARGS += -v
148148
endif
149149
ifeq ($(TEST),)
150-
TEST=basic_tests.py
150+
TEST=
151151
PYDEBUG=
152152
else
153-
TEST_ARGS += -s
153+
TEST_ARGS += -s --test $(TEST)
154154
PYDEBUG=1
155155
endif
156156

157-
TEST_PREFIX=set -e; cd $(ROOT)/test
158-
TEST_CMD=\
159-
DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) \
160-
python3 -m RLTest $(TEST_ARGS) --test $(TEST) --module $(INSTALL_DIR)/redisai.so
161-
162157
GEN ?= 1
163158
SLAVES ?= 1
164159
AOF ?= 1
165160

161+
TEST_PREFIX=set -e; cd $(ROOT)/test
162+
# TODO: --errors-for-leak-kinds=definite
163+
VALGRIND_OPTIONS="--leak-check=full -q --show-reachable=no --show-possibly-lost=no"
164+
TEST_CMD= DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) python3 -m RLTest $(TEST_ARGS) --module $(INSTALL_DIR)/redisai.so
165+
VALGRIND_TEST_CMD= DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) python3 -m RLTest $(TEST_ARGS) --module $(INSTALL_DIR)/redisai.so --no-output-catch --use-valgrind --vg-no-fail-on-errors --vg-verbose --vg-options $(VALGRIND_OPTIONS) --vg-suppressions $(realpath $(REDIS_VALGRID_SUPRESS))
166+
166167
test:
167168
ifneq ($(NO_LFS),1)
168169
$(SHOW)if [ "$(git lfs env > /dev/null 2>&1 ; echo $?)" != "0" ]; then cd $(ROOT); git lfs install; fi
@@ -179,6 +180,10 @@ ifeq ($(SLAVES),1)
179180
$(SHOW)$(TEST_PREFIX); printf "\nTests with --use-slaves:\n\n" ;\
180181
$(TEST_CMD) --use-slaves
181182
endif
183+
ifeq ($(VALGRIND),1)
184+
$(SHOW)$(TEST_PREFIX); printf "\nTests with valgrind:\n\n" ;\
185+
$(VALGRIND_TEST_CMD)
186+
endif
182187

183188
#----------------------------------------------------------------------------------------------
184189

@@ -192,10 +197,7 @@ MODULE_ARGS=\
192197
TF redisai_tensorflow.so
193198

194199
VALGRIND_ARGS=\
195-
--leak-check=full \
196-
--show-reachable=no \
197-
--show-possibly-lost=no \
198-
--leak-check=full \
200+
$(VALGRIND_OPTIONS) \
199201
--suppressions=$(realpath $(REDIS_VALGRID_SUPRESS)) \
200202
-v redis-server --protected-mode no --save "" --appendonly no
201203

opt/redis_valgrind.sup

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
{
2+
ignore_unversioned_libs
3+
Memcheck:Leak
4+
...
5+
obj:*/libtensorflow.so.*
6+
}
7+
8+
{
9+
ignore_unversioned_libs
10+
Memcheck:Leak
11+
...
12+
obj:*/libonnxruntime.so.*
13+
}
14+
15+
{
16+
ignore_unversioned_libs
17+
Memcheck:Leak
18+
...
19+
obj:*/libtorch.so.*
20+
}
21+
22+
{
23+
ignore_unversioned_libs
24+
Memcheck:Leak
25+
...
26+
obj:*/libtorch.so*
27+
}
28+
129
{
230
<lzf_unitialized_hash_table>
331
Memcheck:Cond

src/backends/torch.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ RAI_Model *RAI_ModelCreateTorch(RAI_Backend backend, const char* devicestr,
6161
}
6262

6363
void RAI_ModelFreeTorch(RAI_Model* model, RAI_Error *error) {
64+
if(model->devicestr){
65+
RedisModule_Free(model->devicestr);
66+
}
6467
torchDeallocContext(model->model);
6568
}
6669

src/redisai.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -831,9 +831,9 @@ void RedisAI_ReplicateTensorSet(RedisModuleCtx *ctx, RedisModuleString *key, RAI
831831
RedisModule_Replicate(ctx, "AI.TENSORSET", "scvcb", key, dtypestr,
832832
dims, ndims, "BLOB", data, size);
833833

834-
// for (long long i=0; i<ndims; i++) {
835-
// RedisModule_Free(dims[i]);
836-
// }
834+
for (long long i=0; i<ndims; i++) {
835+
RedisModule_FreeString(ctx,dims[i]);
836+
}
837837

838838
RedisModule_Free(dtypestr);
839839
}

test/__init__.py

Whitespace-only changes.

test/includes.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import json
2+
import os
3+
import random
4+
import sys
5+
import time
6+
from multiprocessing import Process
7+
8+
import numpy as np
9+
from skimage.io import imread
10+
from skimage.transform import resize
11+
12+
# Make the repo's deps/readies helpers importable when present; this is
# best-effort and the tests run fine without paella, so failures are ignored
# (but we no longer use a bare `except:`, which would also swallow
# KeyboardInterrupt/SystemExit).
try:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../deps/readies"))
    import paella
except Exception:
    pass

# Backend toggles: a backend is tested unless explicitly disabled with
# TEST_<backend>=0 (test-time switch) or WITH_<backend>=0 (compiled out).
TEST_TF = os.environ.get("TEST_TF") != "0" and os.environ.get("WITH_TF") != "0"
TEST_TFLITE = os.environ.get("TEST_TFLITE") != "0" and os.environ.get("WITH_TFLITE") != "0"
TEST_PT = os.environ.get("TEST_PT") != "0" and os.environ.get("WITH_PT") != "0"
TEST_ONNX = os.environ.get("TEST_ONNX") != "0" and os.environ.get("WITH_ORT") != "0"
# Target device (e.g. CPU, GPU), uppercased and stripped to ASCII-safe UTF-8.
DEVICE = os.environ.get('DEVICE', 'CPU').upper().encode('utf-8', 'ignore').decode('utf-8')
# Set VALGRIND=1 to signal tests that the server runs under valgrind.
VALGRIND = os.environ.get("VALGRIND") == "1"
print(f"Running tests on {DEVICE}\n")
25+
26+
27+
def ensureSlaveSynced(con, env, timeout_ms=5000):
    """Assert that at least one replica acknowledged all previous writes.

    When WAIT returns, all the previous write commands sent in the
    context of the current connection are guaranteed to have been
    received by the number of replicas returned by WAIT.

    No-op when the test environment runs without slaves.
    """
    if not env.useSlaves:
        return
    wait_reply = con.execute_command('WAIT', '1', timeout_ms)
    try:
        number_replicas = int(wait_reply)
    except (ValueError, TypeError):
        # Reply was not convertible to int; treat it as "no replica
        # synced" and let the assertion below report the failure.
        number_replicas = 0
    env.assertTrue(number_replicas >= 1)
43+
44+
45+
# Send a raw command on a fresh connection and drop the connection
# immediately, without waiting for (or parsing) the reply.
# Useful for checking the behaviour of commands that keep working
# on a background thread after the client has gone away.
def send_and_disconnect(cmd, red):
    connection = red.connection_pool.get_connection(cmd[0])
    reply = connection.send_command(*cmd)
    connection.disconnect()
    return reply
55+
56+
57+
def check_cuda():
    """Return the exit status of `which nvcc` (0 when nvcc is on PATH)."""
    status = os.system('which nvcc')
    return status
59+
60+
61+
def info_to_dict(info):
    """Turn a flat [key, value, key, value, ...] reply into a dict,
    decoding any bytes elements as UTF-8 first."""
    decoded = []
    for element in info:
        decoded.append(element.decode('utf-8') if type(element) is bytes else element)
    keys = decoded[0::2]
    values = decoded[1::2]
    return dict(zip(keys, values))
64+
65+
66+
def load_mobilenet_test_data():
    """Load the mobilenet test fixtures from test/test_data.

    Returns a tuple of (frozen TF graph bytes, imagenet label index,
    224x224 float32 image preprocessed for the model).
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

    with open(os.path.join(data_dir, 'mobilenet_v2_1.4_224_frozen.pb'), 'rb') as f:
        model_pb = f.read()

    with open(os.path.join(data_dir, 'imagenet_class_index.json'), 'r') as f:
        labels = json.load(f)

    height, width = 224, 224
    img = imread(os.path.join(data_dir, 'panda.jpg'))
    img = resize(img, (height, width), mode='constant', anti_aliasing=True)
    img = img.astype(np.float32)

    return model_pb, labels, img
85+
86+
87+
def run_mobilenet(con, img, input_var, output_var):
    """Stage `img` as tensor 'input' and run the 'mobilenet' model on it.

    Sleeps a random 0-5 s first so that concurrent callers interleave.
    NOTE(review): `input_var`/`output_var` are currently unused — the key
    names are hard-coded; confirm against callers before wiring them in.
    """
    time.sleep(0.5 * random.randint(0, 10))
    tensorset_args = ('AI.TENSORSET', 'input',
                      'FLOAT', 1, img.shape[1], img.shape[0], img.shape[2],
                      'BLOB', img.tobytes())
    con.execute_command(*tensorset_args)
    con.execute_command('AI.MODELRUN', 'mobilenet',
                        'INPUTS', 'input', 'OUTPUTS', 'output')
95+
96+
97+
def run_test_multiproc(env, n_procs, fn, args=tuple()):
    """Run `fn(con, *args)` in `n_procs` child processes, each with its
    own connection obtained from `env`, and wait for all to finish."""
    def worker():
        connection = env.getConnection()
        fn(connection, *args)
        return 1

    workers = [Process(target=worker) for _ in range(n_procs)]
    for proc in workers:
        proc.start()
    for proc in workers:
        proc.join()

0 commit comments

Comments
 (0)