Open
Description
I was trying to build models with Bambi in a loop. The first 90 iterations all worked fine, but then sampling got stuck at:
0.00% [0/8000 00:00<? Sampling 4 chains, 0 divergences]
# Walk-forward loop: for every split produced by ds.ts_split(), fit a
# fixed-effects Bambi model on the training window, predict on the test
# window, and store the resulting mean-probability vector.
# NOTE(review): `df`, `DataSplit`, `clear_output`, `bmb`, `az` and `win_odds`
# are not defined in this snippet; they must already exist in the session.
target = 'target'
all_mean_proba = []
ds = DataSplit(df, target=target)

# Full list of predictor names the formula may use.
candidate_predictors = (
    'col1',
    'col2',
    'col3',
    'col4',
    'col5',
    'col6',
    'col7',
    'col8',
    'col9',
    'col10',
    'col11',
    'col12',
)

for date, idx, is_first, train, test in ds.ts_split():
    clear_output(wait=True)

    # Columns with more than one distinct value in this training window.
    distinct_counts = train.nunique()
    non_constant_col = list(distinct_counts[distinct_counts > 1].index)

    # The response must always be kept, even if it is constant in this window.
    if target not in non_constant_col:
        non_constant_col.append(target)

    # Restrict both frames to the retained columns (train first, then test).
    train, test = (
        frame[non_constant_col].copy() for frame in (train, test)
    )

    # Right-hand side of the formula: surviving predictors joined by " + ".
    X = " + ".join(
        name for name in candidate_predictors if name in non_constant_col
    )

    # Fixed-effects-only model, sampled with 1000 draws on each of 4 chains.
    model = bmb.Model(f'{target} ~ {X}', train)
    results = model.fit(draws=1000, chains=4)

    # Out-of-sample predictions.
    # NOTE(review): presumably this updates `results` in place, since the
    # return value is discarded and `results` is read right after - confirm.
    model.predict(results, data=test)

    # Draw 2000 posterior samples of the "<target>_mean" variable.
    posterior = az.extract(results, num_samples=2000)[f"{target}_mean"]

    # Weight by `win_odds` and average over axis 1.
    weighted = posterior.values * win_odds
    mean_proba = weighted.mean(1)
    all_mean_proba.append(mean_proba)
Traceback
Complete error traceback
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Intercept, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, target_sigma]
0.00% [0/8000 00:00<? Sampling 4 chains, 0 divergences]
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
File ~/anaconda3/lib/python3.9/site-packages/pymc/sampling.py:1522, in _mp_sample(draws, tune, step, chains, cores, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, **kwargs)
1521 with sampler:
-> 1522 for draw in sampler:
1523 strace = traces[draw.chain]
File ~/anaconda3/lib/python3.9/site-packages/pymc/parallel_sampling.py:458, in ParallelSampler.__iter__(self)
457 while self._active:
--> 458 draw = ProcessAdapter.recv_draw(self._active)
459 proc, is_last, draw, tuning, stats = draw
File ~/anaconda3/lib/python3.9/site-packages/pymc/parallel_sampling.py:328, in ProcessAdapter.recv_draw(processes, timeout)
327 pipes = [proc._msg_pipe for proc in processes]
--> 328 ready = multiprocessing.connection.wait(pipes)
329 if not ready:
File ~/anaconda3/lib/python3.9/multiprocessing/connection.py:936, in wait(object_list, timeout)
935 while True:
--> 936 ready = selector.select(timeout)
937 if ready:
File ~/anaconda3/lib/python3.9/selectors.py:416, in _PollLikeSelector.select(self, timeout)
415 try:
--> 416 fd_event_list = self._selector.poll(timeout)
417 except InterruptedError:
KeyboardInterrupt:
During handling of the above exception, another exception occurred:
KeyboardInterrupt Traceback (most recent call last)
File ~/anaconda3/lib/python3.9/site-packages/pymc/sampling.py:1530, in _mp_sample(draws, tune, step, chains, cores, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, **kwargs)
1529 if callback is not None:
-> 1530 callback(trace=trace, draw=draw)
1532 except ps.ParallelSamplingError as error:
File ~/anaconda3/lib/python3.9/site-packages/pymc/parallel_sampling.py:491, in ParallelSampler.__exit__(self, *args)
490 def __exit__(self, *args):
--> 491 ProcessAdapter.terminate_all(self._samplers)
File ~/anaconda3/lib/python3.9/site-packages/pymc/parallel_sampling.py:365, in ProcessAdapter.terminate_all(processes, patience)
364 raise multiprocessing.TimeoutError()
--> 365 process.join(timeout)
366 except multiprocessing.TimeoutError:
File ~/anaconda3/lib/python3.9/site-packages/pymc/parallel_sampling.py:318, in ProcessAdapter.join(self, timeout)
317 def join(self, timeout=None):
--> 318 self._process.join(timeout)
File ~/anaconda3/lib/python3.9/multiprocessing/process.py:149, in BaseProcess.join(self, timeout)
148 assert self._popen is not None, 'can only join a started process'
--> 149 res = self._popen.wait(timeout)
150 if res is not None:
File ~/anaconda3/lib/python3.9/multiprocessing/popen_fork.py:40, in Popen.wait(self, timeout)
39 from multiprocessing.connection import wait
---> 40 if not wait([self.sentinel], timeout):
41 return None
File ~/anaconda3/lib/python3.9/multiprocessing/connection.py:936, in wait(object_list, timeout)
935 while True:
--> 936 ready = selector.select(timeout)
937 if ready:
File ~/anaconda3/lib/python3.9/selectors.py:416, in _PollLikeSelector.select(self, timeout)
415 try:
--> 416 fd_event_list = self._selector.poll(timeout)
417 except InterruptedError:
KeyboardInterrupt:
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Input In [100], in <cell line: 5>()
43 model = bmb.Model(f'{target} ~ {X}', train)
45 # Fit the model using 1000 on each of 4 chains
---> 46 results = model.fit(draws=1000, chains=4)
48 # Out of sample predictions
49 model.predict(results, data=test)
File ~/anaconda3/lib/python3.9/site-packages/bambi/models.py:277, in Model.fit(self, draws, tune, discard_tuned_samples, omit_offsets, include_mean, inference_method, init, n_init, chains, cores, random_seed, **kwargs)
270 if isinstance(self.family, univariate.Bernoulli):
271 _log.info(
272 "Modeling the probability that %s==%s",
273 self.response.name,
274 str(self.response.success),
275 )
--> 277 return self.backend.run(
278 draws=draws,
279 tune=tune,
280 discard_tuned_samples=discard_tuned_samples,
281 omit_offsets=omit_offsets,
282 include_mean=include_mean,
283 inference_method=inference_method,
284 init=init,
285 n_init=n_init,
286 chains=chains,
287 cores=cores,
288 random_seed=random_seed,
289 **kwargs,
290 )
File ~/anaconda3/lib/python3.9/site-packages/bambi/backend/pymc.py:94, in PyMCModel.run(self, draws, tune, discard_tuned_samples, omit_offsets, include_mean, inference_method, init, n_init, chains, cores, random_seed, **kwargs)
92 # NOTE: Methods return different types of objects (idata, approximation, and dictionary)
93 if inference_method in ["mcmc", "nuts_numpyro", "nuts_blackjax"]:
---> 94 result = self._run_mcmc(
95 draws,
96 tune,
97 discard_tuned_samples,
98 omit_offsets,
99 include_mean,
100 init,
101 n_init,
102 chains,
103 cores,
104 random_seed,
105 inference_method,
106 **kwargs,
107 )
108 elif inference_method == "vi":
109 result = self._run_vi(**kwargs)
File ~/anaconda3/lib/python3.9/site-packages/bambi/backend/pymc.py:283, in PyMCModel._run_mcmc(self, draws, tune, discard_tuned_samples, omit_offsets, include_mean, init, n_init, chains, cores, random_seed, sampler_backend, **kwargs)
281 if sampler_backend == "mcmc":
282 try:
--> 283 idata = pm.sample(
284 draws=draws,
285 tune=tune,
286 discard_tuned_samples=discard_tuned_samples,
287 init=init,
288 n_init=n_init,
289 chains=chains,
290 cores=cores,
291 random_seed=random_seed,
292 **kwargs,
293 )
294 except (RuntimeError, ValueError):
295 if (
296 "ValueError: Mass matrix contains" in traceback.format_exc()
297 and init == "auto"
298 ):
File ~/anaconda3/lib/python3.9/site-packages/pymc/sampling.py:617, in sample(draws, step, init, n_init, initvals, trace, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, keep_warning_stat, idata_kwargs, mp_ctx, **kwargs)
615 _print_step_hierarchy(step)
616 try:
--> 617 mtrace = _mp_sample(**sample_args, **parallel_args)
618 except pickle.PickleError:
619 _log.warning("Could not pickle model, sampling singlethreaded.")
File ~/anaconda3/lib/python3.9/site-packages/pymc/sampling.py:1543, in _mp_sample(draws, tune, step, chains, cores, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, **kwargs)
1541 except KeyboardInterrupt:
1542 if discard_tuned_samples:
-> 1543 traces, length = _choose_chains(traces, tune)
1544 else:
1545 traces, length = _choose_chains(traces, 0)
File ~/anaconda3/lib/python3.9/site-packages/pymc/sampling.py:1582, in _choose_chains(traces, tune)
1580 lengths = [max(0, len(trace) - tune) for trace in traces]
1581 if not sum(lengths):
-> 1582 raise ValueError("Not enough samples to build a trace.")
1584 idxs = np.argsort(lengths)
1585 l_sort = np.array(lengths)[idxs]
ValueError: Not enough samples to build a trace.
Specs
OS: Ubuntu 22.04.1 LTS x86_64
Host: G7 MD
Kernel: 5.15.0-53-generic
Uptime: 4 hours, 38 mins
Packages: 2102 (dpkg), 19 (snap)
Shell: bash 5.1.16
Resolution: 1920x1080
DE: GNOME 42.5
WM: Mutter
Terminal: gnome-terminal
CPU: 11th Gen Intel i7-11800H (16) @
GPU: NVIDIA GeForce RTX 3050 Ti Mobi
GPU: Intel TigerLake-H GT1 [UHD Grap
Memory: 10436MiB / 15780MiB
Versions and main components
- PyMC/PyMC3 Version: PyMC
- Aesara/Theano Version: Aesara
- Python Version: Python 3.9.12
- How did you install PyMC/PyMC3: pip
Dataset
train.csv