Hi,
I’ve been trying to train my Rasa model, but I’m facing some issues. Previously, I trained my Rasa model without a GPU, i.e. I had not configured CUDA and tensorflow-gpu was not installed. To speed up training, I installed tensorflow-gpu and configured CUDA. Training now starts noticeably faster, but after 15 epochs it fails with the following error: ‘Error running graph component for node train_DIETClassifier6’.
Can anyone help me with this issue?
The full error log is as follows:
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\graph.py", line 464, in __call__
output = self._fn(self._component, **run_kwargs)
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\nlu\classifiers\diet_classifier.py", line 923, in train
self.model.fit(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\utils\tensorflow\temp_keras_modules.py", line 388, in fit
tmp_logs = self.train_function(iterator)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[93,217,217] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[node zeros_like_40
(defined at C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\utils\tensorflow\models.py:158)
]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
[Op:__inference_train_function_47897]
Errors may have originated from an input operation.
Input Source operations connected to node zeros_like_40:
In[0] cond_1/PartitionedCall (defined at C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\tensorflow_addons\text\crf.py:201)
Operation defined at: (most recent call last)
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\runpy.py", line 194, in _run_module_as_main
>>> return _run_code(code, main_globals, None,
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\runpy.py", line 87, in _run_code
>>> exec(code, run_globals)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\Scripts\rasa.exe\__main__.py", line 7, in <module>
>>> sys.exit(main())
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\__main__.py", line 110, in main
>>> cmdline_arguments.func(cmdline_arguments)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\cli\train.py", line 62, in <lambda>
>>> train_parser.set_defaults(func=lambda args: run_training(args, can_exit=True))
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\cli\train.py", line 94, in run_training
>>> training_result = train_all(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\api.py", line 105, in train
>>> return train(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\model_training.py", line 160, in train
>>> return _train_graph(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\model_training.py", line 234, in _train_graph
>>> trainer.train(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\training\graph_trainer.py", line 105, in train
>>> graph_runner.run(inputs={PLACEHOLDER_IMPORTER: importer})
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\runner\dask.py", line 101, in run
>>> dask_result = dask.get(run_graph, run_targets)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 552, in get_sync
>>> return get_async(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 494, in get_async
>>> fire_tasks(chunksize)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 489, in fire_tasks
>>> fut = submit(batch_execute_tasks, each_args)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 537, in submit
>>> fut.set_result(fn(*args, **kwargs))
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 233, in batch_execute_tasks
>>> return [execute_task(*a) for a in it]
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 233, in <listcomp>
>>> return [execute_task(*a) for a in it]
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 219, in execute_task
>>> result = _execute_task(task, data)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\core.py", line 119, in _execute_task
>>> return func(*(_execute_task(a, cache) for a in args))
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\graph.py", line 464, in __call__
>>> output = self._fn(self._component, **run_kwargs)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\nlu\classifiers\diet_classifier.py", line 923, in train
>>> self.model.fit(
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>> return fn(*args, **kwargs)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\utils\tensorflow\temp_keras_modules.py", line 388, in fit
>>> tmp_logs = self.train_function(iterator)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\keras\engine\training.py", line 878, in train_function
>>> return step_function(self, iterator)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\keras\engine\training.py", line 867, in step_function
>>> outputs = model.distribute_strategy.run(run_step, args=(data,))
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\keras\engine\training.py", line 860, in run_step
>>> outputs = model.train_step(data)
>>>
>>> File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\utils\tensorflow\models.py", line 158, in train_step
>>> prediction_gradients = tape.gradient(prediction_loss, self.trainable_variables)
>>>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\rsran\miniconda3\envs\rasa\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\rsran\miniconda3\envs\rasa\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Users\rsran\miniconda3\envs\rasa\Scripts\rasa.exe\__main__.py", line 7, in <module>
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\__main__.py", line 110, in main
cmdline_arguments.func(cmdline_arguments)
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\cli\train.py", line 62, in <lambda>
train_parser.set_defaults(func=lambda args: run_training(args, can_exit=True))
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\cli\train.py", line 94, in run_training
training_result = train_all(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\api.py", line 105, in train
return train(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\model_training.py", line 160, in train
return _train_graph(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\model_training.py", line 234, in _train_graph
trainer.train(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\training\graph_trainer.py", line 105, in train
graph_runner.run(inputs={PLACEHOLDER_IMPORTER: importer})
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\runner\dask.py", line 101, in run
dask_result = dask.get(run_graph, run_targets)
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 552, in get_sync
return get_async(
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 495, in get_async
for key, res_info, failed in queue_get(queue).result():
File "C:\Users\rsran\miniconda3\envs\rasa\lib\concurrent\futures\_base.py", line 437, in result
return self.__get_result()
File "C:\Users\rsran\miniconda3\envs\rasa\lib\concurrent\futures\_base.py", line 389, in __get_result
raise self._exception
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 537, in submit
fut.set_result(fn(*args, **kwargs))
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 233, in batch_execute_tasks
return [execute_task(*a) for a in it]
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 233, in <listcomp>
return [execute_task(*a) for a in it]
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 224, in execute_task
result = pack_exception(e, dumps)
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\local.py", line 219, in execute_task
result = _execute_task(task, data)
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\dask\core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "C:\Users\rsran\miniconda3\envs\rasa\lib\site-packages\rasa\engine\graph.py", line 471, in __call__
raise GraphComponentException(
rasa.engine.exceptions.GraphComponentException: Error running graph component for node train_DIETClassifier6.
Thanks in advance for your help.