Hi,
I have a custom tokenizer component that works with Rasa Open Source 2.8.14, but I am seeing ModuleNotFoundError errors when I try to run it with Rasa X installed via the manual Docker Compose method.
The custom component uses several pip packages that are not found at runtime, even though the custom Rasa Docker image builds without any errors.
I have attached all the pertinent files below. I would really appreciate any suggestions on what I am doing incorrectly.
Error logs (from the rasa-production container)
Starting Rasa X in production mode... 🚀
/opt/venv/lib/python3.8/site-packages/tzlocal/unix.py:158: UserWarning: Can not find any timezone configuration, defaulting to UTC.
  warnings.warn('Can not find any timezone configuration, defaulting to UTC.')
2022-03-29 06:43:20 ERROR rasa.core.agent - Failed to update model. The previous model will stay loaded instead.
Traceback (most recent call last):
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 160, in _update_model_from_server
    _load_and_set_updated_model(agent, model_directory, new_fingerprint)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 133, in _load_and_set_updated_model
    interpreter = _load_interpreter(agent, nlu_path)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 92, in _load_interpreter
    return rasa.core.interpreter.create_interpreter(nlu_path)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 33, in create_interpreter
    return RasaNLUInterpreter(model_directory=obj)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 127, in __init__
    self._load_interpreter()
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 165, in _load_interpreter
    self.interpreter = Interpreter.load(self.model_directory)
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/model.py", line 341, in load
    return Interpreter.create(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/model.py", line 416, in create
    component = component_builder.load_component(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/components.py", line 902, in load_component
    component = registry.load_component_by_meta(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/registry.py", line 177, in load_component_by_meta
    return component_class.load(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/components.py", line 591, in load
    return cls(meta)
  File "/app/custom_japanese_tokenizer.py", line 32, in __init__
    import MeCab
ModuleNotFoundError: No module named 'MeCab'
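The traceback shows the failure happens when rasa-production reloads the model and the mounted custom component reaches import MeCab. In case it helps with diagnosis, the running container can be checked for the package directly, e.g. (service name as in the standard Rasa X compose file, docker-compose v1 syntax):

docker-compose exec rasa-production python -c "import MeCab; print('MeCab OK')"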
Dockerfile_rasa (for the custom Rasa image)
# Extend the official Rasa image
FROM rasa/rasa:2.8.14-full
# Change back to root user to install dependencies
USER root
ENV PYTHONPATH "${PYTHONPATH}:/app"
RUN pip install --upgrade pip
RUN pip install --no-cache-dir sudachipy
RUN pip install --no-cache-dir sudachidict_core
RUN pip install --no-cache-dir mecab-python3
RUN pip install --no-cache-dir unidic-lite
# Switch back to non-root to run code
USER 1001
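If it is useful for debugging, the image can be sanity-checked right after building (the tag below is just an example; the --entrypoint override is needed because the Rasa image's default entrypoint is the rasa CLI):

docker build -t rasa-custom:2.8.14 -f Dockerfile_rasa .
docker run --rm --entrypoint python rasa-custom:2.8.14 -c "import MeCab, sudachipy; print('imports OK')"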
Dockerfile_action_server (for the custom action server)
# Extend the official Rasa SDK image
FROM rasa/rasa-sdk:2.8.3
# Change back to root user to install dependencies
USER root
# To install system dependencies
# RUN apt-get update -qq && \
# apt-get install -y <NAME_OF_REQUIRED_PACKAGE> && \
# apt-get clean && \
# rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# To install packages from PyPI
RUN pip install --upgrade pip
RUN pip install --no-cache-dir sudachipy
RUN pip install --no-cache-dir sudachidict_core
RUN pip install --no-cache-dir mecab-python3
RUN pip install --no-cache-dir unidic-lite
# Switch back to non-root to run code
USER 1001
docker-compose.override.yml
version: "3.4"
services:
  app:
    build:
      context: .
      dockerfile: Dockerfile_action_server
    volumes:
      - './actions:/app/actions'
      - './data/deviceIDs.json:/app/data/deviceIDs.json'
    expose:
      - '5055'
    depends_on:
      - rasa-production
  rabbit:
    ports:
      - "5672:5672"
  rasa-production:
    volumes:
      - ./connectors:/app/connectors
      - ./custom_japanese_tokenizer.py:/app/custom_japanese_tokenizer.py
      - ./data/deviceIDs.json:/app/data/deviceIDs.json
    environment:
      - DEFAULT_STREAM_READING_TIMEOUT_IN_SECONDS=60
  rasa-worker:
    volumes:
      - ./connectors:/app/connectors
      - ./custom_japanese_tokenizer.py:/app/custom_japanese_tokenizer.py
      - ./data/deviceIDs.json:/app/data/deviceIDs.json
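One thing I am not sure about: this override mounts custom_japanese_tokenizer.py into rasa-production and rasa-worker, but it does not set image: or build: for those services, so they may still be running the stock rasa/rasa image rather than my custom one. A quick way to confirm which image a compose service is actually running (docker-compose v1 syntax):

docker inspect --format '{{.Config.Image}}' $(docker-compose ps -q rasa-production)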
custom_japanese_tokenizer.py (custom component)
import re
import typing
from typing import Any, Optional, Text, Dict, List, Type

from rasa.nlu.components import Component
from rasa.nlu.config import RasaNLUModelConfig
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
from rasa.nlu.constants import TOKENS_NAMES, MESSAGE_ATTRIBUTES
from rasa.shared.nlu.constants import (
    INTENT,
    INTENT_RESPONSE_KEY,
    RESPONSE_IDENTIFIER_DELIMITER,
    ACTION_NAME,
)


# class SudachiTokenizer(Tokenizer):
class CustomJapaneseTokenizer(Tokenizer):

    provides = [TOKENS_NAMES[attribute] for attribute in MESSAGE_ATTRIBUTES]

    defaults = {
        # Flag to check whether to split intents
        "intent_tokenization_flag": False,
        # Symbol on which intent should be split
        "intent_split_symbol": "_",
    }

    def __init__(self, component_config: Dict[Text, Any] = None) -> None:
        super().__init__(component_config)
        import MeCab

        self.tagger_obj = MeCab.Tagger("-Owakati")

    @classmethod
    def required_packages(cls) -> List[Text]:
        return ["sudachipy"]

    # def tokenize(self, text: Text) -> List[Token]:
    def tokenize(self, message: Message, attribute: Text) -> List[Token]:
        text = message.get(attribute)
        words = []
        # words = [m for m in self.tagger_obj.parse(text)]
        parsed_text = self.tagger_obj.parse(text)
        for i in parsed_text.split():
            words.append(i)
        print(f"\n---------***--------\n - Text: {text}\n - Parsed Text: {parsed_text}\n - Words: {words}")
        return self._convert_words_to_tokens(words, text)
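Finally, the MeCab dependency itself can be sanity-checked outside Docker with a one-liner (assuming mecab-python3 and unidic-lite are installed, matching the pip installs in the Dockerfiles above):

python -c "import MeCab; print(MeCab.Tagger('-Owakati').parse('日本語のテスト'))"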