Custom Docker image for Rasa X with custom component using pip packages

Hi,

I have a custom tokenizer component that works with Rasa Open Source 2.8.14, but I am seeing `ModuleNotFoundError` errors when I try to run it with Rasa X using a manual Docker Compose installation.

The custom component uses multiple pip packages that are not found at run time, even though no errors are reported while building the custom Rasa Docker image.

I have attached all the pertinent files below. I would really appreciate any suggestions on what I am doing incorrectly.

Error Logs (from rasa-production image)

Starting Rasa X in production mode... 🚀
/opt/venv/lib/python3.8/site-packages/tzlocal/unix.py:158: UserWarning: Can not find any timezone configuration, defaulting to UTC.
  warnings.warn('Can not find any timezone configuration, defaulting to UTC.')
2022-03-29 06:43:20 ERROR    rasa.core.agent  - Failed to update model. The previous model will stay loaded instead.
Traceback (most recent call last):
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 160, in _update_model_from_server
    _load_and_set_updated_model(agent, model_directory, new_fingerprint)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 133, in _load_and_set_updated_model
    interpreter = _load_interpreter(agent, nlu_path)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/agent.py", line 92, in _load_interpreter
    return rasa.core.interpreter.create_interpreter(nlu_path)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 33, in create_interpreter
    return RasaNLUInterpreter(model_directory=obj)
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 127, in __init__
    self._load_interpreter()
  File "/opt/venv/lib/python3.8/site-packages/rasa/core/interpreter.py", line 165, in _load_interpreter
    self.interpreter = Interpreter.load(self.model_directory)
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/model.py", line 341, in load
    return Interpreter.create(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/model.py", line 416, in create
    component = component_builder.load_component(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/components.py", line 902, in load_component
    component = registry.load_component_by_meta(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/registry.py", line 177, in load_component_by_meta
    return component_class.load(
  File "/opt/venv/lib/python3.8/site-packages/rasa/nlu/components.py", line 591, in load
    return cls(meta)
  File "/app/custom_japanese_tokenizer.py", line 32, in __init__
    import MeCab
ModuleNotFoundError: No module named 'MeCab'

Dockerfile_rasa (for custom rasa image)

# Extend the official Rasa image.
# NOTE(review): docker-compose.yml references the image
# "rasa_custom_component:rasa_custom_component"; presumably this file must be
# built and tagged with that name before running docker-compose - confirm.
FROM rasa/rasa:2.8.14-full

# Switch to the root user to install dependencies
USER root

# Make modules placed under /app (such as the mounted
# custom_japanese_tokenizer.py) importable by Python
ENV PYTHONPATH="${PYTHONPATH}:/app"

# Install the tokenizer's pip packages in a single layer, without keeping
# pip's download cache in the image
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        sudachipy \
        sudachidict_core \
        mecab-python3 \
        unidic-lite

# Switch back to non-root to run code
USER 1001

Dockerfile_action_server (for custom action server)

# Extend the official Rasa SDK image
FROM rasa/rasa-sdk:2.8.3

# Switch to the root user to install dependencies
USER root

# To install system dependencies
# RUN apt-get update -qq && \
#    apt-get install -y <NAME_OF_REQUIRED_PACKAGE> && \
#    apt-get clean && \
#    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# To install packages from PyPI: one layer, without keeping pip's download
# cache in the image
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        sudachipy \
        sudachidict_core \
        mecab-python3 \
        unidic-lite

# Switch back to non-root to run code
USER 1001

docker-compose.override.yml

version: "3.4"

services:
  # Action server: built locally from Dockerfile_action_server instead of
  # using a prebuilt image.
  app:
    build:
      context: .
      dockerfile: Dockerfile_action_server
    volumes:
      - "./actions:/app/actions"
      - "./data/deviceIDs.json:/app/data/deviceIDs.json"
    expose:
      - "5055"
    depends_on:
      - rasa-production

  # Publish the RabbitMQ port on the host.
  rabbit:
    ports:
      - "5672:5672"

  # Mount the connectors, the custom tokenizer module and the device ID data
  # into the production Rasa container.
  rasa-production:
    volumes:
      - "./connectors:/app/connectors"
      - "./custom_japanese_tokenizer.py:/app/custom_japanese_tokenizer.py"
      - "./data/deviceIDs.json:/app/data/deviceIDs.json"
    environment:
      - "DEFAULT_STREAM_READING_TIMEOUT_IN_SECONDS=60"

  # Same mounts for the worker Rasa container.
  rasa-worker:
    volumes:
      - "./connectors:/app/connectors"
      - "./custom_japanese_tokenizer.py:/app/custom_japanese_tokenizer.py"
      - "./data/deviceIDs.json:/app/data/deviceIDs.json"

custom_japanese_tokenizer.py (custom component)

import logging
import re
import typing
from typing import Any, Optional, Text, Dict, List, Type

from rasa.nlu.components import Component
from rasa.nlu.config import RasaNLUModelConfig
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
from rasa.nlu.constants import TOKENS_NAMES, MESSAGE_ATTRIBUTES
from rasa.shared.nlu.constants import (
    INTENT,
    INTENT_RESPONSE_KEY,
    RESPONSE_IDENTIFIER_DELIMITER,
    ACTION_NAME,
)

#class SudachiTokenizer(Tokenizer):
logger = logging.getLogger(__name__)


class CustomJapaneseTokenizer(Tokenizer):
    """Rasa NLU tokenizer that segments text with a MeCab tagger.

    The tagger is created with the ``-Owakati`` option and its parse output is
    split on whitespace to obtain the words that become tokens.
    """

    provides = [TOKENS_NAMES[attribute] for attribute in MESSAGE_ATTRIBUTES]

    defaults = {
        # Flag to check whether to split intents
        "intent_tokenization_flag": False,
        # Symbol on which intent should be split
        "intent_split_symbol": "_",
    }

    def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
        """Create the tokenizer and its MeCab tagger.

        Args:
            component_config: Component configuration passed on to the
                ``Tokenizer`` base class; ``None`` uses the defaults.

        Raises:
            ModuleNotFoundError: If the ``MeCab`` module is not installed in
                the environment running Rasa.
        """
        super().__init__(component_config)

        # Imported here rather than at module level so that importing this
        # file does not itself require MeCab.
        import MeCab

        self.tagger_obj = MeCab.Tagger("-Owakati")

    @classmethod
    def required_packages(cls) -> List[Text]:
        """Return the importable module names this component depends on.

        The class imports ``MeCab`` in ``__init__``, so that is the module
        declared here (previously an unrelated package was listed).
        """
        return ["MeCab"]

    def tokenize(self, message: Message, attribute: Text) -> List[Token]:
        """Tokenize one attribute of ``message``.

        Args:
            message: Message whose attribute is tokenized.
            attribute: Name of the message attribute to read the text from.

        Returns:
            Tokens built from the whitespace-separated MeCab output by the
            base class helper ``_convert_words_to_tokens``.
        """
        text = message.get(attribute)
        parsed_text = self.tagger_obj.parse(text)
        words = parsed_text.split()
        # Debug-level logging instead of print(): this runs for every message.
        logger.debug(
            "Tokenized text %r -> parsed %r -> words %s", text, parsed_text, words
        )
        return self._convert_words_to_tokens(words, text)

@Vin is this issue related to Rasa X or to Rasa Open Source? I cannot see any code or Docker image for Rasa X.

@nik202 The issue is with Rasa X (the error logs are from Rasa X production).

I have a custom tokenizer component in the pipeline which uses certain pip packages, so I first created a custom Rasa (2.8.14) Docker image using the file Dockerfile_rasa and then started Rasa X using:

sudo docker-compose up

This command creates the action server Docker image using the file Dockerfile_action_server. I will also attach the docker-compose.yml and .env files.

docker-compose.yml

version: "3.4"

# Database connection settings (host "db", port 5432).
x-database-credentials: &database-credentials
  DB_HOST: "db"
  DB_PORT: "5432"
  DB_USER: "${DB_USER:-admin}"
  DB_PASSWORD: "${DB_PASSWORD}"
  DB_LOGIN_DB: "${DB_LOGIN_DB:-rasa}"

# RabbitMQ connection settings (host "rabbit").
x-rabbitmq-credentials: &rabbitmq-credentials
  RABBITMQ_HOST: "rabbit"
  RABBITMQ_USERNAME: "user"
  RABBITMQ_PASSWORD: "${RABBITMQ_PASSWORD}"

# Redis connection settings (host "redis", port 6379).
x-redis-credentials: &redis-credentials
  REDIS_HOST: "redis"
  REDIS_PORT: "6379"
  REDIS_PASSWORD: "${REDIS_PASSWORD}"
  REDIS_DB: "1"
  REDIS_CACHE_DB: "2"
  ACCEPTABLE_QUERY_COUNT_NUMBER: "50000"

# URL of the duckling service.
x-duckling-credentials: &duckling-credentials
  RASA_DUCKLING_HTTP_URL: "http://duckling:8000"

# host:port targets merged into the nginx service's environment.
x-nginx-host-variables: &nginx-host-variables
  RASA_X_HOST: "rasa-x:5002"
  RASA_USER_APP: "app:5055"
  RASA_PRODUCTION_HOST: "rasa-production:5005"

# Rasa X credentials and the URLs of the other services.
x-rasax-credentials: &rasax-credentials
  RASA_X_HOST: "http://rasa-x:5002"
  RASA_X_USERNAME: "${RASA_X_USERNAME:-admin}"
  RASA_X_PASSWORD: "${RASA_X_PASSWORD:-}"
  RASA_X_TOKEN: "${RASA_X_TOKEN}"
  JWT_SECRET: "${JWT_SECRET}"
  RASA_USER_APP: "http://app:5055"
  RASA_PRODUCTION_HOST: "http://rasa-production:5005"
  RASA_WORKER_HOST: "http://rasa-worker:5005"
  RASA_TOKEN: "${RASA_TOKEN}"

# Combined environment for the Rasa and Rasa X services.
# All anchors are merged through ONE `<<` key holding a list of aliases:
# repeating the `<<` key is a duplicate mapping key, which is invalid YAML
# and is rejected by strict parsers. The merged mappings share no keys, so
# the merge order does not affect the result; keys written explicitly below
# override merged ones.
x-rasa-credentials: &rasa-credentials
  <<:
    - *rabbitmq-credentials
    - *rasax-credentials
    - *database-credentials
    - *redis-credentials
    - *duckling-credentials
  RASA_TOKEN: ${RASA_TOKEN}
  # Quoted so the environment value is a string rather than a YAML integer
  RASA_MODEL_PULL_INTERVAL: "10"
  RABBITMQ_QUEUE: "rasa_production_events"
  RASA_TELEMETRY_ENABLED: ${RASA_TELEMETRY_ENABLED:-true}



# Base definition merged into the rasa-production and rasa-worker services.
x-rasa-services: &default-rasa-service
  # Custom Rasa image rather than a stock rasa/rasa image
  image: "rasa_custom_component:rasa_custom_component"
  restart: always
  expose:
    - "5005"
  volumes:
    - ./.config:/.config
  depends_on:
    - rasa-x
    - rabbit
    - redis
  # Folded scalar: the lines below are joined into a single command line
  command: >
    x
    --no-prompt
    --production
    --config-endpoint http://rasa-x:5002/api/config?token=${RASA_X_TOKEN}
    --port 5005
    --jwt-method HS256
    --jwt-secret ${JWT_SECRET}
    --auth-token '${RASA_TOKEN}'
    --cors "*"

services:
  # Rasa X service; exposes port 5002 to the other containers.
  rasa-x:
    restart: always
    image: "rasa/rasa-x:${RASA_X_VERSION}"
    expose:
      - "5002"
    volumes:
      - ./models:/app/models
      - ./environments.yml:/app/environments.yml
      - ./credentials.yml:/app/credentials.yml
      - ./endpoints.yml:/app/endpoints.yml
      - ./logs:/logs
      - ./auth:/app/auth
    environment:
      # One merge key with a list of aliases; repeating `<<` would be a
      # duplicate mapping key, which is invalid YAML.
      <<:
        - *database-credentials
        - *rasa-credentials
      SELF_PORT: "5002"
      DB_DATABASE: "${DB_DATABASE:-rasa}"
      RASA_MODEL_DIR: "/app/models"
      PASSWORD_SALT: ${PASSWORD_SALT}
      RABBITMQ_QUEUE: "rasa_production_events"
      RASA_X_USER_ANALYTICS: "0"
      SANIC_RESPONSE_TIMEOUT: "3600"
      RUN_DATABASE_MIGRATION_AS_SEPARATE_SERVICE: "true"
    depends_on:
      - db

  # Runs the rasax.community.services.db_migration_service module from the
  # Rasa X image as its own service.
  db-migration:
    image: "rasa/rasa-x:${RASA_X_VERSION}"
    restart: always
    entrypoint:
      - "python"
    command:
      - "-m"
      - "rasax.community.services.db_migration_service"
    expose:
      - "8000"
    healthcheck:
      # Probe the local health endpoint; on failure send a signal to PID 1
      test:
        - "CMD-SHELL"
        - "curl -f http://localhost:8000/health || kill 1"
      interval: 5s
      timeout: 1s
      retries: 3
      start_period: 2s
    environment:
      <<: *database-credentials
      RUN_DATABASE_MIGRATION_AS_SEPARATE_SERVICE: "true"
      MPLCONFIGDIR: "/tmp/.matplotlib"
    depends_on:
      - db

  # Rasa server for the "production" environment; image, command and
  # dependencies come from the shared default-rasa-service definition.
  rasa-production:
    <<: *default-rasa-service
    environment:
      <<: *rasa-credentials
      DB_DATABASE: "tracker"
      MPLCONFIGDIR: "/tmp/.matplotlib"
      RASA_ENVIRONMENT: "production"
      RASA_MODEL_SERVER: "http://rasa-x:5002/api/projects/default/models/tags/production"

  # Rasa server for the "worker" environment; same base definition, with its
  # own tracker database name.
  rasa-worker:
    <<: *default-rasa-service
    environment:
      <<: *rasa-credentials
      DB_DATABASE: "worker_tracker"
      MPLCONFIGDIR: "/tmp/.matplotlib"
      RASA_ENVIRONMENT: "worker"
      RASA_MODEL_SERVER: "http://rasa-x:5002/api/projects/default/models/tags/production"

  # Action server (default demo image).
  app:
    image: "rasa/rasa-x-demo:${RASA_X_DEMO_VERSION}"
    restart: always
    expose:
      - "5055"
    depends_on:
      - rasa-production

  # PostgreSQL database; data is kept in ./db on the host.
  db:
    image: "bitnami/postgresql:11.11.0"
    restart: always
    expose:
      - "5432"
    environment:
      POSTGRESQL_USERNAME: "${DB_USER:-admin}"
      POSTGRESQL_PASSWORD: "${DB_PASSWORD}"
      POSTGRESQL_DATABASE: "${DB_DATABASE:-rasa}"
    volumes:
      - ./db:/bitnami/postgresql

  # RabbitMQ broker.
  rabbit:
    image: "bitnami/rabbitmq:3.8.17"
    restart: always
    expose:
      - "5672"
    environment:
      RABBITMQ_HOST: "rabbit"
      RABBITMQ_USERNAME: "user"
      RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD}
      RABBITMQ_DISK_FREE_RELATIVE_LIMIT: "0.1"

  # Duckling service, started with access and error logging disabled.
  duckling:
    image: "rasa/duckling:0.1.6.5-r2"
    restart: always
    expose:
      - "8000"
    command:
      - "duckling-example-exe"
      - "--no-access-log"
      - "--no-error-log"

  # nginx: publishes host ports 80 and 443 (container ports 8080 and 8443).
  nginx:
    image: "nginx:1.19"
    restart: always
    ports:
      - "80:8080"
      - "443:8443"
    volumes:
      - ./certs:/etc/certs
      - ./nginx-config-files/nginx.conf:/etc/nginx/nginx.conf
      - ./nginx-config-files/ssl.conf.template:/etc/nginx/templates/ssl.conf.template
      - ./nginx-config-files/rasax.nginx.template:/etc/nginx/templates/rasax.nginx.template
    environment:
      <<: *nginx-host-variables
    depends_on:
      - rasa-x
      - rasa-production
      - app

  # Redis.
  redis:
    image: "bitnami/redis:6.2.4"
    restart: always
    expose:
      - "6379"
    environment:
      REDIS_PASSWORD: ${REDIS_PASSWORD}

.env file

# rasax specific settings
# Values are left unquoted: older docker-compose releases keep quotation marks
# in a .env file as part of the value, which would break the image tags built
# from these variables.
RASA_X_VERSION=0.42.6
# Not referenced by the docker-compose.yml shown here, which uses a fixed
# custom image name for the Rasa services.
RASA_VERSION=2.8.14
RASA_TOKEN=########
RASA_X_TOKEN=########
PASSWORD_SALT=########
JWT_SECRET=########
RABBITMQ_PASSWORD=########
DB_PASSWORD=########
REDIS_PASSWORD=########
RASA_TELEMETRY_ENABLED=false
RASA_X_DEMO_VERSION=0.42.6

@Vin can you share a screenshot of the `docker ps` output?

@Vin how are you using or building the custom action image? I guess you are working on either a local machine or a server; which one is it?

@nik202

docker ps result

The custom action image is built from this part of docker-compose.override.yml together with the Dockerfile_action_server file:

services:
  # The action server image is built from the local Dockerfile_action_server
  app:
    build:
      context: .
      dockerfile: Dockerfile_action_server

@Vin is it your local machine or a server?

@nik202 It is on a server.

@nik202 Thanks for the help, but I got it working. I was using the incorrect (default) Rasa image instead of the custom image I had created. The fix was to set the custom image in docker-compose.yml under the following section (I have corrected the file in the post above as well):

x-rasa-services: &default-rasa-service
  restart: always
  # Use the custom Rasa image here instead of the default Rasa image
  image: "rasa_custom_component:rasa_custom_component"

@Vin Great, and congrats! Please mark the post above as the solution for others.