Rasa custom components

Hi everyone, I'm just getting started with chatbots in Rasa and have run into the following problem. I have source code on Rasa 2.x with a custom Vietnamese tokenizer component, and I don't know how to port it to version 3.x. I'd appreciate any help.

from __future__ import annotations

from typing import Any, Dict, List, Optional, Text

from rasa.engine.graph import ExecutionContext
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
from rasa.shared.nlu.training_data.message import Message

import re

import py_vncorenlp

@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.MESSAGE_TOKENIZER, is_trainable=False
)
class VnCoreNLP_Tokenizer(Tokenizer):
    """Tokenizes Vietnamese text using VnCoreNLP word segmentation."""

    @staticmethod
    def get_default_config() -> Dict[Text, Any]:
        """Returns the component's default config."""
        return {
            # These two keys *must* be present because of the parent Tokenizer class.
            "intent_tokenization_flag": False,
            "intent_split_symbol": "_",
            # Example of an extra config value that could be passed to this component.
            # "only_alphanum": True,
        }

    def __init__(self, config: Dict[Text, Any]) -> None:
        """Initialize the tokenizer."""
        super().__init__(config)
        py_vncorenlp.download_model(save_dir='/workspace/nhitny/rasa/vncorenlp/')
        self.annotator = py_vncorenlp.VnCoreNLP(
            annotators=["wseg"], save_dir='/workspace/nhitny/rasa/vncorenlp/'
        )

    def clean_text(self, text: Text) -> Text:
        """Keeps Vietnamese letters, digits and basic punctuation, pads punctuation with spaces.

        e.g. "Xin chào, bạn khỏe không?" -> "xin chào , bạn khỏe không ?"
        """
        pattern = r"[^A-Za-z0-9(),!?\'`_aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ]"
        text = re.sub(pattern, " ", text)
        text = re.sub(r"\(", " ( ", text)
        text = re.sub(r"\)", " ) ", text)
        text = re.sub(r"\?", " ? ", text)
        text = re.sub(r",", " , ", text)
        text = re.sub(r"\s{2,}", " ", text)
        return text.strip().lower()

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> VnCoreNLP_Tokenizer:
        return cls(config)

    def tokenize(self, message: Message, attribute: Text) -> List[Token]:
        # Rasa 2.x signature was: def tokenize(self, text)
        text = message.get(attribute)
        text = self.clean_text(text)

        # VnCoreNLP-based variant (currently disabled):
        # words = self.annotator.tokenize(text)
        # words = self.annotator.word_segment(text)
        # words = [i for seg in words for i in seg]
        # list_word = []
        # for word in words:
        #     list_word.append(word.replace("_", " "))
        # return self._convert_words_to_tokens(list_word, " ".join(list_word))

        # Current approach: plain whitespace split.
        words = [w for w in text.split(" ") if w]

        # If we removed everything (e.g. smiles like `:)`), use the whole text as one token.
        if not words:
            words = [text]

        # The ._convert_words_to_tokens() method is from the parent class.
        tokens = self._convert_words_to_tokens(words, text)

        return self._apply_token_pattern(tokens)
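For reference, the commented-out VnCoreNLP branch above depends on word_segment() joining multi-syllable words with an underscore, which is why those underscores are replaced with spaces afterwards. A minimal standalone sketch of that behaviour (the sample sentence and expected output are my own illustration, not from the original project):

import py_vncorenlp

py_vncorenlp.download_model(save_dir='/workspace/nhitny/rasa/vncorenlp/')
segmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir='/workspace/nhitny/rasa/vncorenlp/')

# word_segment() returns a list of segmented sentence strings; compound words
# come back joined with "_", e.g. "sinh_viên" for "sinh viên".
print(segmenter.word_segment("Tôi là sinh viên đại học"))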

I converted it to version 3.x as shown above, but training fails with the following error:

File "/workspace/nhitny/miniconda3/envs/rasa/lib/python3.9/site-packages/rasa/engine/graph.py", line 407, in _load_component
    raise GraphComponentException(
rasa.engine.exceptions.GraphComponentException: Error initializing graph component for node run_custom_components.vietname_tokenizer.VnCoreNLP_Tokenizer0.
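In case it helps narrow things down, one way to surface the underlying exception outside the Rasa graph is to instantiate the component directly. This is only a sketch: the module path custom_components.vietname_tokenizer is inferred from the failing node name above and may differ in the actual project.

# Hypothetical standalone check, run from the project root.
from custom_components.vietname_tokenizer import VnCoreNLP_Tokenizer

config = VnCoreNLP_Tokenizer.get_default_config()
tokenizer = VnCoreNLP_Tokenizer(config)  # runs download_model() and loads VnCoreNLP
print(tokenizer.clean_text("Xin chào, bạn khỏe không?"))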