Rasa custom components

Hi everyone, I'm just getting started with chatbots in Rasa and have run into the following problem. I have source code on Rasa 2.x with a custom Vietnamese tokenizer component, and I don't know how to port it to version 3.x. I'd appreciate any help.

from __future__ import annotations

from typing import Any, Dict, List, Optional, Text

from rasa.engine.graph import ExecutionContext
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
from rasa.shared.nlu.training_data.message import Message

import re

import py_vncorenlp

@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.MESSAGE_TOKENIZER, is_trainable=False
)
class VnCoreNLP_Tokenizer(Tokenizer):
    """Tokenizes Vietnamese text using VnCoreNLP word segmentation."""

    @staticmethod
    def get_default_config() -> Dict[Text, Any]:
        """Returns the component's default config."""
        return {
            # These two keys *must* be present because of the parent Tokenizer class.
            "intent_tokenization_flag": False,
            "intent_split_symbol": "_",
            # Example of an extra config value that could be passed to this component.
            # "only_alphanum": True,
        }

    def __init__(self, config: Dict[Text, Any]) -> None:
        """Initialize the tokenizer."""
        super().__init__(config)
        py_vncorenlp.download_model(save_dir='/workspace/nhitny/rasa/vncorenlp/')
        self.annotator = py_vncorenlp.VnCoreNLP(
            annotators=["wseg"], save_dir='/workspace/nhitny/rasa/vncorenlp/'
        )

    def clean_text(self, text: Text) -> Text:
        """Keeps Vietnamese letters, digits and basic punctuation, pads punctuation with spaces.

        e.g. "Xin chào, bạn khỏe không?" -> "xin chào , bạn khỏe không ?"
        """
        pattern = r"[^A-Za-z0-9(),!?\'`_aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ]"
        text = re.sub(pattern, " ", text)
        text = re.sub(r"\(", " ( ", text)
        text = re.sub(r"\)", " ) ", text)
        text = re.sub(r"\?", " ? ", text)
        text = re.sub(r",", " , ", text)
        text = re.sub(r"\s{2,}", " ", text)
        return text.strip().lower()

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> VnCoreNLP_Tokenizer:
        return cls(config)

    def tokenize(self, message: Message, attribute: Text) -> List[Token]:
        # Rasa 2.x signature was: def tokenize(self, text)
        text = message.get(attribute)
        text = self.clean_text(text)

        # VnCoreNLP-based variant (currently disabled):
        # words = self.annotator.tokenize(text)
        # words = self.annotator.word_segment(text)
        # words = [i for seg in words for i in seg]
        # list_word = []
        # for word in words:
        #     list_word.append(word.replace("_", " "))
        # return self._convert_words_to_tokens(list_word, " ".join(list_word))

        # Current approach: plain whitespace split.
        words = [w for w in text.split(" ") if w]

        # If we removed everything (e.g. smiles like `:)`), use the whole text as one token.
        if not words:
            words = [text]

        # The ._convert_words_to_tokens() method is from the parent class.
        tokens = self._convert_words_to_tokens(words, text)

        return self._apply_token_pattern(tokens)
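For reference, the commented-out VnCoreNLP branch above depends on word_segment() joining multi-syllable words with an underscore, which is why those underscores are replaced with spaces afterwards. A minimal standalone sketch of that behaviour (the sample sentence and expected output are my own illustration, not from the original project):

import py_vncorenlp

py_vncorenlp.download_model(save_dir='/workspace/nhitny/rasa/vncorenlp/')
segmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir='/workspace/nhitny/rasa/vncorenlp/')

# word_segment() returns a list of segmented sentence strings; compound words
# come back joined with "_", e.g. "sinh_viên" for "sinh viên".
print(segmenter.word_segment("Tôi là sinh viên đại học"))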

I converted it to version 3.x as shown above, but training fails with the following error:

File "/workspace/nhitny/miniconda3/envs/rasa/lib/python3.9/site-packages/rasa/engine/graph.py", line 407, in _load_component
    raise GraphComponentException(
rasa.engine.exceptions.GraphComponentException: Error initializing graph component for node run_custom_components.vietname_tokenizer.VnCoreNLP_Tokenizer0.
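In case it helps narrow things down, one way to surface the underlying exception outside the Rasa graph is to instantiate the component directly. This is only a sketch: the module path custom_components.vietname_tokenizer is inferred from the failing node name above and may differ in the actual project.

# Hypothetical standalone check, run from the project root.
from custom_components.vietname_tokenizer import VnCoreNLP_Tokenizer

config = VnCoreNLP_Tokenizer.get_default_config()
tokenizer = VnCoreNLP_Tokenizer(config)  # runs download_model() and loads VnCoreNLP
print(tokenizer.clean_text("Xin chào, bạn khỏe không?"))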