I have been working with Rasa for months and i have an unexpected bug found. When rasa is trying to extract entities but the Spacy Extractor is not doing its job. I have another extractor in my config file.
Someone have an idea why Spacy is not extracting?
config.yml
# The config recipe.
# https://rasa.com/docs/rasa/model-configuration/
recipe: default.v1
# The assistant project unique identifier
# This default value must be replaced with a unique assistant name within your deployment
assistant_id: 20240104-111410-factorial-skunk
# Configuration for Rasa NLU.
# https://rasa.com/docs/rasa/nlu/components/
language: es
pipeline:
- name: SpacyNLP
model: "es_core_news_lg"
case_sensitive: false
- name: SpacyTokenizer
- name: SpacyFeaturizer
pooling: max
- name: LexicalSyntacticFeaturizer
- name: CountVectorsFeaturizer
- name: CountVectorsFeaturizer
analyzer: "char_wb"
min_ngram: 1
max_ngram: 4
OOV_token: "_oov_"
- name: RegexFeaturizer
- name: LanguageModelFeaturizer
model_weights: "dccuchile/bert-base-spanish-wwm-cased"
model_name: "bert"
cache_dir: null
- name: "SklearnIntentClassifier"
# Specifies the list of regularization values to
# cross-validate over for C-SVM.
# This is used with the ``kernel`` hyperparameter in GridSearchCV.
C: [1, 2, 5, 10, 20, 100]
# Specifies the kernel to use with C-SVM.
# This is used with the ``C`` hyperparameter in GridSearchCV.
kernels: ["linear"]
# Gamma parameter of the C-SVM.
"gamma": [0.1]
# We try to find a good number of cross folds to use during
# intent training, this specifies the max number of folds.
"max_cross_validation_folds": 5
# Scoring function used for evaluating the hyper parameters.
# This can be a name or a function.
"scoring_function": "f1_weighted"
intent_classification:: false
- name: RegexEntityExtractor
# text will be processed with case insensitive as default
case_sensitive: False
# use lookup tables to extract entities
use_lookup_tables: True
# use regexes to extract entities
use_regexes: True
# use match word boundaries for lookup table
use_word_boundaries: True
- name: EntitySynonymMapper
- name: SpacyEntityExtractor
dimensions: [
"user_name",
"product",
"mood",
"symptom",
"allergy",
"product_selected",
"search_term",
"vitamin",
"bottle",
"protein",
"electrolytes",
"omega",
"coffee",
"collagen",
"cream",
"body_oil",
"candle",
"DATE",
"GPE",
"want",
"how",
"can",
"create",
"acquire",
"cost",
"procedure",
"order",
"order_number",
"buy_word",
"shipping_word",
"help_word",
"done_word",
"time_word",
"status_word",
"services_word",
"what_do_you_know_about",
"when_does_arrive",
"how_long_does_it_take",
"have_problem",
"where_is",
"order_no_info",
"shipping_tracking_number",
"track_the_order",
"payment_methods",
"invoice",
"payment_captured",
"promo",
"promo_value",
]
- name: DIETClassifier
random_seed: 42
number_of_transformer_layers: 4
transformer_size: 256
drop_rate: 0.2
weight_sparsity: 0.7
batch_size: [64, 256]
epochs: 20
intent_classification:: false
- name: EntitySynonymMapper
- name: CRFEntityExtractor
features: [
["low", "title", "upper"],
[
"bias",
"low",
"title",
"prefix5",
"prefix2",
"suffix5",
"suffix3",
"suffix2",
"upper",
"digit",
"pos",
"pos2",
"pattern",
"text_dense_features"
],
["low", "title", "upper"]
]
max_iterations: 50
L1_c: 0.1
L2_c: 0.1
featurizers: []
embedding_dimension: 30
- name: ResponseSelector
epochs: 100
constrain_similarities: true
model_confidence: softmax
# Configuration for Rasa Core.
# https://rasa.com/docs/rasa/core/policies/
policies:
- name: MemoizationPolicy
- name: TEDPolicy
max_history: 5
epochs: 100
constrain_similarities: true
model_confidence: softmax
- name: RulePolicy
restrict_rules: False
core_fallback_threshold: 0.3
core_fallback_action_name: "action_default_fallback"
enable_fallback_prediction: True
domain.yml
version: '3.1'
intents:
- greet
- goodbye
- affirm
- deny
- mood_great
- mood_unhappy
- bot_challenge
- supply_name:
use_entities:
- user_name
- supply_email:
use_entities:
- email
- final_user_scenarios:
use_entities:
- search_term
- symptom
- product
- category
- mood
- allergy
- vitamin
- bottle
- protein
- electrolytes
- omega
- coffee
- collagen
- cream
- body_oil
- candle
- catalog
- help
- type_product:
use_entities:
- product_selected
- ask_how_to_order:
use_entities:
- want
- how
- can
- create
- acquire
- cost
- procedure
- order
- buy_word
- services_word
- ask_how_long_will_take_to_deliver_an_order:
use_entities:
- how_long_does_it_take
- ask_payment_methods:
use_entities:
- payment_methods
- info_about_order:
use_entities:
- buy_word
- shipping_word
- help_word
- done_word
- time_word
- status_word
- search_term
- what_do_you_know_about
- when_does_arrive
- how_long_does_it_take
- have_problem
- where_is
- order_no_info
- search_order:
use_entities:
- order_number
- no_info_about_shipment_tracking:
use_entities:
- order_number
- track_the_order
- shipping_tracking_number
- ask_for_invoice:
use_entities:
- invoice
- ask_in_how_many_time_payment_will_be_captured:
use_entities:
- payment_captured
- ask_for_promotions:
use_entities:
promo
promo_value
entities:
- session_created
- session_restarted
- user_name
- email
- product
- category
- mood
- list_products
- total_of_products
- product_selected_details
- product_selected
- allergy
- symptom
- search_term
- vitamin
- bottle
- protein
- electrolytes
- omega
- coffee
- collagen
- cream
- body_oil
- candle
- want
- how
- can
- create
- acquire
- cost
- procedure
- order
- order_number
- buy_word
- shipping_word
- help_word
- done_word
- time_word
- status_word
- services_word
- what_do_you_know_about
- shipping_tracking_number
- track_the_order
- payment_methods
- invoice
- payment_captured
- catalog
- promo
- promo_value
slots:
session_created:
type: bool
mappings:
- type: from_entity
entity: session_created
session_restarted:
type: bool
mappings:
- type: from_entity
entity: session_restarted
user_name:
type: text
mappings:
- type: from_entity
entity: user_name
email:
type: text
mappings:
- type: from_entity
entity: email
product:
type: text
mappings:
- type: from_entity
entity: product
intent: final_user_scenarios
- type: from_entity
entity: vitamin
intent: final_user_scenarios
- type: from_entity
entity: bottle
intent: final_user_scenarios
- type: from_entity
entity: protein
intent: final_user_scenarios
- type: from_entity
entity: electrolytes
intent: final_user_scenarios
- type: from_entity
entity: omega
intent: final_user_scenarios
- type: from_entity
entity: coffee
intent: final_user_scenarios
- type: from_entity
entity: collagen
intent: final_user_scenarios
- type: from_entity
entity: cream
intent: final_user_scenarios
- type: from_entity
entity: body_oil
intent: final_user_scenarios
- type: from_entity
entity: candle
intent: final_user_scenarios
category:
type: text
mappings:
- type: from_entity
entity: category
mood:
type: text
mappings:
- type: from_entity
entity: mood
allergy:
type: text
mappings:
- type: from_entity
entity: allergy
list_products:
type: list
mappings:
- type: from_entity
entity: list_products
total_of_products:
type: float
min_value: 0
mappings:
- type: from_entity
entity: total_of_products
product_selected:
type: text
mappings:
- type: from_entity
entity: product_selected
product_selected_details:
type: text
mappings:
- type: from_entity
entity: product_selected_details
order_number:
type: text
mappings:
- type: from_entity
entity: order_number
promo_value:
type: text
mappings:
- type: from_entity
entity: promo_value
client_agreed_information:
type: bool
mappings:
- type: from_intent
value: true
intent: affirm
- type: from_intent
value: false
intent: deny
client_agreed_open_page:
type: bool
mappings:
- type: from_intent
value: true
intent: affirm
- type: from_intent
value: false
intent: deny
# Note: Only I share the entity and slots definition