here is my chatbot-nlp folder
my data contains nlu ans stories .
my config file # Configuration for Rasa NLU.
language: fr_core_news_md
pipeline:
- name: “SpacyNLP”
- name: “SpacyTokenizer”
- name: “sentiment.SentimentAnalyzer”
- name: “SpacyFeaturizer”
- name: “WhitespaceTokenizer”
- name: “RegexFeaturizer”
- name: “CRFEntityExtractor”
- name: “EntitySynonymMapper”
- name: “CountVectorsFeaturizer”
- name: “SklearnIntentClassifier”
Configuration for Rasa Core.
policies:
- name: MemoizationPolicy
- name: KerasPolicy
- name: MappingPolicy
- name: FormPolicy
- name: “FallbackPolicy”
nlu_threshold: 0.25
core_threshold: 0.3
fallback_action_name: “action_my_fallback”
labels:
pos
pos
neu
neu
neg
neg
sentiment.py
from rasa.nlu.components import Component
from rasa.nlu import utils
from rasa.nlu.model import Metadata
import nltk
from nltk.classify import NaiveBayesClassifier
import os
import typing
from typing import Any, Optional, Text, Dict
SENTIMENT_MODEL_FILE_NAME = “sentiment_classifier.pkl”
class SentimentAnalyzer(Component):
“”“A custom sentiment analysis”“”
name = “sentiment”
provides = [“entities”]
requires = [“tokens”]
defaults = {}
language_list = [“fr_core_news_md”]
print(‘initialised the class’)
def __init__(self, component_config=None):
super(SentimentAnalyzer, self).__init__(component_config)
def train(self, training_data, cfg, **kwargs):
"""Load the sentiment polarity labels from the text
file, retrieve training tokens and after formatting
data train the classifier."""
with open('labels.txt','r') as f:
labels = f.read().splitlines()
training_data = training_data.training_examples #list of Message objects
tokens = [list(map(lambda x: x.text, t.get('tokens'))) for t in training_data]
processed_tokens = [self.preprocessing(t) for t in tokens]
labeled_data = [(t, x) for t,x in zip(processed_tokens, labels)]
self.clf = NaiveBayesClassifier.train(labeled_data)
def convert_to_rasa(self, value, confidence):
"""Convert model output into the Rasa NLU compatible output format."""
entity = {"value": value,
"confidence": confidence,
"entity": "sentiment",
"extractor": "sentiment_extractor"}
return entity
def preprocessing(self, tokens):
"""Create bag-of-words representation of the training examples."""
return ({word: True for word in tokens})
def process(self, message, **kwargs):
"""Retrieve the tokens of the new message, pass it to the classifier
and append prediction results to the message class."""
if not self.clf:
# component is either not trained or didn't
# receive enough training data
entity = None
else:
tokens = [t.text for t in message.get("tokens")]
tb = self.preprocessing(tokens)
pred = self.clf.prob_classify(tb)
sentiment = pred.max()
confidence = pred.prob(sentiment)
entity = self.convert_to_rasa(sentiment, confidence)
message.set("entities", [entity], add_to_output=True)
def persist(self, file_name, model_dir):
"""Persist this model into the passed directory."""
classifier_file = os.path.join(model_dir, SENTIMENT_MODEL_FILE_NAME)
utils.json_pickle(classifier_file, self)
return {"classifier_file": SENTIMENT_MODEL_FILE_NAME}
@classmethod
def load(cls,
meta: Dict[Text, Any],
model_dir=None,
model_metadata=None,
cached_component=None,
**kwargs):
file_name = meta.get("classifier_file")
classifier_file = os.path.join(model_dir, file_name)
return utils.json_unpickle(classifier_file)