Hi, can anyone help me with my sentiment, so i followed the blog post, and here is my code:
from rasa.nlu.components import Component
from rasa.nlu import utils
from rasa.nlu.model import Metadata
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize # or use some other tokenizer
import json
import os
import typing
from typing import Any, Optional, Text, Dict
SENTIMENT_MODEL_FILE_NAME = "sentiment_classifier.pkl"
class SentimentAnalyzer(Component):
"""A custom sentiment analysis component"""
name = "sentiment"
provides = ["entities"]
requires = ["tokens"]
defaults = {}
language_list = ["en"]
print('initialised the class')
def __init__(self, component_config=None):
super(SentimentAnalyzer, self).__init__(component_config)
def train(self, training_data, cfg, **kwargs):
"""Load the sentiment polarity labels from the text
file, retrieve training tokens and after formatting
data train the classifier."""
self.training = []
with open('./default_dataset_training.json', 'r') as raw_training_data:
training_data = json.load(raw_training_data)
print(training_data)
neg = training_data['neg']
for val in neg:
self.training.append((val[0]['value'], 'neg'))
pos = training_data['pos']
for val_pos in pos:
self.training.append((val_pos[0]['value'], 'pos'))
processed_training = []
for t in self.training:
processed_training.append((self.preprocessing(word_tokenize(t[0])), t[1]))
self.clf = NaiveBayesClassifier.train(processed_training)
def convert_to_rasa(self, value, confidence):
"""Convert model output into the Rasa NLU compatible output format."""
entity = {"value": value,
"confidence": confidence,
"entity": "sentiment",
"extractor": "sentiment_extractor"}
return entity
def preprocessing(self, tokens):
"""Create bag-of-words representation of the training examples."""
return ({word: True for word in tokens})
def process(self, message, **kwargs):
"""Retrieve the tokens of the new message, pass it to the classifier
and append prediction results to the message class."""
if not self.clf:
# component is either not trained or didn't
# receive enough training data
entity = None
else:
tokens = [t.text for t in message.get("tokens")]
processed = self.preprocessing(tokens)
pred = self.clf.prob_classify(processed)
sentiment = pred.max()
confidence = pred.prob(sentiment)
entity = self.convert_to_rasa(sentiment, confidence)
message.set("entities", [entity], add_to_output=True)
def persist(self, file_name, model_dir):
"""Persist this model into the passed directory."""
classifier_file = os.path.join(model_dir, SENTIMENT_MODEL_FILE_NAME)
utils.json_pickle(classifier_file, self)
return {"classifier_file": SENTIMENT_MODEL_FILE_NAME}
@classmethod
def load(cls,
meta: Dict[Text, Any],
model_dir=None,
model_metadata=None,
cached_component=None,
**kwargs):
file_name = meta.get("classifier_file")
classifier_file = os.path.join(model_dir, file_name)
return utils.json_unpickle(classifier_file)
Here is my config:
language: en
pipeline:
- name: "nlp_spacy"
- name: "tokenizer_spacy"
- name: "sentiment.SentimentAnalyzer"
- name: "ner_crf"
- name: "ner_spacy"
- name: "ner_synonyms"
- name: CountVectorsFeaturizer
- intent_split_symbol: +
intent_tokenization_flag: true
name: EmbeddingIntentClassifier
When i tried to test my NLU model, i always get the same result:
{
"value": "neg",
"confidence": 0.696105702364395,
"entity": "sentiment",
"extractor": "sentiment_extractor"
}
But when i tried to test the code with same training data:
from rasa.nlu.components import Component
from rasa.nlu import utils
from rasa.nlu.model import Metadata
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize # or use some other tokenizer
import json
import os
import typing
from typing import Any, Optional, Text, Dict
from nltk.tokenize import word_tokenize
training = []
def preprocessing(tokens):
"""Create bag-of-words representation of the training examples."""
return ({word: True for word in tokens})
with open('./default_dataset_training.json', 'r') as raw_training_data:
training_data = json.load(raw_training_data)
print(training_data)
neg = training_data['neg']
for val in neg:
training.append((val[0]['value'], 'neg'))
pos = training_data['pos']
for val_pos in pos:
training.append((val_pos[0]['value'], 'pos'))
processed_training = []
for t in training:
processed_training.append((preprocessing(word_tokenize(t[0])), t[1]))
clf = NaiveBayesClassifier.train(processed_training)
while True:
text = input(">")
tokenize = word_tokenize(text)
processed = preprocessing(tokenize)
pred = clf.prob_classify(processed)
sentiment = pred.max()
confidence = pred.prob(sentiment)
print(sentiment)
print(confidence)
It is working fine. Can someone help me with this? Thanks