self.model_word2vec is not accessible in process()

```python
import logging
import glob        # file pattern matching
import re          # regular expressions
import os
import pickle
import datetime
import typing
from typing import Any, Optional, Text, Dict
from string import digits
from time import time
from collections import defaultdict
from pprint import pprint

import numpy as np
import pandas as pd
import gensim
from gensim import corpora, models, similarities
from gensim.models import Word2Vec
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import *
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

from rasa_nlu.components import Component
from rasa_nlu import utils
from rasa_nlu.model import Metadata

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

nlp = spacy.load('en_core_web_sm')

SIMILARITY_MODEL_FILE_NAME = "text_similarity.pkl"


class TextSimilarity(Component):
    name = "TextSimilarity"
    provides = ["Similarity"]
    requires = []
    defaults = {}
    language_list = ["en"]

    user_ip = ""
    w2v_corpus = []    # documents to train Word2Vec on
    wmd_corpus = []    # documents to run queries against
    documents = []     # wmd_corpus with no pre-processing (so we can see the original documents)
    corpus_texts = []
    model_word2vec = ""
    questions = []

    def __init__(self, component_config=None, model_word2vec=None):
        super(TextSimilarity, self).__init__(component_config)
        self.model_word2vec = model_word2vec

    def getTrainingData(self, training_data):
        # Group the training examples by intent: one concatenated text per
        # intent, keeping the first question of each intent for display later.
        myDict = {}
        training_data = training_data.training_examples
        for t in training_data:
            intent = t.get('intent')
            if intent in myDict.keys():
                myDict[intent] = myDict[intent] + " " + t.text
            else:
                myDict[intent] = t.text
                self.questions.append(t.text)
        print("printing all the questions", len(self.questions))
        print(self.questions)
        return myDict

    def removeNuminStr(self, string1):
        # Strip all digits from the string, then split on '.' and keep the
        # last non-empty segment.
        s = ""
        remove_digits = str.maketrans('', '', digits)
        res = string1.translate(remove_digits)
        for r in res.split('.'):
            if r:
                s = r
        return s

    def pre_Preocess(self, content_FAQ):
        print("I am in pre_Preocess")
        # Pre-process the FAQs: remove stop words, punctuation, numbers and
        # extra whitespace, and keep the lemmatized (root) words.
        texts, article = [], []
        spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS
        # Customized stop words; more can be added to this list.
        my_stop_words = [u'please', u'be', u'a', u'A', u'what', u'who',
                         u'What', u'aaa', u'where', u'\'s', u'\n ']
        for stopword in my_stop_words:
            lexeme = nlp.vocab[stopword]
            lexeme.is_stop = True
        # Add the lemmatized words to the corpus.
        for query in content_FAQ:
            doc = nlp(query.lower())
            for w in doc:
                if w.text != '\n' and w.text != '\t ' and not w.is_punct \
                        and not w.like_num and not w.is_stop and len(w) > 1:
                    if not w.lemma_.isalpha():
                        # The lemma contains digits: strip them, re-tokenize,
                        # and keep the remaining non-stop words.
                        removed_num = self.removeNuminStr(w.lemma_)
                        removed_num = nlp(removed_num)
                        for word in removed_num:
                            if not word.is_stop and len(word) > 0:
                                article.append(word.lemma_)
                    else:
                        article.append(w.lemma_)
            print(article)
            print("_________________________________________________________")
            texts.append(article)
            article = []
        return texts


    def train(self, training_data, cfg, **kwargs):
        print("I am in train")
        df_ExpectedVar = []
        corpus = self.getTrainingData(training_data)
        for c in corpus:
            df_ExpectedVar.append(corpus[c])
        # Pre-process the concatenated per-intent texts.
        self.corpus_texts = self.pre_Preocess(df_ExpectedVar)
        print(len(self.corpus_texts))

        with open('corpus', 'wb') as fp:
            pickle.dump(self.corpus_texts, fp)

        # Corpus for training Word2Vec.
        self.w2v_corpus = self.corpus_texts
        print("w2v_corpus", self.w2v_corpus)
        # Corpus for similarity queries.
        self.wmd_corpus = self.corpus_texts
        self.documents = df_ExpectedVar
        # Train the Word2Vec model. Note: gensim >= 4.0 renamed `size`
        # to `vector_size`.
        model = gensim.models.Word2Vec(self.w2v_corpus, size=150, workers=3)
        tt = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        os.makedirs("word2vec_model", exist_ok=True)
        model.save("./word2vec_model/model_word2vec" + tt + ".model")
        self.model_word2vec = model
        return self.model_word2vec

    def pre_process_userinput(self, query):
        print("I am in pre_process_userinput")
        # The user input must be pre-processed the same way as the dataset.
        article = []
        doc = nlp(query)
        for w in doc:
            if w.text != '\n' and w.text != '\t ' and not w.is_punct \
                    and not w.like_num and not w.is_stop and len(w) > 1:
                article.append(w.lemma_)
        # print("User input in lemmatized format:", article)
        return article

    def process(self, message, **kwargs):
        print("I am in process")
        columns = ['Similarities', 'FAQs']
        rows = []
        jsonData = {}
        userinput = message.text.lower()
        print("userinput", userinput)
        user_ip = self.pre_process_userinput(userinput)
        with open('corpus', 'rb') as fp:
            self.wmd_corpus = pickle.load(fp)
        # self.model_word2vec = Word2Vec.load('./word2vec_model')
        print("wmd_corpus", len(self.wmd_corpus))
        from gensim.similarities import WmdSimilarity
        num_best = 5
        instance = WmdSimilarity(self.wmd_corpus, self.model_word2vec,
                                 num_best=num_best)
        # A query is simply a "look-up" in the similarity class: it returns
        # up to num_best (document index, similarity) pairs.
        sims = instance[user_ip]
        print("printing all the questions", len(self.questions))
        print("this is the length of the documents :::::", len(self.documents))
        print("this is the length of the wmd_corpus :::::", len(self.wmd_corpus))
        print("Displaying the query")
        print('Query: ', message.text)
        print("\n")
        # Print the retrieved documents together with their similarities,
        # and collect them for the output.
        for i in range(min(num_best, len(sims))):
            jsonSim = {}
            print('similarity score =', sims[i][1])
            print(self.questions[sims[i][0]])
            print("\n")
            rows.append([sims[i][1], self.questions[sims[i][0]]])
            jsonSim["Similarity"] = sims[i][1]
            jsonSim["FAQ"] = self.questions[sims[i][0]]
            jsonData[i] = jsonSim

        final_scores = pd.DataFrame(rows, columns=columns)
        print("jsonData", jsonData)
        # Similarity = self.convert_to_rasa(final_scores)
        message.set("Similarity", jsonData, add_to_output=True)

    def persist(self, model_dir):
        """Persist this model into the passed directory."""
        print("I am in persist")
        similarity_file = os.path.join(model_dir, SIMILARITY_MODEL_FILE_NAME)
        print(similarity_file)
        with open(similarity_file, 'wb') as f:
            pickle.dump(self, f)
        return {"similarity_file": SIMILARITY_MODEL_FILE_NAME}

    @classmethod
    def load(cls,
             model_dir=None,
             model_metadata=None,
             cached_component=None,
             **kwargs):
        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("similarity_file", SIMILARITY_MODEL_FILE_NAME)
        similarity_file = os.path.join(model_dir, file_name)
        if os.path.exists(similarity_file):
            return utils.pycloud_unpickle(similarity_file)
        else:
            return cls(meta)
```
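For reference, the `WmdSimilarity` lookup that `process()` relies on can be exercised on its own. A minimal sketch with made-up tokens, assuming gensim < 4.0 (where the `Word2Vec` dimension parameter is `size`) and the `pyemd` package installed:

```python
import gensim
from gensim.similarities import WmdSimilarity

# Toy corpus: one token list per document (invented data for illustration).
corpus = [["open", "bank", "account"],
          ["reset", "account", "password"],
          ["close", "bank", "account"]]

# min_count=1 keeps the tiny vocabulary; gensim >= 4.0 renamed size to vector_size.
model = gensim.models.Word2Vec(corpus, size=50, min_count=1, workers=1)

# Indexing the instance with a token list returns (document index, similarity)
# pairs for the num_best closest documents.
index = WmdSimilarity(corpus, model, num_best=2)
print(index[["reset", "password"]])
```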

This is a custom component for text similarity.

I want to access `self.model_word2vec` in `process()`.

Can anyone please tell me how to access it?
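One pattern worth sketching (this is not the code above: `W2V_MODEL_FILE_NAME` and the file handling are assumptions for illustration) is to save the gensim model to its own file in `persist()` and re-attach it in `load()`, so the component instance that Rasa calls `process()` on has `self.model_word2vec` set again after a restart:

```python
# Hypothetical replacement for persist()/load() inside TextSimilarity;
# W2V_MODEL_FILE_NAME is a made-up constant, not from the original code.
import os
from gensim.models import Word2Vec
from rasa_nlu.components import Component

W2V_MODEL_FILE_NAME = "word2vec.model"

class TextSimilarity(Component):

    def persist(self, model_dir):
        """Save the trained Word2Vec model into the model directory."""
        w2v_file = os.path.join(model_dir, W2V_MODEL_FILE_NAME)
        if self.model_word2vec is not None:
            self.model_word2vec.save(w2v_file)
        return {"w2v_file": W2V_MODEL_FILE_NAME}

    @classmethod
    def load(cls, model_dir=None, model_metadata=None,
             cached_component=None, **kwargs):
        """Recreate the component and re-attach the persisted Word2Vec model."""
        meta = model_metadata.for_component(cls.name)
        component = cls(meta)
        w2v_file = os.path.join(model_dir,
                                meta.get("w2v_file", W2V_MODEL_FILE_NAME))
        if os.path.exists(w2v_file):
            component.model_word2vec = Word2Vec.load(w2v_file)
        return component
```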

It is really hard to read the code you posted. Can you please reformat it using ``` around your code? Thanks.