Source code for clevercloud.CleverLemStem

def CleverLemStem(text: str) -> str:
    """
    A preprocessor to conduct lemmatization and stemming on the text.

    The text is split into ``\\w+`` tokens, each token is lemmatized with
    NLTK's ``WordNetLemmatizer``, and the lemmatized tokens are then stemmed
    with NLTK's ``LancasterStemmer``. The resulting tokens are joined with
    single spaces.

    Parameters
    ----------
    text : str
        Input a string

    Returns
    -------
    str
        The lemmatized and stemmed string

    Raises
    ------
    TypeError
        If ``text`` is not a string.

    Notes
    -----
    The ``omw-1.4`` and ``wordnet`` NLTK corpora are required. They are
    downloaded only when they cannot be found locally, so repeated calls do
    not contact the network or print download messages once the data is
    installed.

    Examples
    --------
    >>> sample_text = "maximum crying feet"
    >>> CleverLemStem(sample_text)
    'maxim cry foot'
    """
    # Check the type of the input before importing anything, so invalid
    # input fails fast without requiring NLTK or its data.
    if not isinstance(text, str):
        raise TypeError("The input value should be a string.")

    # Import packages lazily: NLTK is only needed for valid input.
    import nltk
    from nltk.stem import LancasterStemmer, WordNetLemmatizer
    from nltk.tokenize import RegexpTokenizer

    # Fetch the required corpora only if they are missing. Calling
    # nltk.download() unconditionally would hit the network and print
    # status messages on every single call.
    required_corpora = (
        ("omw-1.4", "corpora/omw-1.4"),
        ("wordnet", "corpora/wordnet"),
    )
    for package, resource in required_corpora:
        try:
            nltk.data.find(resource)
        except LookupError:
            nltk.download(package)

    # Lemmatization
    tokenizer = RegexpTokenizer(r'\w+')
    lemmatizer = WordNetLemmatizer()
    lem_text = ' '.join(
        lemmatizer.lemmatize(token) for token in tokenizer.tokenize(text)
    )

    # Stemming: re-tokenize the lemmatized text so the stemmer operates on
    # the same kind of ``\w+`` tokens as the lemmatizer did.
    stemmer = LancasterStemmer()
    clean_text = ' '.join(
        stemmer.stem(token) for token in tokenizer.tokenize(lem_text)
    )
    return clean_text