Source code for clevercloud.CleverStopwords

# author: Adrianne Leung
# date: Jan 2022

[docs]def CleverStopwords(words): """ A comprehensive list of English stopwords that allow adding more customized words. Parameters ---------- words : set of strings Input a set of words that need to be included in the stopwords Returns ------- set of strings The complete set of stopwords Examples -------- >>> sample_words = {'disney', 'paris'} >>> CleverStopwords(sample_words) """ import nltk from nltk.corpus import stopwords # Download stopwords from NLTK library nltk.download('stopwords') # Test input variable type if not isinstance(words, set): raise TypeError("Input variable should be a set.") # Test data type of values of input variable for item in words: if not isinstance(item, str): raise TypeError("Each element of the input should be a string.") # NLTK English stopwords stopwords = set(stopwords.words("english")) try: nltk.download("stopwords") except: raise ImportError("Stopwords are not downloaded from NLTK.") new_stopwords = words # Check for unique new stopwords for w in new_stopwords: if w is not stopwords: # Add customised new words to the existing NLTK stopwords stopwords_all = stopwords.union(stopwords, new_stopwords) return stopwords_all