"""Exploring scikit-learn's CountVectorizer.

Six small experiments showing how CountVectorizer builds its vocabulary
and count matrix.  Note that the default ``token_pattern``
(``r"(?u)\\b\\w\\w+\\b"``) treats commas as token separators and
lower-cases input, so ``'Alpha,Alpha,Beta'`` tokenizes to
``['alpha', 'alpha', 'beta']`` — no custom tokenizer is needed.
Expected output of each step is shown in the trailing comments.
"""
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords  # NOTE(review): imported in the original post but never used


def show_counts(docs):
    """Fit a fresh CountVectorizer on *docs*, print and return the sparse count matrix.

    ``stop_words=[]`` disables stop-word removal so every token is counted.
    The printed form of a sparse matrix is one ``(row, col)  count`` entry
    per non-zero cell.
    """
    cv = CountVectorizer(stop_words=[])
    word_count_vector = cv.fit_transform(docs)
    print(word_count_vector)
    return word_count_vector


def main():
    # 1: one doc, two distinct tokens ('alpha' x2, 'beta' x1)
    show_counts(['Alpha,Alpha,Beta'])
    # -> (0, 0) 2   (0, 1) 1

    # 2: adding a third distinct token adds a third column
    show_counts(['Alpha,Alpha,Beta,Delta'])
    # -> (0, 0) 2   (0, 1) 1   (0, 2) 1

    # 3: identical documents produce identical rows
    show_counts(['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta'])
    # -> (0, 0) 2   (0, 1) 1   (1, 0) 2   (1, 1) 1

    # 4: same with three copies; toarray() shows the dense matrix
    wcv = show_counts(['Alpha,Alpha,Beta'] * 3)
    print(wcv.toarray())
    # -> [[2 1] [2 1] [2 1]]

    # 5: the vocabulary spans the WHOLE corpus, so documents missing a
    #    term ('delta') simply get a 0 in that column
    wcv = show_counts(['Alpha,Alpha,Beta',
                       'Alpha,Alpha,Beta',
                       'Alpha,Alpha,Beta,Delta'])
    print(wcv.toarray())
    # -> [[2 1 0] [2 1 0] [2 1 1]]

    # 6: iterating a sparse (CSR) matrix yields one 1-row matrix per
    #    document, so row.toarray()[0] gives the dense count row directly
    #    (the original post's i[0].toarray()[0] double-indexes redundantly)
    for row in wcv:
        print(row.toarray()[0])
    # -> [2 1 0]  [2 1 0]  [2 1 1]


if __name__ == "__main__":
    main()
Exploring scikit-learn's CountVectorizer
Subscribe to:
Posts (Atom)
No comments:
Post a Comment