Exploring scikit-learn's CountVectorizer


from sklearn.feature_extraction.text import CountVectorizer

# 1

# One document: 'alpha' appears twice, 'beta' once
docs1 = ['Alpha,Alpha,Beta']
cv = CountVectorizer(stop_words=[])
word_count_vector = cv.fit_transform(docs1)
print(word_count_vector)

OUTPUT:
(0, 0) 2
(0, 1) 1
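
The counts above fall out of how the default analyzer tokenizes the string: the text is lowercased and split on non-word characters (the default token_pattern is r"(?u)\b\w\w+\b"), so the commas act as separators. A quick sketch, reusing the fitted cv from above, to inspect the tokens and the learned column mapping:

# The default analyzer lowercases and splits the raw string into tokens
analyzer = cv.build_analyzer()
print(analyzer('Alpha,Alpha,Beta'))   # ['alpha', 'alpha', 'beta']

# vocabulary_ maps each token to its column index in the sparse matrix
print(cv.vocabulary_)                 # {'alpha': 0, 'beta': 1}

So (0, 0) 2 says document 0 contains the token in column 0 ('alpha') twice, and (0, 1) 1 says it contains 'beta' once.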

# 2

# A third distinct token ('delta') adds a third column to the vocabulary
docs1 = ['Alpha,Alpha,Beta,Delta']
cv = CountVectorizer(stop_words=[])
word_count_vector = cv.fit_transform(docs1)
print(word_count_vector)

OUTPUT:

(0, 0) 2
(0, 1) 1
(0, 2) 1
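
The new token 'delta' simply becomes a third column. To confirm which column is which, print the learned feature names (get_feature_names_out is the scikit-learn 1.0+ name; older releases call it get_feature_names):

# Columns correspond to the sorted vocabulary learned during fit
print(cv.get_feature_names_out())   # ['alpha' 'beta' 'delta']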

# 3

# Two identical documents: one row per document, same counts in each
docs1 = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta']
cv = CountVectorizer(stop_words=[])
word_count_vector = cv.fit_transform(docs1)
print(word_count_vector)

OUTPUT:

(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
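
Each document becomes its own row, so the matrix shape is (number of documents, vocabulary size), and identical documents produce identical rows:

print(word_count_vector.shape)   # (2, 2)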

# 4

# Three identical documents
docs1 = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta', 'Alpha,Alpha,Beta']
cv = CountVectorizer(stop_words=[])
word_count_vector = cv.fit_transform(docs1)
print(word_count_vector)

OUTPUT:
(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
(2, 0) 2
(2, 1) 1

# Dense (NumPy) view of the same matrix
print(word_count_vector.toarray())

OUTPUT:

[[2 1]
 [2 1]
 [2 1]]
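
Because it is a regular SciPy sparse matrix, it also supports aggregation directly; for example, summing down the rows gives the total count of each term across the corpus:

# Corpus-wide term counts (a 1 x n_features matrix)
print(word_count_vector.sum(axis=0))   # [[6 3]]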

# 5

# The third document introduces 'delta', so the whole matrix gains a column
docs1 = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta', 'Alpha,Alpha,Beta,Delta']
cv = CountVectorizer(stop_words=[])
word_count_vector = cv.fit_transform(docs1)
print(word_count_vector)

OUTPUT:

(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
(2, 0) 2
(2, 1) 1
(2, 2) 1

print(word_count_vector.toarray())

OUTPUT:

[[2 1 0]
 [2 1 0]
 [2 1 1]]
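
Pairing the dense array with the feature names makes the columns readable at a glance. A small sketch assuming pandas is installed (it is not required by CountVectorizer itself):

import pandas as pd

# Label the columns with the terms learned by the vectorizer
df = pd.DataFrame(word_count_vector.toarray(),
                  columns=cv.get_feature_names_out())
print(df)

OUTPUT:

   alpha  beta  delta
0      2     1      0
1      2     1      0
2      2     1      1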

# 6

# Iterating over the sparse matrix row by row
for i in word_count_vector:
    print(i.toarray()[0])   # each row is a 1 x n_features sparse matrix
    
OUTPUT:

[2 1 0]
[2 1 0]
[2 1 1]
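
Calling toarray() on every row works, but it gives up the benefit of the sparse representation. A sparse-aware alternative (a sketch relying on the .indices and .data attributes of each CSR row) prints only the non-zero (column, count) pairs per document:

for row_id, row in enumerate(word_count_vector):
    # row.indices holds the non-zero column indices, row.data the counts
    pairs = [(int(col), int(count)) for col, count in zip(row.indices, row.data)]
    print(row_id, pairs)

OUTPUT:

0 [(0, 2), (1, 1)]
1 [(0, 2), (1, 1)]
2 [(0, 2), (1, 1), (2, 1)]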
