from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
# 1: a single document. The default token_pattern treats the commas as
# separators, so the vocabulary is {alpha, beta} with counts 2 and 1.
corpus = ['Alpha,Alpha,Beta']
vectorizer = CountVectorizer(stop_words=[])
word_count_vector = vectorizer.fit_transform(corpus)
print(word_count_vector)
OUTPUT:
(0, 0) 2
(0, 1) 1
# 2: adding a third distinct token ('Delta') grows the learned
# vocabulary to three columns in the fitted matrix.
corpus = ['Alpha,Alpha,Beta,Delta']
vectorizer = CountVectorizer(stop_words=[])
word_count_vector = vectorizer.fit_transform(corpus)
print(word_count_vector)
OUTPUT:
(0, 0) 2
(0, 1) 1
(0, 2) 1
# 3: two identical documents — each becomes its own row with the
# same per-term counts.
corpus = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta']
vectorizer = CountVectorizer(stop_words=[])
word_count_vector = vectorizer.fit_transform(corpus)
print(word_count_vector)
OUTPUT:
(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
# 4: three identical documents -> a 3-row sparse matrix, one row per
# document, each with identical counts.
corpus = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta', 'Alpha,Alpha,Beta']
vectorizer = CountVectorizer(stop_words=[])
word_count_vector = vectorizer.fit_transform(corpus)
print(word_count_vector)
OUTPUT:
(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
(2, 0) 2
(2, 1) 1
# Densify the sparse matrix to view it as a plain 2-D array.
dense_counts = word_count_vector.toarray()
print(dense_counts)
OUTPUT:
[[2 1]
[2 1]
[2 1]]
# 5: the third document introduces 'Delta'; the other two rows simply
# get a zero in that new vocabulary column.
corpus = ['Alpha,Alpha,Beta', 'Alpha,Alpha,Beta', 'Alpha,Alpha,Beta,Delta']
vectorizer = CountVectorizer(stop_words=[])
word_count_vector = vectorizer.fit_transform(corpus)
print(word_count_vector)
OUTPUT:
(0, 0) 2
(0, 1) 1
(1, 0) 2
(1, 1) 1
(2, 0) 2
(2, 1) 1
(2, 2) 1
# Densify the sparse matrix to view it as a plain 2-D array.
dense_counts = word_count_vector.toarray()
print(dense_counts)
OUTPUT:
[[2 1 0]
[2 1 0]
[2 1 1]]
# 6
# Iterating a sparse matrix yields each row as a 1-row sparse matrix;
# toarray()[0] flattens that single row into a 1-D array for printing.
for row in word_count_vector:
    print(row.toarray()[0])
OUTPUT:
[2 1 0]
[2 1 0]
[2 1 1]
Pages
- Index of Lessons in Technology
- Index of Book Summaries
- Index of Book Lists And Downloads
- Index For Job Interviews Preparation
- Index of "Algorithms: Design and Analysis"
- Python Course (Index)
- Data Analytics Course (Index)
- Index of Machine Learning
- Postings Index
- Index of BITS WILP Exam Papers and Content
- Lessons in Investing
- Index of Math Lessons
- Index of Management Lessons
- Book Requests
- Index of English Lessons
- Index of Medicines
- Index of Quizzes (Educational)
Exploring scikit-learn's CountVectorizer
Subscribe to:
Comments (Atom)
No comments:
Post a Comment