import pandas as pd
from sentence_transformers import SentenceTransformer
# Pretrained SBERT checkpoint shared by every benchmark below.
sbert_model = SentenceTransformer('bert-base-nli-mean-tokens')

# Tab-separated ground-truth file; the benchmarks read its 'text' column.
df = pd.read_csv('nytEditorialSnippets_GroundTruth.txt', sep='\t')

# Fixed-size row subsets (first 10 / first 100 rows) used for timing.
df_10 = df.iloc[:10]
df_100 = df.iloc[:100]
# 1. Calling the sentence encoder once per record, via a function applied to each row
def get_embedding(input_sentence):
    """Return the embedding of a single sentence.

    The module-level ``sbert_model`` is asked to encode a one-element
    batch containing ``input_sentence``; the first (and only) row of the
    result is returned.

    Args:
        input_sentence: The sentence to encode.

    Returns:
        The encoder output for ``input_sentence`` (row 0 of the batch
        result).
    """
    # Wrap the sentence in a list so the encoder receives a batch, then
    # index the single resulting row back out.
    return sbert_model.encode([input_sentence])[0]
%%timeit
df_out_1 = df_100['text'].apply(get_embedding)
9.15 s ± 317 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%time
df_out_1 = df_100['text'].apply(get_embedding)
CPU times: user 18.3 s, sys: 112 ms, total: 18.5 s
Wall time: 9.25 s
%%time
df_out_1 = df_10['text'].apply(get_embedding)
CPU times: user 1.97 s, sys: 7.88 ms, total: 1.98 s
Wall time: 997 ms
df_out_1
0 [-0.7970602, 0.47616163, 0.2621567, 0.38846374...
1 [-0.32450542, -0.10945253, 0.6443658, 0.212320...
2 [-0.2602994, -0.0036350375, 1.2917686, 0.12602...
3 [0.5173101, -0.86385506, 1.5003084, 0.76273316...
4 [-0.19630705, 1.611963, 0.8502133, 0.059544455...
...
95 [-0.7843676, 0.70446295, -0.86373883, 0.096476...
96 [-0.047543377, -1.0461698, 0.9984542, 0.776394...
97 [-0.5863306, 0.38590172, -0.15509816, 0.275745...
98 [0.22616625, 0.33843663, 0.030288033, 0.191214...
99 [0.40268317, 1.1528935, 0.3597172, 0.16918863,...
Name: text, Length: 100, dtype: object
type(df_out_1)
pandas.core.series.Series
# 2. Calling the sentence encoder once on the entire array of sentences
%%timeit
df_out_2 = sbert_model.encode(df_100['text'].values)
9.25 s ± 275 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%time
df_out_2 = sbert_model.encode(df_100['text'].values)
CPU times: user 17.8 s, sys: 1 s, total: 18.8 s
Wall time: 9.41 s
df_out_2
array([[-0.7970604 , 0.4761617 , 0.262157 , ..., -0.2437577 ,
1.1380528 , 0.28697717],
[-0.32450542, -0.10945235, 0.64436567, ..., 0.14436643,
-0.24656864, -0.18447737],
[-0.26029944, -0.00363465, 1.2917686 , ..., 1.1463983 ,
-1.0714562 , -0.09548129],
...,
[-0.5863306 , 0.38590172, -0.15509816, ..., -0.2937488 ,
-0.3724223 , 0.1826524 ],
[ 0.22616649, 0.33843663, 0.03028765, ..., 0.66338176,
-0.6620043 , 0.09410357],
[ 0.40268335, 1.152894 , 0.35971704, ..., -0.9203086 ,
0.17893644, 0.71039814]], dtype=float32)
type(df_out_2)
numpy.ndarray
df_out_2.shape
(100, 768)
Pages
- Index of Lessons in Technology
- Index of Book Summaries
- Index of Book Lists And Downloads
- Index For Job Interviews Preparation
- Index of "Algorithms: Design and Analysis"
- Python Course (Index)
- Data Analytics Course (Index)
- Index of Machine Learning
- Postings Index
- Index of BITS WILP Exam Papers and Content
- Lessons in Investing
- Index of Math Lessons
- Index of Management Lessons
- Book Requests
- Index of English Lessons
- Index of Medicines
- Index of Quizzes (Educational)
Monday, September 19, 2022
Performance testing of BERT-based Sentence Transformers for sentence encoding
Download Code
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment