Add pickling of the TF-IDF vectorizer
This commit is contained in:
parent
353b9f85cf
commit
4f4af74259
2 changed files with 8 additions and 2 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
.jupyter/
|
||||
env/
|
||||
data/tfidf_matrix.pckl
|
||||
data/tfidf_vectorizer.pckl
|
||||
|
|
|
@ -4,7 +4,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|||
import pickle
|
||||
|
||||
tweet_path = "data/tweets_all_combined.csv"
|
||||
tfidf_pickle_path = "data/tfidf_matrix.pckl"
|
||||
tfidf_matrix_path = "data/tfidf_matrix.pckl"
|
||||
tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
|
||||
relevancy_score_path = "data/tweet_relevance.json"
|
||||
|
||||
print("Creating TFIDF Matrix")
|
||||
|
@ -14,9 +15,13 @@ vectorizer = TfidfVectorizer()
|
|||
model = vectorizer.fit_transform([x.lower() for x in tweets["tweet_text"]])
|
||||
|
||||
print("Saving TFIDF Matrix")
|
||||
with open(tfidf_pickle_path, "wb") as f:
|
||||
with open(tfidf_matrix_path, "wb") as f:
|
||||
pickle.dump(model, f, protocol=5)
|
||||
|
||||
print("Saving TFIDF vectorizer")
|
||||
with open(tfidf_vectorizer_path, "wb") as f:
|
||||
pickle.dump(vectorizer, f, protocol=5)
|
||||
|
||||
print("Calculating relevance score and saving new csv")
|
||||
like_count_weight = 1.0
|
||||
retweet_count_weight = 1.0
|
||||
|
|
Loading…
Reference in a new issue