Adds pickling of tfidf vectorizer
This commit is contained in:
parent
353b9f85cf
commit
4f4af74259
2 changed files with 8 additions and 2 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@@ -1,3 +1,4 @@
|
||||||
.jupyter/
|
.jupyter/
|
||||||
env/
|
env/
|
||||||
data/tfidf_matrix.pckl
|
data/tfidf_matrix.pckl
|
||||||
|
data/tfidf_vectorizer.pckl
|
||||||
|
|
|
@@ -4,7 +4,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
tweet_path = "data/tweets_all_combined.csv"
|
tweet_path = "data/tweets_all_combined.csv"
|
||||||
tfidf_pickle_path = "data/tfidf_matrix.pckl"
|
tfidf_matrix_path = "data/tfidf_matrix.pckl"
|
||||||
|
tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
|
||||||
relevancy_score_path = "data/tweet_relevance.json"
|
relevancy_score_path = "data/tweet_relevance.json"
|
||||||
|
|
||||||
print("Creating TFIDF Matrix")
|
print("Creating TFIDF Matrix")
|
||||||
|
@@ -14,9 +15,13 @@ vectorizer = TfidfVectorizer()
|
||||||
model = vectorizer.fit_transform([x.lower() for x in tweets["tweet_text"]])
|
model = vectorizer.fit_transform([x.lower() for x in tweets["tweet_text"]])
|
||||||
|
|
||||||
print("Saving TFIDF Matrix")
|
print("Saving TFIDF Matrix")
|
||||||
with open(tfidf_pickle_path, "wb") as f:
|
with open(tfidf_matrix_path, "wb") as f:
|
||||||
pickle.dump(model, f, protocol=5)
|
pickle.dump(model, f, protocol=5)
|
||||||
|
|
||||||
|
print("Saving TFIDF vectorizer")
|
||||||
|
with open(tfidf_vectorizer_path, "wb") as f:
|
||||||
|
pickle.dump(vectorizer, f, protocol=5)
|
||||||
|
|
||||||
print("Calculating relevance score and saving new csv")
|
print("Calculating relevance score and saving new csv")
|
||||||
like_count_weight = 1.0
|
like_count_weight = 1.0
|
||||||
retweet_count_weight = 1.0
|
retweet_count_weight = 1.0
|
||||||
|
|
Loading…
Reference in a new issue