Improves saving of tweets_all_combined

This commit is contained in:
procrastimax 2023-06-28 00:11:58 +02:00
parent b2b903f45a
commit d38a3002c6
2 changed files with 151694 additions and 151693 deletions

File diff suppressed because it is too large. Load Diff

View File

@@ -45,10 +45,11 @@ tweets_combined = pd.merge(tweets_statistics,
# Convert datatypes to appropriate one
tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[[
'like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int)
'like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-1).astype(int)
tweets_combined = tweets_combined.assign(measured_at=pd.to_datetime(tweets_combined['measured_at']), # change date to date format
created_at=pd.to_datetime(tweets_combined['created_at']),
# handle to lower case
handle=tweets_combined['handle'].str.lower(),
is_deleted=tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable
tweets_combined.to_csv("data/tweets_all_combined.csv")
tweets_combined.to_csv("data/tweets_all_combined.csv", header=True, index=False)