Improves saving of tweets_all_combined
This commit is contained in:
parent
b2b903f45a
commit
d38a3002c6
2 changed files with 151694 additions and 151693 deletions
303382
data/tweets_all_combined.csv
303382
data/tweets_all_combined.csv
File diff suppressed because it is too large
Load diff
|
@@ -45,10 +45,11 @@ tweets_combined = pd.merge(tweets_statistics,
|
||||||
|
|
||||||
# Convert datatypes to appropriate one
|
# Convert datatypes to appropriate one
|
||||||
tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[[
|
tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[[
|
||||||
'like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int)
|
'like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-1).astype(int)
|
||||||
tweets_combined = tweets_combined.assign(measured_at=pd.to_datetime(tweets_combined['measured_at']), # change date to date format
|
tweets_combined = tweets_combined.assign(measured_at=pd.to_datetime(tweets_combined['measured_at']), # change date to date format
|
||||||
created_at=pd.to_datetime(tweets_combined['created_at']),
|
created_at=pd.to_datetime(tweets_combined['created_at']),
|
||||||
# handle to lower case
|
# handle to lower case
|
||||||
handle=tweets_combined['handle'].str.lower(),
|
handle=tweets_combined['handle'].str.lower(),
|
||||||
is_deleted=tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable
|
is_deleted=tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable
|
||||||
tweets_combined.to_csv("data/tweets_all_combined.csv")
|
|
||||||
|
tweets_combined.to_csv("data/tweets_all_combined.csv", header=True, index=False)
|
||||||
|
|
Loading…
Reference in a new issue