finished tidying data
This commit is contained in:
		
							parent
							
								
									880c989c03
								
							
						
					
					
						commit
						36de4fdf81
					
				
					 7 changed files with 334138 additions and 529 deletions
				
			
		
							
								
								
									
										50
									
								
								.ipynb_checkpoints/merge_police_tweets-checkpoint.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								.ipynb_checkpoints/merge_police_tweets-checkpoint.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,50 @@ | |||
| import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| # Merging the different tables of the old (~2021) and new (~2022) scrapers | ||||
| 
 | ||||
| ## cols: hashtag, url, mention (same for both) | ||||
| tweets_meta = pd.concat([pd.read_csv("data/entity_old.tsv", sep = "\t"), # data from old scraper | ||||
|                          pd.read_csv("data/tweets.csv")]) # data from new scraper | ||||
| 
 | ||||
| ## cols: id, tweet_text, created_at, user_id; only subset from old table (same tsv used in next step) | ||||
| tweets_text = pd.concat([pd.read_csv("data/tweet_old.tsv", sep = "\t")[['id','tweet_text', 'created_at', 'user_id']].rename(columns = {"id":"tweet_id"}), | ||||
|                          pd.read_csv("data/tweets-1679742698645.csv")]) | ||||
| 
 | ||||
| ## cols: id, like_count, retweet_count, reply_count, quote_count; only subset from old table | ||||
| tweets_statistics = pd.concat([pd.read_csv("data/tweet_old.tsv", sep = "\t")[['id', 'like_count', 'retweet_count', 'reply_count', 'quote_count']].rename(columns = {"id":"tweet_id"}), | ||||
|                               pd.read_csv("data/tweets-1679742620302.csv")]) | ||||
| 
 | ||||
| ## cols: user_id, handle, user_name; column names do not match between old and new data. Username and handle even seem to be swapped in the new data set (inverse order) | ||||
| ## Info: Only a small number of user_ids appear in both data sets, and for those the username has occasionally changed, so they cannot easily be merged | ||||
| tweets_user = pd.read_csv("data/user_old.tsv",  | ||||
|                           sep = "\t").rename(columns = {"id":"user_id","name": "user_name"} # uniform names | ||||
|                                             ).merge(pd.read_csv("data/tweets-1679742702794.csv" # merge with renamed new data | ||||
|                                                                ).rename(columns = {"username":"handle", "handle": "user_name"}), # reverse col names | ||||
|                                                    on = "user_id", # user_id as matching column | ||||
|                                                    how = "outer", # keep all unique uer_ids | ||||
|                                                    suffixes = ["_2021", "_2022"]) # identify column where username and label came from | ||||
| 
 | ||||
| ## Some usernames corresponding to one user_id have changed over time. For easier handling, only the latest username and handle are kept. | ||||
| tweets_user = tweets_user.assign(handle    = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1), | ||||
|                                  user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1) | ||||
|                                 ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1) # no longer needed | ||||
| 
 | ||||
| ## additional information concerning the police stations | ||||
| ## cols: handle, name, typ, bundesland, stadt, lat, long | ||||
| police_stations = pd.read_csv("data/polizei_accounts_geo.csv", sep = "\t"   | ||||
|                              ).rename(columns = {"Polizei Account": "handle"}) | ||||
| 
 | ||||
| # Merge statistics, tweet text and user information in one data frame | ||||
| tweets_combined = pd.merge(tweets_statistics,  | ||||
|                            tweets_text, | ||||
|                            on = 'tweet_id').merge(tweets_user, on = 'user_id' | ||||
|                                                  ).drop(['id'], axis = 1) # drop unascessary id column (redundant to index) | ||||
| 
 | ||||
| # Convert datatypes to appropriate one | ||||
| tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int) | ||||
| tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format | ||||
|                                          created_at  = pd.to_datetime(tweets_combined['created_at']), | ||||
|                                          handle      = tweets_combined['handle'].str.lower(), # handle to lower case | ||||
|                                          is_deleted  = tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable | ||||
| # tweets_combined.to_csv("data/tweets_all_combined.csv") | ||||
|  | @ -12,7 +12,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": 1, | ||||
|    "id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -37,7 +37,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 117, | ||||
|    "execution_count": 45, | ||||
|    "id": "fcc48831-7999-4d79-b722-736715b1ced6", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -46,48 +46,50 @@ | |||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "((479991, 3), (151690, 8), (151690, 4), (13327, 3))" | ||||
|        "((479991, 3), (151690, 8), (151690, 4), (13327, 3), (163, 7))" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 117, | ||||
|      "execution_count": 45, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Merging different table of old (~2021) and new (~2022) scraper\n", | ||||
|     "\n", | ||||
|     "## cols: hashtag, url, mention (same for both)\n", | ||||
|     "tweets_meta = pd.concat([pd.read_csv(\"data/entity_old.tsv\", sep = \"\\t\"), # data from old scraper\n", | ||||
|     "                         pd.read_csv(\"data/tweets.csv\")]) # data from new scraper\n", | ||||
|     "\n", | ||||
|     "tweets_text = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', \n", | ||||
|     "                                                                        'tweet_text', \n", | ||||
|     "                                                                        'created_at', \n", | ||||
|     "                                                                        'user_id']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "## cols: id, tweet_text, created_at, user_id; only subset from old table (same tsv used in next step)\n", | ||||
|     "tweets_text = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id','tweet_text', 'created_at', 'user_id']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "                         pd.read_csv(\"data/tweets-1679742698645.csv\")])\n", | ||||
|     "\n", | ||||
|     "tweets_statistics = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', \n", | ||||
|     "                                                                              'like_count', \n", | ||||
|     "                                                                              'retweet_count', \n", | ||||
|     "                                                                              'reply_count', \n", | ||||
|     "                                                                              'quote_count']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "## cols: id, like_count, retweet_count, reply_count, quote_count; only subset from old table\n", | ||||
|     "tweets_statistics = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', 'like_count', 'retweet_count', 'reply_count', 'quote_count']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "                              pd.read_csv(\"data/tweets-1679742620302.csv\")])\n", | ||||
|     "\n", | ||||
|     "## cols: user_id, handle, user_name; colnames do not match betweend old an new data. Even username and handle seem to be mixed up in new data set (inverse order)\n", | ||||
|     "## Info: Only a small amount of user_ids appear in both data sets, but if so username occasionaly have changed an therefore can not easily be merged\n", | ||||
|     "tweets_user = pd.read_csv(\"data/user_old.tsv\", \n", | ||||
|     "                          sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"}\n", | ||||
|     "                                            ).merge(pd.read_csv(\"data/tweets-1679742702794.csv\"\n", | ||||
|     "                                                               ).rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"}),\n", | ||||
|     "                                                   on = \"user_id\",\n", | ||||
|     "                                                   how = \"outer\",\n", | ||||
|     "                                                   suffixes = [\"_2021\", \"_2022\"])\n", | ||||
|     "                          sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"} # uniform names\n", | ||||
|     "                                            ).merge(pd.read_csv(\"data/tweets-1679742702794.csv\" # merge with renamed new data\n", | ||||
|     "                                                               ).rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"}), # reverse col names\n", | ||||
|     "                                                   on = \"user_id\", # user_id as matching column\n", | ||||
|     "                                                   how = \"outer\", # keep all unique uer_ids\n", | ||||
|     "                                                   suffixes = [\"_2021\", \"_2022\"]) # identify column where username and label came from\n", | ||||
|     "\n", | ||||
|     "# Some usernames corresponding to one user_id have changed overtime. For easier handling only the latest username and handle is kept\n", | ||||
|     "## Some usernames corresponding to one user_id have changed overtime. For easier handling only the latest username and handle is kept.\n", | ||||
|     "tweets_user = tweets_user.assign(handle    = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n", | ||||
|     "                                 user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n", | ||||
|     "                                ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n", | ||||
|     "                                ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1) # no longer needed\n", | ||||
|     "\n", | ||||
|     "police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\"  # addiditional on police stations\n", | ||||
|     "## addiditional information concerning the police stations\n", | ||||
|     "## cols: handle, name, typ, bundesland, stadt, lat, long\n", | ||||
|     "police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\"  \n", | ||||
|     "                             ).rename(columns = {\"Polizei Account\": \"handle\"})\n", | ||||
|     "\n", | ||||
|     "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape" | ||||
|     "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape, police_stations.shape" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -100,14 +102,14 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 118, | ||||
|    "execution_count": 24, | ||||
|    "id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Merge like statistics, tweet text and user information in one data frame\n", | ||||
|     "# Merge statistics, tweet text and user information in one data frame\n", | ||||
|     "tweets_combined = pd.merge(tweets_statistics, \n", | ||||
|     "                           tweets_text,\n", | ||||
|     "                           on = 'tweet_id').merge(tweets_user, on = 'user_id'\n", | ||||
|  | @ -117,22 +119,12 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 119, | ||||
|    "execution_count": 49, | ||||
|    "id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n", | ||||
|       "  output = repr(obj)\n", | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n", | ||||
|       "  return method()\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|  | @ -177,8 +169,8 @@ | |||
|        "      <td>2</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@mahanna196 Da die Stadt keine Ausnahme für Radfahrer aufgeführt hat, gilt diese (Stand jetzt) a...</td>\n", | ||||
|        "      <td>2020-10-27 09:29:13</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -192,7 +184,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@mahanna196 Ja. *sr</td>\n", | ||||
|        "      <td>2020-10-27 10:35:38</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|  | @ -207,8 +199,8 @@ | |||
|        "      <td>3</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausgelände in #Rostrup wurde ein Sprengsatz gefunden. F...</td>\n", | ||||
|        "      <td>2020-10-27 12:36:26</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -222,8 +214,8 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/2 *sr</td>\n", | ||||
|        "      <td>2020-10-27 12:59:06</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -237,8 +229,8 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>In der vergangenen Woche wurde die Wohnung des...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>In der vergangenen Woche wurde die Wohnung des Tatverdächtigen durchsucht. Dabei stellten die Be...</td>\n", | ||||
|        "      <td>2020-10-27 13:57:32</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -268,7 +260,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-19 13:40:36</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n", | ||||
|        "      <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 hat zu diesem Thema wieder einmal die Puppen tanze...</td>\n", | ||||
|        "      <td>2023-02-15 12:06:07</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -283,7 +275,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-25 13:14:49</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n", | ||||
|        "      <td>Unser Präventionsteam vom #A44 berät heute und morgen tagsüber zum Thema Alkohol &amp; Drogen + ...</td>\n", | ||||
|        "      <td>2023-02-21 12:10:00</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -298,7 +290,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-25 13:14:49</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n", | ||||
|        "      <td>Auch unser #A52 war heute aktiv und hat zum Thema Alkohol &amp; Drogen im Straßenverkehr beraten...</td>\n", | ||||
|        "      <td>2023-02-21 12:12:48</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -313,7 +305,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-26 13:15:05</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n", | ||||
|        "      <td>Gestern führte unser #A13 in einer Wohnsiedlung einen Präventionseinsatz zum Thema „Wohnraumeinb...</td>\n", | ||||
|        "      <td>2023-02-22 11:15:58</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -328,7 +320,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-27 12:17:33</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n", | ||||
|        "      <td>Auf dem Gelände der @BUFAStudios (Oberlandstr. 26-35) findet heute die #Seniorenmesse vom Bezirk...</td>\n", | ||||
|        "      <td>2023-02-23 10:53:07</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -353,44 +345,44 @@ | |||
|        "151688  1628352896352878593           2              0            0   \n", | ||||
|        "151689  1628709531998998529          10              1            0   \n", | ||||
|        "\n", | ||||
|        "        quote_count         measured_at is_deleted  \\\n", | ||||
|        "0                 0                 NaT        NaN   \n", | ||||
|        "1                 0                 NaT        NaN   \n", | ||||
|        "2                 0                 NaT        NaN   \n", | ||||
|        "3                 0                 NaT        NaN   \n", | ||||
|        "4                 0                 NaT        NaN   \n", | ||||
|        "...             ...                 ...        ...   \n", | ||||
|        "151685            0 2023-02-19 13:40:36      False   \n", | ||||
|        "151686            0 2023-02-25 13:14:49      False   \n", | ||||
|        "151687            0 2023-02-25 13:14:49      False   \n", | ||||
|        "151688            0 2023-02-26 13:15:05      False   \n", | ||||
|        "151689            0 2023-02-27 12:17:33      False   \n", | ||||
|        "        quote_count         measured_at  is_deleted  \\\n", | ||||
|        "0                 0                 NaT        <NA>   \n", | ||||
|        "1                 0                 NaT        <NA>   \n", | ||||
|        "2                 0                 NaT        <NA>   \n", | ||||
|        "3                 0                 NaT        <NA>   \n", | ||||
|        "4                 0                 NaT        <NA>   \n", | ||||
|        "...             ...                 ...         ...   \n", | ||||
|        "151685            0 2023-02-19 13:40:36       False   \n", | ||||
|        "151686            0 2023-02-25 13:14:49       False   \n", | ||||
|        "151687            0 2023-02-25 13:14:49       False   \n", | ||||
|        "151688            0 2023-02-26 13:15:05       False   \n", | ||||
|        "151689            0 2023-02-27 12:17:33       False   \n", | ||||
|        "\n", | ||||
|        "                                               tweet_text          created_at  \\\n", | ||||
|        "0       @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13   \n", | ||||
|        "1                                     @mahanna196 Ja. *sr 2020-10-27 10:35:38   \n", | ||||
|        "2       #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26   \n", | ||||
|        "3       @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06   \n", | ||||
|        "4       In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32   \n", | ||||
|        "...                                                   ...                 ...   \n", | ||||
|        "151685  #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07   \n", | ||||
|        "151686  Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00   \n", | ||||
|        "151687  Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48   \n", | ||||
|        "151688  Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58   \n", | ||||
|        "151689  Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07   \n", | ||||
|        "                                                                                                 tweet_text  \\\n", | ||||
|        "0       @mahanna196 Da die Stadt keine Ausnahme für Radfahrer aufgeführt hat, gilt diese (Stand jetzt) a...   \n", | ||||
|        "1                                                                                       @mahanna196 Ja. *sr   \n", | ||||
|        "2       #Aktuell Auf dem ehem. Bundeswehrkrankenhausgelände in #Rostrup wurde ein Sprengsatz gefunden. F...   \n", | ||||
|        "3                                                       @Emma36166433 Bitte lesen Sie unseren Tweet 2/2 *sr   \n", | ||||
|        "4       In der vergangenen Woche wurde die Wohnung des Tatverdächtigen durchsucht. Dabei stellten die Be...   \n", | ||||
|        "...                                                                                                     ...   \n", | ||||
|        "151685  #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 hat zu diesem Thema wieder einmal die Puppen tanze...   \n", | ||||
|        "151686  Unser Präventionsteam vom #A44 berät heute und morgen tagsüber zum Thema Alkohol & Drogen + ...   \n", | ||||
|        "151687  Auch unser #A52 war heute aktiv und hat zum Thema Alkohol & Drogen im Straßenverkehr beraten...   \n", | ||||
|        "151688  Gestern führte unser #A13 in einer Wohnsiedlung einen Präventionseinsatz zum Thema „Wohnraumeinb...   \n", | ||||
|        "151689  Auf dem Gelände der @BUFAStudios (Oberlandstr. 26-35) findet heute die #Seniorenmesse vom Bezirk...   \n", | ||||
|        "\n", | ||||
|        "                    user_id           handle  \\\n", | ||||
|        "0        778895426007203840       polizei_ol   \n", | ||||
|        "1        778895426007203840       polizei_ol   \n", | ||||
|        "2        778895426007203840       polizei_ol   \n", | ||||
|        "3        778895426007203840       polizei_ol   \n", | ||||
|        "4        778895426007203840       polizei_ol   \n", | ||||
|        "...                     ...              ...   \n", | ||||
|        "151685  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151686  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151687  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151688  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151689  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "                created_at              user_id           handle  \\\n", | ||||
|        "0      2020-10-27 09:29:13   778895426007203840       polizei_ol   \n", | ||||
|        "1      2020-10-27 10:35:38   778895426007203840       polizei_ol   \n", | ||||
|        "2      2020-10-27 12:36:26   778895426007203840       polizei_ol   \n", | ||||
|        "3      2020-10-27 12:59:06   778895426007203840       polizei_ol   \n", | ||||
|        "4      2020-10-27 13:57:32   778895426007203840       polizei_ol   \n", | ||||
|        "...                    ...                  ...              ...   \n", | ||||
|        "151685 2023-02-15 12:06:07  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151686 2023-02-21 12:10:00  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151687 2023-02-21 12:12:48  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151688 2023-02-22 11:15:58  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151689 2023-02-23 10:53:07  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "\n", | ||||
|        "                                user_name  \n", | ||||
|        "0       Polizei Oldenburg-Stadt/Ammerland  \n", | ||||
|  | @ -408,19 +400,19 @@ | |||
|        "[151690 rows x 12 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 119, | ||||
|      "execution_count": 49, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Convert Counts to integer values\n", | ||||
|     "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n", | ||||
|     "# Convert datatypes to appropriate one\n", | ||||
|     "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int)\n", | ||||
|     "tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n", | ||||
|     "                                         created_at  = pd.to_datetime(tweets_combined['created_at']),\n", | ||||
|     "                                         handle      = tweets_combined['handle'].str.lower(),\n", | ||||
|     "                                         is_deleted  = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n", | ||||
|     "tweets_combined" | ||||
|     "                                         handle      = tweets_combined['handle'].str.lower(), # handle to lower case\n", | ||||
|     "                                         is_deleted  = tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable\n", | ||||
|     "tweets_combined#.to_csv(\"data/tweets_all_combined.csv\")" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -472,7 +464,7 @@ | |||
|    "source": [ | ||||
|     "## Metadaten \n", | ||||
|     "\n", | ||||
|     "Welche Daten bilden die Grundlage?" | ||||
|     "Welche Daten bilden die Grundlage?\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -537,7 +529,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 114, | ||||
|    "execution_count": 10, | ||||
|    "id": "4f1e8c6c-3610-436e-899e-4d0307259230", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -547,12 +539,12 @@ | |||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Die Tweets wurden vom  2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n" | ||||
|       "Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n", | ||||
|     "print(\"Die Tweets wurden vom\", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n", | ||||
|     "# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag" | ||||
|    ] | ||||
|   }, | ||||
|  | @ -568,7 +560,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 122, | ||||
|    "execution_count": 11, | ||||
|    "id": "9373552e-6baf-46df-ae16-c63603e20a83", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -681,7 +673,7 @@ | |||
|        "61   Hamburg   53.550341   10.000654  " | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 122, | ||||
|      "execution_count": 11, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -710,31 +702,23 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 123, | ||||
|    "execution_count": 13, | ||||
|    "id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", | ||||
|       "  for col_name, dtype in df.dtypes.iteritems():\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "\n", | ||||
|        "<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n", | ||||
|        "<div id=\"altair-viz-7b78525a62b243eca7b1f4044a328f47\"></div>\n", | ||||
|        "<script type=\"text/javascript\">\n", | ||||
|        "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n", | ||||
|        "  (function(spec, embedOpt){\n", | ||||
|        "    let outputDiv = document.currentScript.previousElementSibling;\n", | ||||
|        "    if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n", | ||||
|        "      outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n", | ||||
|        "    if (outputDiv.id !== \"altair-viz-7b78525a62b243eca7b1f4044a328f47\") {\n", | ||||
|        "      outputDiv = document.getElementById(\"altair-viz-7b78525a62b243eca7b1f4044a328f47\");\n", | ||||
|        "    }\n", | ||||
|        "    const paths = {\n", | ||||
|        "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", | ||||
|  | @ -780,14 +764,14 @@ | |||
|        "        .catch(showError)\n", | ||||
|        "        .then(() => displayChart(vegaEmbed));\n", | ||||
|        "    }\n", | ||||
|        "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", 
\"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n", | ||||
|        "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"ordinal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", 
\"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n", | ||||
|        "</script>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "alt.Chart(...)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 123, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -795,7 +779,7 @@ | |||
|    "source": [ | ||||
|     "barchart = alt.Chart(activy_police_vis[0:15]).mark_bar().encode(\n", | ||||
|     "    x = 'count:Q',\n", | ||||
|     "    y = alt.Y('handle:N', sort = '-x'),\n", | ||||
|     "    y = alt.Y('handle:O', sort = '-x'),\n", | ||||
|     ")\n", | ||||
|     "barchart " | ||||
|    ] | ||||
|  | @ -812,7 +796,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 125, | ||||
|    "execution_count": 14, | ||||
|    "id": "d0549250-b11f-4762-8500-1134c53303b4", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -820,22 +804,377 @@ | |||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<style scoped>\n", | ||||
|        "    .dataframe tbody tr th:only-of-type {\n", | ||||
|        "        vertical-align: middle;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe tbody tr th {\n", | ||||
|        "        vertical-align: top;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe thead th {\n", | ||||
|        "        text-align: right;\n", | ||||
|        "    }\n", | ||||
|        "</style>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>index</th>\n", | ||||
|        "      <th>tweet_id</th>\n", | ||||
|        "      <th>like_count</th>\n", | ||||
|        "      <th>retweet_count</th>\n", | ||||
|        "      <th>reply_count</th>\n", | ||||
|        "      <th>quote_count</th>\n", | ||||
|        "      <th>measured_at</th>\n", | ||||
|        "      <th>is_deleted</th>\n", | ||||
|        "      <th>tweet_text</th>\n", | ||||
|        "      <th>created_at</th>\n", | ||||
|        "      <th>user_id</th>\n", | ||||
|        "      <th>handle</th>\n", | ||||
|        "      <th>user_name</th>\n", | ||||
|        "      <th>Name</th>\n", | ||||
|        "      <th>Typ</th>\n", | ||||
|        "      <th>Bundesland</th>\n", | ||||
|        "      <th>Stadt</th>\n", | ||||
|        "      <th>LAT</th>\n", | ||||
|        "      <th>LONG</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>0</th>\n", | ||||
|        "      <td>3053</td>\n", | ||||
|        "      <td>1609539240458878979</td>\n", | ||||
|        "      <td>21455</td>\n", | ||||
|        "      <td>1845</td>\n", | ||||
|        "      <td>3643</td>\n", | ||||
|        "      <td>341</td>\n", | ||||
|        "      <td>2023-01-05 14:44:34</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...</td>\n", | ||||
|        "      <td>2023-01-01 13:17:13</td>\n", | ||||
|        "      <td>2397974054</td>\n", | ||||
|        "      <td>polizeiberlin</td>\n", | ||||
|        "      <td>Polizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1</th>\n", | ||||
|        "      <td>1331</td>\n", | ||||
|        "      <td>1355179228396879872</td>\n", | ||||
|        "      <td>19186</td>\n", | ||||
|        "      <td>3386</td>\n", | ||||
|        "      <td>1203</td>\n", | ||||
|        "      <td>628</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...</td>\n", | ||||
|        "      <td>2021-01-29 15:41:20</td>\n", | ||||
|        "      <td>2397974054</td>\n", | ||||
|        "      <td>polizeiberlin</td>\n", | ||||
|        "      <td>Polizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>2</th>\n", | ||||
|        "      <td>91693</td>\n", | ||||
|        "      <td>1505620459148173316</td>\n", | ||||
|        "      <td>15708</td>\n", | ||||
|        "      <td>7098</td>\n", | ||||
|        "      <td>186</td>\n", | ||||
|        "      <td>540</td>\n", | ||||
|        "      <td>2022-03-24 20:15:08</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...</td>\n", | ||||
|        "      <td>2022-03-20 19:01:05</td>\n", | ||||
|        "      <td>2389161066</td>\n", | ||||
|        "      <td>polizei_nrw_bn</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Bonn</td>\n", | ||||
|        "      <td>50.735851</td>\n", | ||||
|        "      <td>7.10066</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>3</th>\n", | ||||
|        "      <td>91695</td>\n", | ||||
|        "      <td>1505620666476896259</td>\n", | ||||
|        "      <td>10337</td>\n", | ||||
|        "      <td>1539</td>\n", | ||||
|        "      <td>59</td>\n", | ||||
|        "      <td>35</td>\n", | ||||
|        "      <td>2022-03-24 20:15:08</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...</td>\n", | ||||
|        "      <td>2022-03-20 19:01:54</td>\n", | ||||
|        "      <td>2389161066</td>\n", | ||||
|        "      <td>polizei_nrw_bn</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Bonn</td>\n", | ||||
|        "      <td>50.735851</td>\n", | ||||
|        "      <td>7.10066</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>4</th>\n", | ||||
|        "      <td>122631</td>\n", | ||||
|        "      <td>1359098196434292739</td>\n", | ||||
|        "      <td>9471</td>\n", | ||||
|        "      <td>642</td>\n", | ||||
|        "      <td>128</td>\n", | ||||
|        "      <td>102</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2</td>\n", | ||||
|        "      <td>2021-02-09 11:13:55</td>\n", | ||||
|        "      <td>4876039738</td>\n", | ||||
|        "      <td>bpol_b</td>\n", | ||||
|        "      <td>Bundespolizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151685</th>\n", | ||||
|        "      <td>7569</td>\n", | ||||
|        "      <td>1332625325654757377</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...</td>\n", | ||||
|        "      <td>2020-11-28 10:00:11</td>\n", | ||||
|        "      <td>223758384</td>\n", | ||||
|        "      <td>polizeisachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Sachsen</td>\n", | ||||
|        "      <td>Dresden</td>\n", | ||||
|        "      <td>51.0493286</td>\n", | ||||
|        "      <td>13.7381437</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151686</th>\n", | ||||
|        "      <td>7572</td>\n", | ||||
|        "      <td>1332738525507186692</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...</td>\n", | ||||
|        "      <td>2020-11-28 17:30:00</td>\n", | ||||
|        "      <td>223758384</td>\n", | ||||
|        "      <td>polizeisachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Sachsen</td>\n", | ||||
|        "      <td>Dresden</td>\n", | ||||
|        "      <td>51.0493286</td>\n", | ||||
|        "      <td>13.7381437</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151687</th>\n", | ||||
|        "      <td>144702</td>\n", | ||||
|        "      <td>1465679768494526467</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...</td>\n", | ||||
|        "      <td>2021-11-30 13:51:02</td>\n", | ||||
|        "      <td>4876085224</td>\n", | ||||
|        "      <td>bpol_nord</td>\n", | ||||
|        "      <td>Bundespolizei Nord</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151688</th>\n", | ||||
|        "      <td>144701</td>\n", | ||||
|        "      <td>1464124290605977600</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...</td>\n", | ||||
|        "      <td>2021-11-26 06:50:07</td>\n", | ||||
|        "      <td>4876085224</td>\n", | ||||
|        "      <td>bpol_nord</td>\n", | ||||
|        "      <td>Bundespolizei Nord</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151689</th>\n", | ||||
|        "      <td>66854</td>\n", | ||||
|        "      <td>1376453040283209728</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>#Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...</td>\n", | ||||
|        "      <td>2021-03-29 08:35:52</td>\n", | ||||
|        "      <td>2389263558</td>\n", | ||||
|        "      <td>polizei_nrw_un</td>\n", | ||||
|        "      <td>Polizei NRW UN</td>\n", | ||||
|        "      <td>Polizei NRW UN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Unna</td>\n", | ||||
|        "      <td>51.5348835</td>\n", | ||||
|        "      <td>7.689014</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "<p>151690 rows × 19 columns</p>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "0         Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n", | ||||
|        "1         An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n", | ||||
|        "2         WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n", | ||||
|        "3         Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n", | ||||
|        "4                                             Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n", | ||||
|        "                                                         ...                                                 \n", | ||||
|        "151685    Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n", | ||||
|        "151686    📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n", | ||||
|        "151687    Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n", | ||||
|        "151688    @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n", | ||||
|        "151689    #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n", | ||||
|        "Name: tweet_text, Length: 151690, dtype: object" | ||||
|        "         index             tweet_id  like_count  retweet_count  reply_count  \\\n", | ||||
|        "0         3053  1609539240458878979       21455           1845         3643   \n", | ||||
|        "1         1331  1355179228396879872       19186           3386         1203   \n", | ||||
|        "2        91693  1505620459148173316       15708           7098          186   \n", | ||||
|        "3        91695  1505620666476896259       10337           1539           59   \n", | ||||
|        "4       122631  1359098196434292739        9471            642          128   \n", | ||||
|        "...        ...                  ...         ...            ...          ...   \n", | ||||
|        "151685    7569  1332625325654757377         -99            -99          -99   \n", | ||||
|        "151686    7572  1332738525507186692         -99            -99          -99   \n", | ||||
|        "151687  144702  1465679768494526467         -99            -99          -99   \n", | ||||
|        "151688  144701  1464124290605977600         -99            -99          -99   \n", | ||||
|        "151689   66854  1376453040283209728         -99            -99          -99   \n", | ||||
|        "\n", | ||||
|        "        quote_count         measured_at is_deleted  \\\n", | ||||
|        "0               341 2023-01-05 14:44:34      False   \n", | ||||
|        "1               628                 NaT        NaN   \n", | ||||
|        "2               540 2022-03-24 20:15:08      False   \n", | ||||
|        "3                35 2022-03-24 20:15:08      False   \n", | ||||
|        "4               102                 NaT        NaN   \n", | ||||
|        "...             ...                 ...        ...   \n", | ||||
|        "151685          -99                 NaT        NaN   \n", | ||||
|        "151686          -99                 NaT        NaN   \n", | ||||
|        "151687          -99                 NaT        NaN   \n", | ||||
|        "151688          -99                 NaT        NaN   \n", | ||||
|        "151689          -99                 NaT        NaN   \n", | ||||
|        "\n", | ||||
|        "                                                                                                 tweet_text  \\\n", | ||||
|        "0       Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...   \n", | ||||
|        "1       An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...   \n", | ||||
|        "2       WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...   \n", | ||||
|        "3       Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...   \n", | ||||
|        "4                                           Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2   \n", | ||||
|        "...                                                                                                     ...   \n", | ||||
|        "151685  Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...   \n", | ||||
|        "151686  📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...   \n", | ||||
|        "151687  Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...   \n", | ||||
|        "151688  @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...   \n", | ||||
|        "151689  #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...   \n", | ||||
|        "\n", | ||||
|        "                created_at     user_id          handle             user_name  \\\n", | ||||
|        "0      2023-01-01 13:17:13  2397974054   polizeiberlin        Polizei Berlin   \n", | ||||
|        "1      2021-01-29 15:41:20  2397974054   polizeiberlin        Polizei Berlin   \n", | ||||
|        "2      2022-03-20 19:01:05  2389161066  polizei_nrw_bn        Polizei NRW BN   \n", | ||||
|        "3      2022-03-20 19:01:54  2389161066  polizei_nrw_bn        Polizei NRW BN   \n", | ||||
|        "4      2021-02-09 11:13:55  4876039738          bpol_b  Bundespolizei Berlin   \n", | ||||
|        "...                    ...         ...             ...                   ...   \n", | ||||
|        "151685 2020-11-28 10:00:11   223758384  polizeisachsen       Polizei Sachsen   \n", | ||||
|        "151686 2020-11-28 17:30:00   223758384  polizeisachsen       Polizei Sachsen   \n", | ||||
|        "151687 2021-11-30 13:51:02  4876085224       bpol_nord    Bundespolizei Nord   \n", | ||||
|        "151688 2021-11-26 06:50:07  4876085224       bpol_nord    Bundespolizei Nord   \n", | ||||
|        "151689 2021-03-29 08:35:52  2389263558  polizei_nrw_un        Polizei NRW UN   \n", | ||||
|        "\n", | ||||
|        "                   Name      Typ           Bundesland    Stadt         LAT  \\\n", | ||||
|        "0                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "1                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "2        Polizei NRW BN  Polizei  Nordrhein-Westfalen     Bonn   50.735851   \n", | ||||
|        "3        Polizei NRW BN  Polizei  Nordrhein-Westfalen     Bonn   50.735851   \n", | ||||
|        "4                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "...                 ...      ...                  ...      ...         ...   \n", | ||||
|        "151685  Polizei Sachsen  Polizei              Sachsen  Dresden  51.0493286   \n", | ||||
|        "151686  Polizei Sachsen  Polizei              Sachsen  Dresden  51.0493286   \n", | ||||
|        "151687              NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "151688              NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "151689   Polizei NRW UN  Polizei  Nordrhein-Westfalen     Unna  51.5348835   \n", | ||||
|        "\n", | ||||
|        "              LONG  \n", | ||||
|        "0              NaN  \n", | ||||
|        "1              NaN  \n", | ||||
|        "2          7.10066  \n", | ||||
|        "3          7.10066  \n", | ||||
|        "4              NaN  \n", | ||||
|        "...            ...  \n", | ||||
|        "151685  13.7381437  \n", | ||||
|        "151686  13.7381437  \n", | ||||
|        "151687         NaN  \n", | ||||
|        "151688         NaN  \n", | ||||
|        "151689    7.689014  \n", | ||||
|        "\n", | ||||
|        "[151690 rows x 19 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 125, | ||||
|      "execution_count": 14, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -845,14 +1184,14 @@ | |||
|     "                                         on = \"handle\",\n", | ||||
|     "                                         how = \"left\")\n", | ||||
|     "pd.options.display.max_colwidth = 100\n", | ||||
|     "tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n", | ||||
|     "tweets_attention.sort_values('like_count', ascending = False).reset_index()\n", | ||||
|     "\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 90, | ||||
|    "id": "97952234-7957-421e-bd2c-2c8261992c5a", | ||||
|    "execution_count": 42, | ||||
|    "id": "621a3b74-e909-435c-8820-b38b63aa4893", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|  | @ -972,144 +1311,12 @@ | |||
|        "[11559 rows x 3 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 90, | ||||
|      "execution_count": 42, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "old = pd.read_csv(\"data/user_old.tsv\",sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"} )\n", | ||||
|     "new = pd.read_csv(\"data/tweets-1679742702794.csv\").rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"})\n", | ||||
|     "new" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 121, | ||||
|    "id": "ed86b45e-9dd8-436d-9c96-15500ed93985", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<style scoped>\n", | ||||
|        "    .dataframe tbody tr th:only-of-type {\n", | ||||
|        "        vertical-align: middle;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe tbody tr th {\n", | ||||
|        "        vertical-align: top;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe thead th {\n", | ||||
|        "        text-align: right;\n", | ||||
|        "    }\n", | ||||
|        "</style>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>count</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>user_id</th>\n", | ||||
|        "      <th>user_name</th>\n", | ||||
|        "      <th></th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>223758384</th>\n", | ||||
|        "      <th>Polizei Sachsen</th>\n", | ||||
|        "      <td>5340</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>259607457</th>\n", | ||||
|        "      <th>Polizei NRW K</th>\n", | ||||
|        "      <td>2544</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>424895827</th>\n", | ||||
|        "      <th>Polizei Stuttgart</th>\n", | ||||
|        "      <td>1913</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>769128278</th>\n", | ||||
|        "      <th>Polizei NRW DO</th>\n", | ||||
|        "      <td>4895</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>775664780</th>\n", | ||||
|        "      <th>Polizei Rostock</th>\n", | ||||
|        "      <td>604</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1169206134189830145</th>\n", | ||||
|        "      <th>Polizei Stendal</th>\n", | ||||
|        "      <td>842</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1184022676488314880</th>\n", | ||||
|        "      <th>Polizei Pforzheim</th>\n", | ||||
|        "      <td>283</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1184024283342950401</th>\n", | ||||
|        "      <th>Polizei Ravensburg</th>\n", | ||||
|        "      <td>460</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1232548941889228808</th>\n", | ||||
|        "      <th>Systemstratege:</th>\n", | ||||
|        "      <td>168</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1295978598034284546</th>\n", | ||||
|        "      <th>Polizei ZPD NI</th>\n", | ||||
|        "      <td>133</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "<p>163 rows × 1 columns</p>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "                                        count\n", | ||||
|        "user_id             user_name                \n", | ||||
|        "223758384           Polizei Sachsen      5340\n", | ||||
|        "259607457           Polizei NRW K        2544\n", | ||||
|        "424895827           Polizei Stuttgart    1913\n", | ||||
|        "769128278           Polizei NRW DO       4895\n", | ||||
|        "775664780           Polizei Rostock       604\n", | ||||
|        "...                                       ...\n", | ||||
|        "1169206134189830145 Polizei Stendal       842\n", | ||||
|        "1184022676488314880 Polizei Pforzheim     283\n", | ||||
|        "1184024283342950401 Polizei Ravensburg    460\n", | ||||
|        "1232548941889228808 Systemstratege:       168\n", | ||||
|        "1295978598034284546 Polizei ZPD NI        133\n", | ||||
|        "\n", | ||||
|        "[163 rows x 1 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 121, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n", | ||||
|     "                                    )[\"user_id\"].aggregate(['count']\n", | ||||
|     "                                                          )" | ||||
|    ] | ||||
|    "source": [] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							|  | @ -1 +1 @@ | |||
| {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":1,"widgets":["notebook:zusammenfassung.ipynb"]},"current":"notebook:zusammenfassung.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.17943235504652827,0.8205676449534718,0]},"file-browser-filebrowser:cwd":{"path":""},"notebook:zusammenfassung.ipynb":{"data":{"path":"zusammenfassung.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}} | ||||
| {"data":{"layout-restorer:data":{"main":{"dock":{"type":"split-area","orientation":"horizontal","sizes":[0.9093610698365527,0.09063893016344725],"children":[{"type":"tab-area","currentIndex":0,"widgets":["notebook:zusammenfassung.ipynb","editor:merge_police_tweets.py"]},{"type":"tab-area","currentIndex":0,"widgets":["inspector:inspector"]}]},"current":"notebook:zusammenfassung.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.26146046543024176,0.7385395345697582,0]},"file-browser-filebrowser:cwd":{"path":""},"notebook:zusammenfassung.ipynb":{"data":{"path":"zusammenfassung.ipynb","factory":"Notebook"}},"inspector:inspector":{"data":{}},"editor:merge_police_tweets.py":{"data":{"path":"merge_police_tweets.py","factory":"Editor"}}},"metadata":{"id":"default"}} | ||||
							
								
								
									
										333095
									
								
								data/tweets_all_combined.csv
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										333095
									
								
								data/tweets_all_combined.csv
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										50
									
								
								merge_police_tweets.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								merge_police_tweets.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,50 @@ | |||
| import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| # Merging the different tables of the old (~2021) and new (~2022) scrapers | ||||
| 
 | ||||
| ## cols: hashtag, url, mention (same for both) | ||||
| tweets_meta = pd.concat([pd.read_csv("data/entity_old.tsv", sep = "\t"), # data from old scraper | ||||
|                          pd.read_csv("data/tweets.csv")]) # data from new scraper | ||||
| 
 | ||||
| ## cols: id, tweet_text, created_at, user_id; only subset from old table (same tsv used in next step) | ||||
| tweets_text = pd.concat([pd.read_csv("data/tweet_old.tsv", sep = "\t")[['id','tweet_text', 'created_at', 'user_id']].rename(columns = {"id":"tweet_id"}), | ||||
|                          pd.read_csv("data/tweets-1679742698645.csv")]) | ||||
| 
 | ||||
| ## cols: id, like_count, retweet_count, reply_count, quote_count; only subset from old table | ||||
| tweets_statistics = pd.concat([pd.read_csv("data/tweet_old.tsv", sep = "\t")[['id', 'like_count', 'retweet_count', 'reply_count', 'quote_count']].rename(columns = {"id":"tweet_id"}), | ||||
|                               pd.read_csv("data/tweets-1679742620302.csv")]) | ||||
| 
 | ||||
| ## cols: user_id, handle, user_name; colnames do not match between old and new data. Even username and handle seem to be mixed up in the new data set (inverse order) | ||||
| ## Info: Only a small number of user_ids appear in both data sets, but where they do, usernames have occasionally changed and therefore cannot easily be merged | ||||
| tweets_user = pd.read_csv("data/user_old.tsv",  | ||||
|                           sep = "\t").rename(columns = {"id":"user_id","name": "user_name"} # uniform names | ||||
|                                             ).merge(pd.read_csv("data/tweets-1679742702794.csv" # merge with renamed new data | ||||
|                                                                ).rename(columns = {"username":"handle", "handle": "user_name"}), # reverse col names | ||||
|                                                    on = "user_id", # user_id as matching column | ||||
|                                                    how = "outer", # keep all unique user_ids | ||||
|                                                    suffixes = ["_2021", "_2022"]) # identify column where username and label came from | ||||
| 
 | ||||
| ## Some usernames corresponding to one user_id have changed over time. For easier handling only the latest username and handle are kept. | ||||
| tweets_user = tweets_user.assign(handle    = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1), | ||||
|                                  user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1) | ||||
|                                 ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1) # no longer needed | ||||
| 
 | ||||
| ## additional information concerning the police stations | ||||
| ## cols: handle, name, typ, bundesland, stadt, lat, long | ||||
| police_stations = pd.read_csv("data/polizei_accounts_geo.csv", sep = "\t"   | ||||
|                              ).rename(columns = {"Polizei Account": "handle"}) | ||||
| 
 | ||||
| # Merge statistics, tweet text and user information in one data frame | ||||
| tweets_combined = pd.merge(tweets_statistics,  | ||||
|                            tweets_text, | ||||
|                            on = 'tweet_id').merge(tweets_user, on = 'user_id' | ||||
|                                                  ).drop(['id'], axis = 1) # drop unnecessary id column (redundant to index) | ||||
| 
 | ||||
| # Convert datatypes to appropriate one | ||||
| tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int) | ||||
| tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format | ||||
|                                          created_at  = pd.to_datetime(tweets_combined['created_at']), | ||||
|                                          handle      = tweets_combined['handle'].str.lower(), # handle to lower case | ||||
|                                          is_deleted  = tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable | ||||
| tweets_combined.#to_csv("data/tweets_all_combined.csv") | ||||
|  | @ -12,7 +12,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": 1, | ||||
|    "id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -37,7 +37,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 117, | ||||
|    "execution_count": 45, | ||||
|    "id": "fcc48831-7999-4d79-b722-736715b1ced6", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -46,48 +46,50 @@ | |||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "((479991, 3), (151690, 8), (151690, 4), (13327, 3))" | ||||
|        "((479991, 3), (151690, 8), (151690, 4), (13327, 3), (163, 7))" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 117, | ||||
|      "execution_count": 45, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Merging different table of old (~2021) and new (~2022) scraper\n", | ||||
|     "\n", | ||||
|     "## cols: hashtag, url, mention (same for both)\n", | ||||
|     "tweets_meta = pd.concat([pd.read_csv(\"data/entity_old.tsv\", sep = \"\\t\"), # data from old scraper\n", | ||||
|     "                         pd.read_csv(\"data/tweets.csv\")]) # data from new scraper\n", | ||||
|     "\n", | ||||
|     "tweets_text = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', \n", | ||||
|     "                                                                        'tweet_text', \n", | ||||
|     "                                                                        'created_at', \n", | ||||
|     "                                                                        'user_id']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "## cols: id, tweet_text, created_at, user_id; only subset from old table (same tsv used in next step)\n", | ||||
|     "tweets_text = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id','tweet_text', 'created_at', 'user_id']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "                         pd.read_csv(\"data/tweets-1679742698645.csv\")])\n", | ||||
|     "\n", | ||||
|     "tweets_statistics = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', \n", | ||||
|     "                                                                              'like_count', \n", | ||||
|     "                                                                              'retweet_count', \n", | ||||
|     "                                                                              'reply_count', \n", | ||||
|     "                                                                              'quote_count']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "## cols: id, like_count, retweet_count, reply_count, quote_count; only subset from old table\n", | ||||
|     "tweets_statistics = pd.concat([pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")[['id', 'like_count', 'retweet_count', 'reply_count', 'quote_count']].rename(columns = {\"id\":\"tweet_id\"}),\n", | ||||
|     "                              pd.read_csv(\"data/tweets-1679742620302.csv\")])\n", | ||||
|     "\n", | ||||
|     "## cols: user_id, handle, user_name; colnames do not match between old and new data. Even username and handle seem to be mixed up in the new data set (inverse order)\n", | ||||
|     "## Info: Only a small number of user_ids appear in both data sets, but where they do, usernames have occasionally changed and therefore cannot easily be merged\n", | ||||
|     "tweets_user = pd.read_csv(\"data/user_old.tsv\", \n", | ||||
|     "                          sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"}\n", | ||||
|     "                                            ).merge(pd.read_csv(\"data/tweets-1679742702794.csv\"\n", | ||||
|     "                                                               ).rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"}),\n", | ||||
|     "                                                   on = \"user_id\",\n", | ||||
|     "                                                   how = \"outer\",\n", | ||||
|     "                                                   suffixes = [\"_2021\", \"_2022\"])\n", | ||||
|     "                          sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"} # uniform names\n", | ||||
|     "                                            ).merge(pd.read_csv(\"data/tweets-1679742702794.csv\" # merge with renamed new data\n", | ||||
|     "                                                               ).rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"}), # reverse col names\n", | ||||
|     "                                                   on = \"user_id\", # user_id as matching column\n", | ||||
|     "                                                   how = \"outer\", # keep all unique user_ids\n", | ||||
|     "                                                   suffixes = [\"_2021\", \"_2022\"]) # identify column where username and label came from\n", | ||||
|     "\n", | ||||
|     "# Some usernames corresponding to one user_id have changed overtime. For easier handling only the latest username and handle is kept\n", | ||||
|     "## Some usernames corresponding to one user_id have changed over time. For easier handling only the latest username and handle are kept.\n", | ||||
|     "tweets_user = tweets_user.assign(handle    = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n", | ||||
|     "                                 user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n", | ||||
|     "                                ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n", | ||||
|     "                                ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1) # no longer needed\n", | ||||
|     "\n", | ||||
|     "police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\"  # addiditional on police stations\n", | ||||
|     "## additional information concerning the police stations\n", | ||||
|     "## cols: handle, name, typ, bundesland, stadt, lat, long\n", | ||||
|     "police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\"  \n", | ||||
|     "                             ).rename(columns = {\"Polizei Account\": \"handle\"})\n", | ||||
|     "\n", | ||||
|     "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape" | ||||
|     "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape, police_stations.shape" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -100,14 +102,14 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 118, | ||||
|    "execution_count": 24, | ||||
|    "id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Merge like statistics, tweet text and user information in one data frame\n", | ||||
|     "# Merge statistics, tweet text and user information in one data frame\n", | ||||
|     "tweets_combined = pd.merge(tweets_statistics, \n", | ||||
|     "                           tweets_text,\n", | ||||
|     "                           on = 'tweet_id').merge(tweets_user, on = 'user_id'\n", | ||||
|  | @ -117,22 +119,12 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 119, | ||||
|    "execution_count": 49, | ||||
|    "id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n", | ||||
|       "  output = repr(obj)\n", | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n", | ||||
|       "  return method()\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|  | @ -177,8 +169,8 @@ | |||
|        "      <td>2</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@mahanna196 Da die Stadt keine Ausnahme für Radfahrer aufgeführt hat, gilt diese (Stand jetzt) a...</td>\n", | ||||
|        "      <td>2020-10-27 09:29:13</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -192,7 +184,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@mahanna196 Ja. *sr</td>\n", | ||||
|        "      <td>2020-10-27 10:35:38</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|  | @ -207,8 +199,8 @@ | |||
|        "      <td>3</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausgelände in #Rostrup wurde ein Sprengsatz gefunden. F...</td>\n", | ||||
|        "      <td>2020-10-27 12:36:26</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -222,8 +214,8 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/2 *sr</td>\n", | ||||
|        "      <td>2020-10-27 12:59:06</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -237,8 +229,8 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>0</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>In der vergangenen Woche wurde die Wohnung des...</td>\n", | ||||
|        "      <td><NA></td>\n", | ||||
|        "      <td>In der vergangenen Woche wurde die Wohnung des Tatverdächtigen durchsucht. Dabei stellten die Be...</td>\n", | ||||
|        "      <td>2020-10-27 13:57:32</td>\n", | ||||
|        "      <td>778895426007203840</td>\n", | ||||
|        "      <td>polizei_ol</td>\n", | ||||
|  | @ -268,7 +260,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-19 13:40:36</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n", | ||||
|        "      <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 hat zu diesem Thema wieder einmal die Puppen tanze...</td>\n", | ||||
|        "      <td>2023-02-15 12:06:07</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -283,7 +275,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-25 13:14:49</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n", | ||||
|        "      <td>Unser Präventionsteam vom #A44 berät heute und morgen tagsüber zum Thema Alkohol &amp; Drogen + ...</td>\n", | ||||
|        "      <td>2023-02-21 12:10:00</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -298,7 +290,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-25 13:14:49</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n", | ||||
|        "      <td>Auch unser #A52 war heute aktiv und hat zum Thema Alkohol &amp; Drogen im Straßenverkehr beraten...</td>\n", | ||||
|        "      <td>2023-02-21 12:12:48</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -313,7 +305,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-26 13:15:05</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n", | ||||
|        "      <td>Gestern führte unser #A13 in einer Wohnsiedlung einen Präventionseinsatz zum Thema „Wohnraumeinb...</td>\n", | ||||
|        "      <td>2023-02-22 11:15:58</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -328,7 +320,7 @@ | |||
|        "      <td>0</td>\n", | ||||
|        "      <td>2023-02-27 12:17:33</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n", | ||||
|        "      <td>Auf dem Gelände der @BUFAStudios (Oberlandstr. 26-35) findet heute die #Seniorenmesse vom Bezirk...</td>\n", | ||||
|        "      <td>2023-02-23 10:53:07</td>\n", | ||||
|        "      <td>1168873095614160896</td>\n", | ||||
|        "      <td>polizeiberlin_p</td>\n", | ||||
|  | @ -353,44 +345,44 @@ | |||
|        "151688  1628352896352878593           2              0            0   \n", | ||||
|        "151689  1628709531998998529          10              1            0   \n", | ||||
|        "\n", | ||||
|        "        quote_count         measured_at is_deleted  \\\n", | ||||
|        "0                 0                 NaT        NaN   \n", | ||||
|        "1                 0                 NaT        NaN   \n", | ||||
|        "2                 0                 NaT        NaN   \n", | ||||
|        "3                 0                 NaT        NaN   \n", | ||||
|        "4                 0                 NaT        NaN   \n", | ||||
|        "...             ...                 ...        ...   \n", | ||||
|        "151685            0 2023-02-19 13:40:36      False   \n", | ||||
|        "151686            0 2023-02-25 13:14:49      False   \n", | ||||
|        "151687            0 2023-02-25 13:14:49      False   \n", | ||||
|        "151688            0 2023-02-26 13:15:05      False   \n", | ||||
|        "151689            0 2023-02-27 12:17:33      False   \n", | ||||
|        "        quote_count         measured_at  is_deleted  \\\n", | ||||
|        "0                 0                 NaT        <NA>   \n", | ||||
|        "1                 0                 NaT        <NA>   \n", | ||||
|        "2                 0                 NaT        <NA>   \n", | ||||
|        "3                 0                 NaT        <NA>   \n", | ||||
|        "4                 0                 NaT        <NA>   \n", | ||||
|        "...             ...                 ...         ...   \n", | ||||
|        "151685            0 2023-02-19 13:40:36       False   \n", | ||||
|        "151686            0 2023-02-25 13:14:49       False   \n", | ||||
|        "151687            0 2023-02-25 13:14:49       False   \n", | ||||
|        "151688            0 2023-02-26 13:15:05       False   \n", | ||||
|        "151689            0 2023-02-27 12:17:33       False   \n", | ||||
|        "\n", | ||||
|        "                                               tweet_text          created_at  \\\n", | ||||
|        "0       @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13   \n", | ||||
|        "1                                     @mahanna196 Ja. *sr 2020-10-27 10:35:38   \n", | ||||
|        "2       #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26   \n", | ||||
|        "3       @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06   \n", | ||||
|        "4       In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32   \n", | ||||
|        "...                                                   ...                 ...   \n", | ||||
|        "151685  #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07   \n", | ||||
|        "151686  Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00   \n", | ||||
|        "151687  Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48   \n", | ||||
|        "151688  Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58   \n", | ||||
|        "151689  Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07   \n", | ||||
|        "                                                                                                 tweet_text  \\\n", | ||||
|        "0       @mahanna196 Da die Stadt keine Ausnahme für Radfahrer aufgeführt hat, gilt diese (Stand jetzt) a...   \n", | ||||
|        "1                                                                                       @mahanna196 Ja. *sr   \n", | ||||
|        "2       #Aktuell Auf dem ehem. Bundeswehrkrankenhausgelände in #Rostrup wurde ein Sprengsatz gefunden. F...   \n", | ||||
|        "3                                                       @Emma36166433 Bitte lesen Sie unseren Tweet 2/2 *sr   \n", | ||||
|        "4       In der vergangenen Woche wurde die Wohnung des Tatverdächtigen durchsucht. Dabei stellten die Be...   \n", | ||||
|        "...                                                                                                     ...   \n", | ||||
|        "151685  #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 hat zu diesem Thema wieder einmal die Puppen tanze...   \n", | ||||
|        "151686  Unser Präventionsteam vom #A44 berät heute und morgen tagsüber zum Thema Alkohol & Drogen + ...   \n", | ||||
|        "151687  Auch unser #A52 war heute aktiv und hat zum Thema Alkohol & Drogen im Straßenverkehr beraten...   \n", | ||||
|        "151688  Gestern führte unser #A13 in einer Wohnsiedlung einen Präventionseinsatz zum Thema „Wohnraumeinb...   \n", | ||||
|        "151689  Auf dem Gelände der @BUFAStudios (Oberlandstr. 26-35) findet heute die #Seniorenmesse vom Bezirk...   \n", | ||||
|        "\n", | ||||
|        "                    user_id           handle  \\\n", | ||||
|        "0        778895426007203840       polizei_ol   \n", | ||||
|        "1        778895426007203840       polizei_ol   \n", | ||||
|        "2        778895426007203840       polizei_ol   \n", | ||||
|        "3        778895426007203840       polizei_ol   \n", | ||||
|        "4        778895426007203840       polizei_ol   \n", | ||||
|        "...                     ...              ...   \n", | ||||
|        "151685  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151686  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151687  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151688  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151689  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "                created_at              user_id           handle  \\\n", | ||||
|        "0      2020-10-27 09:29:13   778895426007203840       polizei_ol   \n", | ||||
|        "1      2020-10-27 10:35:38   778895426007203840       polizei_ol   \n", | ||||
|        "2      2020-10-27 12:36:26   778895426007203840       polizei_ol   \n", | ||||
|        "3      2020-10-27 12:59:06   778895426007203840       polizei_ol   \n", | ||||
|        "4      2020-10-27 13:57:32   778895426007203840       polizei_ol   \n", | ||||
|        "...                    ...                  ...              ...   \n", | ||||
|        "151685 2023-02-15 12:06:07  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151686 2023-02-21 12:10:00  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151687 2023-02-21 12:12:48  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151688 2023-02-22 11:15:58  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "151689 2023-02-23 10:53:07  1168873095614160896  polizeiberlin_p   \n", | ||||
|        "\n", | ||||
|        "                                user_name  \n", | ||||
|        "0       Polizei Oldenburg-Stadt/Ammerland  \n", | ||||
|  | @ -408,19 +400,19 @@ | |||
|        "[151690 rows x 12 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 119, | ||||
|      "execution_count": 49, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Convert Counts to integer values\n", | ||||
|     "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n", | ||||
|     "# Convert datatypes to appropriate one\n", | ||||
|     "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(np.NAN).astype(int)\n", | ||||
|     "tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n", | ||||
|     "                                         created_at  = pd.to_datetime(tweets_combined['created_at']),\n", | ||||
|     "                                         handle      = tweets_combined['handle'].str.lower(),\n", | ||||
|     "                                         is_deleted  = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n", | ||||
|     "tweets_combined" | ||||
|     "                                         handle      = tweets_combined['handle'].str.lower(), # handle to lower case\n", | ||||
|     "                                         is_deleted  = tweets_combined['is_deleted'].astype('boolean')) # is deleted column as boolean variable\n", | ||||
|     "tweets_combined#.to_csv(\"data/tweets_all_combined.csv\")" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -472,7 +464,7 @@ | |||
|    "source": [ | ||||
|     "## Metadaten \n", | ||||
|     "\n", | ||||
|     "Welche Daten bilden die Grundlage?" | ||||
|     "Welche Daten bilden die Grundlage?\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -537,7 +529,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 114, | ||||
|    "execution_count": 10, | ||||
|    "id": "4f1e8c6c-3610-436e-899e-4d0307259230", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -547,12 +539,12 @@ | |||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Die Tweets wurden vom  2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n" | ||||
|       "Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n", | ||||
|     "print(\"Die Tweets wurden vom\", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n", | ||||
|     "# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag" | ||||
|    ] | ||||
|   }, | ||||
|  | @ -568,7 +560,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 122, | ||||
|    "execution_count": 11, | ||||
|    "id": "9373552e-6baf-46df-ae16-c63603e20a83", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -681,7 +673,7 @@ | |||
|        "61   Hamburg   53.550341   10.000654  " | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 122, | ||||
|      "execution_count": 11, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -710,31 +702,23 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 123, | ||||
|    "execution_count": 13, | ||||
|    "id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", | ||||
|       "  for col_name, dtype in df.dtypes.iteritems():\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "\n", | ||||
|        "<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n", | ||||
|        "<div id=\"altair-viz-7b78525a62b243eca7b1f4044a328f47\"></div>\n", | ||||
|        "<script type=\"text/javascript\">\n", | ||||
|        "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n", | ||||
|        "  (function(spec, embedOpt){\n", | ||||
|        "    let outputDiv = document.currentScript.previousElementSibling;\n", | ||||
|        "    if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n", | ||||
|        "      outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n", | ||||
|        "    if (outputDiv.id !== \"altair-viz-7b78525a62b243eca7b1f4044a328f47\") {\n", | ||||
|        "      outputDiv = document.getElementById(\"altair-viz-7b78525a62b243eca7b1f4044a328f47\");\n", | ||||
|        "    }\n", | ||||
|        "    const paths = {\n", | ||||
|        "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", | ||||
|  | @ -780,14 +764,14 @@ | |||
|        "        .catch(showError)\n", | ||||
|        "        .then(() => displayChart(vegaEmbed));\n", | ||||
|        "    }\n", | ||||
|        "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", 
\"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n", | ||||
|        "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"ordinal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", 
\"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n", | ||||
|        "</script>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "alt.Chart(...)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 123, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -795,7 +779,7 @@ | |||
|    "source": [ | ||||
|     "barchart = alt.Chart(activy_police_vis[0:15]).mark_bar().encode(\n", | ||||
|     "    x = 'count:Q',\n", | ||||
|     "    y = alt.Y('handle:N', sort = '-x'),\n", | ||||
|     "    y = alt.Y('handle:O', sort = '-x'),\n", | ||||
|     ")\n", | ||||
|     "barchart " | ||||
|    ] | ||||
|  | @ -812,7 +796,7 @@ | |||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 125, | ||||
|    "execution_count": 14, | ||||
|    "id": "d0549250-b11f-4762-8500-1134c53303b4", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|  | @ -820,22 +804,377 @@ | |||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<style scoped>\n", | ||||
|        "    .dataframe tbody tr th:only-of-type {\n", | ||||
|        "        vertical-align: middle;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe tbody tr th {\n", | ||||
|        "        vertical-align: top;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe thead th {\n", | ||||
|        "        text-align: right;\n", | ||||
|        "    }\n", | ||||
|        "</style>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>index</th>\n", | ||||
|        "      <th>tweet_id</th>\n", | ||||
|        "      <th>like_count</th>\n", | ||||
|        "      <th>retweet_count</th>\n", | ||||
|        "      <th>reply_count</th>\n", | ||||
|        "      <th>quote_count</th>\n", | ||||
|        "      <th>measured_at</th>\n", | ||||
|        "      <th>is_deleted</th>\n", | ||||
|        "      <th>tweet_text</th>\n", | ||||
|        "      <th>created_at</th>\n", | ||||
|        "      <th>user_id</th>\n", | ||||
|        "      <th>handle</th>\n", | ||||
|        "      <th>user_name</th>\n", | ||||
|        "      <th>Name</th>\n", | ||||
|        "      <th>Typ</th>\n", | ||||
|        "      <th>Bundesland</th>\n", | ||||
|        "      <th>Stadt</th>\n", | ||||
|        "      <th>LAT</th>\n", | ||||
|        "      <th>LONG</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>0</th>\n", | ||||
|        "      <td>3053</td>\n", | ||||
|        "      <td>1609539240458878979</td>\n", | ||||
|        "      <td>21455</td>\n", | ||||
|        "      <td>1845</td>\n", | ||||
|        "      <td>3643</td>\n", | ||||
|        "      <td>341</td>\n", | ||||
|        "      <td>2023-01-05 14:44:34</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...</td>\n", | ||||
|        "      <td>2023-01-01 13:17:13</td>\n", | ||||
|        "      <td>2397974054</td>\n", | ||||
|        "      <td>polizeiberlin</td>\n", | ||||
|        "      <td>Polizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1</th>\n", | ||||
|        "      <td>1331</td>\n", | ||||
|        "      <td>1355179228396879872</td>\n", | ||||
|        "      <td>19186</td>\n", | ||||
|        "      <td>3386</td>\n", | ||||
|        "      <td>1203</td>\n", | ||||
|        "      <td>628</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...</td>\n", | ||||
|        "      <td>2021-01-29 15:41:20</td>\n", | ||||
|        "      <td>2397974054</td>\n", | ||||
|        "      <td>polizeiberlin</td>\n", | ||||
|        "      <td>Polizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>2</th>\n", | ||||
|        "      <td>91693</td>\n", | ||||
|        "      <td>1505620459148173316</td>\n", | ||||
|        "      <td>15708</td>\n", | ||||
|        "      <td>7098</td>\n", | ||||
|        "      <td>186</td>\n", | ||||
|        "      <td>540</td>\n", | ||||
|        "      <td>2022-03-24 20:15:08</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...</td>\n", | ||||
|        "      <td>2022-03-20 19:01:05</td>\n", | ||||
|        "      <td>2389161066</td>\n", | ||||
|        "      <td>polizei_nrw_bn</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Bonn</td>\n", | ||||
|        "      <td>50.735851</td>\n", | ||||
|        "      <td>7.10066</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>3</th>\n", | ||||
|        "      <td>91695</td>\n", | ||||
|        "      <td>1505620666476896259</td>\n", | ||||
|        "      <td>10337</td>\n", | ||||
|        "      <td>1539</td>\n", | ||||
|        "      <td>59</td>\n", | ||||
|        "      <td>35</td>\n", | ||||
|        "      <td>2022-03-24 20:15:08</td>\n", | ||||
|        "      <td>False</td>\n", | ||||
|        "      <td>Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...</td>\n", | ||||
|        "      <td>2022-03-20 19:01:54</td>\n", | ||||
|        "      <td>2389161066</td>\n", | ||||
|        "      <td>polizei_nrw_bn</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei NRW BN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Bonn</td>\n", | ||||
|        "      <td>50.735851</td>\n", | ||||
|        "      <td>7.10066</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>4</th>\n", | ||||
|        "      <td>122631</td>\n", | ||||
|        "      <td>1359098196434292739</td>\n", | ||||
|        "      <td>9471</td>\n", | ||||
|        "      <td>642</td>\n", | ||||
|        "      <td>128</td>\n", | ||||
|        "      <td>102</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2</td>\n", | ||||
|        "      <td>2021-02-09 11:13:55</td>\n", | ||||
|        "      <td>4876039738</td>\n", | ||||
|        "      <td>bpol_b</td>\n", | ||||
|        "      <td>Bundespolizei Berlin</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151685</th>\n", | ||||
|        "      <td>7569</td>\n", | ||||
|        "      <td>1332625325654757377</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...</td>\n", | ||||
|        "      <td>2020-11-28 10:00:11</td>\n", | ||||
|        "      <td>223758384</td>\n", | ||||
|        "      <td>polizeisachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Sachsen</td>\n", | ||||
|        "      <td>Dresden</td>\n", | ||||
|        "      <td>51.0493286</td>\n", | ||||
|        "      <td>13.7381437</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151686</th>\n", | ||||
|        "      <td>7572</td>\n", | ||||
|        "      <td>1332738525507186692</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...</td>\n", | ||||
|        "      <td>2020-11-28 17:30:00</td>\n", | ||||
|        "      <td>223758384</td>\n", | ||||
|        "      <td>polizeisachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei Sachsen</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Sachsen</td>\n", | ||||
|        "      <td>Dresden</td>\n", | ||||
|        "      <td>51.0493286</td>\n", | ||||
|        "      <td>13.7381437</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151687</th>\n", | ||||
|        "      <td>144702</td>\n", | ||||
|        "      <td>1465679768494526467</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...</td>\n", | ||||
|        "      <td>2021-11-30 13:51:02</td>\n", | ||||
|        "      <td>4876085224</td>\n", | ||||
|        "      <td>bpol_nord</td>\n", | ||||
|        "      <td>Bundespolizei Nord</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151688</th>\n", | ||||
|        "      <td>144701</td>\n", | ||||
|        "      <td>1464124290605977600</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>@gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...</td>\n", | ||||
|        "      <td>2021-11-26 06:50:07</td>\n", | ||||
|        "      <td>4876085224</td>\n", | ||||
|        "      <td>bpol_nord</td>\n", | ||||
|        "      <td>Bundespolizei Nord</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>151689</th>\n", | ||||
|        "      <td>66854</td>\n", | ||||
|        "      <td>1376453040283209728</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>-99</td>\n", | ||||
|        "      <td>NaT</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>#Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...</td>\n", | ||||
|        "      <td>2021-03-29 08:35:52</td>\n", | ||||
|        "      <td>2389263558</td>\n", | ||||
|        "      <td>polizei_nrw_un</td>\n", | ||||
|        "      <td>Polizei NRW UN</td>\n", | ||||
|        "      <td>Polizei NRW UN</td>\n", | ||||
|        "      <td>Polizei</td>\n", | ||||
|        "      <td>Nordrhein-Westfalen</td>\n", | ||||
|        "      <td>Unna</td>\n", | ||||
|        "      <td>51.5348835</td>\n", | ||||
|        "      <td>7.689014</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "<p>151690 rows × 19 columns</p>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "0         Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n", | ||||
|        "1         An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n", | ||||
|        "2         WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n", | ||||
|        "3         Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n", | ||||
|        "4                                             Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n", | ||||
|        "                                                         ...                                                 \n", | ||||
|        "151685    Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n", | ||||
|        "151686    📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n", | ||||
|        "151687    Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n", | ||||
|        "151688    @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n", | ||||
|        "151689    #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n", | ||||
|        "Name: tweet_text, Length: 151690, dtype: object" | ||||
|        "         index             tweet_id  like_count  retweet_count  reply_count  \\\n", | ||||
|        "0         3053  1609539240458878979       21455           1845         3643   \n", | ||||
|        "1         1331  1355179228396879872       19186           3386         1203   \n", | ||||
|        "2        91693  1505620459148173316       15708           7098          186   \n", | ||||
|        "3        91695  1505620666476896259       10337           1539           59   \n", | ||||
|        "4       122631  1359098196434292739        9471            642          128   \n", | ||||
|        "...        ...                  ...         ...            ...          ...   \n", | ||||
|        "151685    7569  1332625325654757377         -99            -99          -99   \n", | ||||
|        "151686    7572  1332738525507186692         -99            -99          -99   \n", | ||||
|        "151687  144702  1465679768494526467         -99            -99          -99   \n", | ||||
|        "151688  144701  1464124290605977600         -99            -99          -99   \n", | ||||
|        "151689   66854  1376453040283209728         -99            -99          -99   \n", | ||||
|        "\n", | ||||
|        "        quote_count         measured_at is_deleted  \\\n", | ||||
|        "0               341 2023-01-05 14:44:34      False   \n", | ||||
|        "1               628                 NaT        NaN   \n", | ||||
|        "2               540 2022-03-24 20:15:08      False   \n", | ||||
|        "3                35 2022-03-24 20:15:08      False   \n", | ||||
|        "4               102                 NaT        NaN   \n", | ||||
|        "...             ...                 ...        ...   \n", | ||||
|        "151685          -99                 NaT        NaN   \n", | ||||
|        "151686          -99                 NaT        NaN   \n", | ||||
|        "151687          -99                 NaT        NaN   \n", | ||||
|        "151688          -99                 NaT        NaN   \n", | ||||
|        "151689          -99                 NaT        NaN   \n", | ||||
|        "\n", | ||||
|        "                                                                                                 tweet_text  \\\n", | ||||
|        "0       Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...   \n", | ||||
|        "1       An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...   \n", | ||||
|        "2       WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...   \n", | ||||
|        "3       Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...   \n", | ||||
|        "4                                           Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2   \n", | ||||
|        "...                                                                                                     ...   \n", | ||||
|        "151685  Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...   \n", | ||||
|        "151686  📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...   \n", | ||||
|        "151687  Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...   \n", | ||||
|        "151688  @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...   \n", | ||||
|        "151689  #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...   \n", | ||||
|        "\n", | ||||
|        "                created_at     user_id          handle             user_name  \\\n", | ||||
|        "0      2023-01-01 13:17:13  2397974054   polizeiberlin        Polizei Berlin   \n", | ||||
|        "1      2021-01-29 15:41:20  2397974054   polizeiberlin        Polizei Berlin   \n", | ||||
|        "2      2022-03-20 19:01:05  2389161066  polizei_nrw_bn        Polizei NRW BN   \n", | ||||
|        "3      2022-03-20 19:01:54  2389161066  polizei_nrw_bn        Polizei NRW BN   \n", | ||||
|        "4      2021-02-09 11:13:55  4876039738          bpol_b  Bundespolizei Berlin   \n", | ||||
|        "...                    ...         ...             ...                   ...   \n", | ||||
|        "151685 2020-11-28 10:00:11   223758384  polizeisachsen       Polizei Sachsen   \n", | ||||
|        "151686 2020-11-28 17:30:00   223758384  polizeisachsen       Polizei Sachsen   \n", | ||||
|        "151687 2021-11-30 13:51:02  4876085224       bpol_nord    Bundespolizei Nord   \n", | ||||
|        "151688 2021-11-26 06:50:07  4876085224       bpol_nord    Bundespolizei Nord   \n", | ||||
|        "151689 2021-03-29 08:35:52  2389263558  polizei_nrw_un        Polizei NRW UN   \n", | ||||
|        "\n", | ||||
|        "                   Name      Typ           Bundesland    Stadt         LAT  \\\n", | ||||
|        "0                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "1                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "2        Polizei NRW BN  Polizei  Nordrhein-Westfalen     Bonn   50.735851   \n", | ||||
|        "3        Polizei NRW BN  Polizei  Nordrhein-Westfalen     Bonn   50.735851   \n", | ||||
|        "4                   NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "...                 ...      ...                  ...      ...         ...   \n", | ||||
|        "151685  Polizei Sachsen  Polizei              Sachsen  Dresden  51.0493286   \n", | ||||
|        "151686  Polizei Sachsen  Polizei              Sachsen  Dresden  51.0493286   \n", | ||||
|        "151687              NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "151688              NaN      NaN                  NaN      NaN         NaN   \n", | ||||
|        "151689   Polizei NRW UN  Polizei  Nordrhein-Westfalen     Unna  51.5348835   \n", | ||||
|        "\n", | ||||
|        "              LONG  \n", | ||||
|        "0              NaN  \n", | ||||
|        "1              NaN  \n", | ||||
|        "2          7.10066  \n", | ||||
|        "3          7.10066  \n", | ||||
|        "4              NaN  \n", | ||||
|        "...            ...  \n", | ||||
|        "151685  13.7381437  \n", | ||||
|        "151686  13.7381437  \n", | ||||
|        "151687         NaN  \n", | ||||
|        "151688         NaN  \n", | ||||
|        "151689    7.689014  \n", | ||||
|        "\n", | ||||
|        "[151690 rows x 19 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 125, | ||||
|      "execution_count": 14, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|  | @ -845,14 +1184,14 @@ | |||
|     "                                         on = \"handle\",\n", | ||||
|     "                                         how = \"left\")\n", | ||||
|     "pd.options.display.max_colwidth = 100\n", | ||||
|     "tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n", | ||||
|     "tweets_attention.sort_values('like_count', ascending = False).reset_index()\n", | ||||
|     "\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 90, | ||||
|    "id": "97952234-7957-421e-bd2c-2c8261992c5a", | ||||
|    "execution_count": 42, | ||||
|    "id": "621a3b74-e909-435c-8820-b38b63aa4893", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|  | @ -972,144 +1311,12 @@ | |||
|        "[11559 rows x 3 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 90, | ||||
|      "execution_count": 42, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "old = pd.read_csv(\"data/user_old.tsv\",sep = \"\\t\").rename(columns = {\"id\":\"user_id\",\"name\": \"user_name\"} )\n", | ||||
|     "new = pd.read_csv(\"data/tweets-1679742702794.csv\").rename(columns = {\"username\":\"handle\", \"handle\": \"user_name\"})\n", | ||||
|     "new" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 121, | ||||
|    "id": "ed86b45e-9dd8-436d-9c96-15500ed93985", | ||||
|    "metadata": { | ||||
|     "tags": [] | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<style scoped>\n", | ||||
|        "    .dataframe tbody tr th:only-of-type {\n", | ||||
|        "        vertical-align: middle;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe tbody tr th {\n", | ||||
|        "        vertical-align: top;\n", | ||||
|        "    }\n", | ||||
|        "\n", | ||||
|        "    .dataframe thead th {\n", | ||||
|        "        text-align: right;\n", | ||||
|        "    }\n", | ||||
|        "</style>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>count</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>user_id</th>\n", | ||||
|        "      <th>user_name</th>\n", | ||||
|        "      <th></th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>223758384</th>\n", | ||||
|        "      <th>Polizei Sachsen</th>\n", | ||||
|        "      <td>5340</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>259607457</th>\n", | ||||
|        "      <th>Polizei NRW K</th>\n", | ||||
|        "      <td>2544</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>424895827</th>\n", | ||||
|        "      <th>Polizei Stuttgart</th>\n", | ||||
|        "      <td>1913</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>769128278</th>\n", | ||||
|        "      <th>Polizei NRW DO</th>\n", | ||||
|        "      <td>4895</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>775664780</th>\n", | ||||
|        "      <th>Polizei Rostock</th>\n", | ||||
|        "      <td>604</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1169206134189830145</th>\n", | ||||
|        "      <th>Polizei Stendal</th>\n", | ||||
|        "      <td>842</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1184022676488314880</th>\n", | ||||
|        "      <th>Polizei Pforzheim</th>\n", | ||||
|        "      <td>283</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1184024283342950401</th>\n", | ||||
|        "      <th>Polizei Ravensburg</th>\n", | ||||
|        "      <td>460</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1232548941889228808</th>\n", | ||||
|        "      <th>Systemstratege:</th>\n", | ||||
|        "      <td>168</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1295978598034284546</th>\n", | ||||
|        "      <th>Polizei ZPD NI</th>\n", | ||||
|        "      <td>133</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "<p>163 rows × 1 columns</p>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "                                        count\n", | ||||
|        "user_id             user_name                \n", | ||||
|        "223758384           Polizei Sachsen      5340\n", | ||||
|        "259607457           Polizei NRW K        2544\n", | ||||
|        "424895827           Polizei Stuttgart    1913\n", | ||||
|        "769128278           Polizei NRW DO       4895\n", | ||||
|        "775664780           Polizei Rostock       604\n", | ||||
|        "...                                       ...\n", | ||||
|        "1169206134189830145 Polizei Stendal       842\n", | ||||
|        "1184022676488314880 Polizei Pforzheim     283\n", | ||||
|        "1184024283342950401 Polizei Ravensburg    460\n", | ||||
|        "1232548941889228808 Systemstratege:       168\n", | ||||
|        "1295978598034284546 Polizei ZPD NI        133\n", | ||||
|        "\n", | ||||
|        "[163 rows x 1 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 121, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n", | ||||
|     "                                    )[\"user_id\"].aggregate(['count']\n", | ||||
|     "                                                          )" | ||||
|    ] | ||||
|    "source": [] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue