{ "cells": [ { "cell_type": "markdown", "id": "112e14ce", "metadata": {}, "source": [ "## Merge state data and user data" ] }, { "cell_type": "code", "execution_count": 40, "id": "e75eccbf", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "tw_tweets = pd.read_csv(r'..\\data\\copbird_table_tweet.csv')\n", "tw_user_data = pd.read_csv(r'..\\data\\copbird_table_user.csv')\n", "tw_pol_geo_data = pd.read_csv(r'..\\geolocations\\polizei_accounts_geo.csv', delimiter='\\t')" ] }, { "cell_type": "code", "execution_count": 41, "id": "be63403d", "metadata": {}, "outputs": [], "source": [ "def add_state_to_user_df():\n", " tw_user_df = tw_user_data.rename(columns={\"id\": \"user_id\"})\n", " tw_pol_geo_df = tw_pol_geo_data.rename(columns={\"Name\": \"name\", \"Bundesland\": \"bundesland\", \"Stadt\": \"stadt\"})\n", "\n", " # merge tw_pol_geo_df into tw_user_df on key_col ('name') and only add col 'bundesland' to new df based on user_df\n", " return pd.merge(tw_user_df, tw_pol_geo_df[['name', 'stadt', 'bundesland']], on='name', how='left')" ] }, { "cell_type": "code", "execution_count": 42, "id": "a9093ae6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idnamehandlestadtbundesland
01032561433102434304Polizei WittlichPolizeiWittlichWittlichRheinland-Pfalz
11143867545226764293Bayerisches LandeskriminalamtLKA_BayernMünchenBayern
21169206134189830145Polizei StendalPolizei_SDLStendalSachsen-Anhalt
31184024283342950401Polizei RavensburgPolizeiRVRavensburgBaden-Württemberg
41232548941889228808Polizei Bad NenndorfPolizei_BadNBad NenndorfNiedersachsen
51295978598034284546Polizei ZPD NIPolizei_ZPD_NIHannoverNiedersachsen
61487390240Polizei KoblenzPolizei_KOKoblenzRheinland-Pfalz
71968267000Polizei MainzPolizeiMainzMainzRheinland-Pfalz
81979596724Polizei NRW OBpolizei_nrw_obOberhausenNordrhein-Westfalen
92176104583Polizei RheinpfalzPP_RheinpfalzLudwigshafen am RheinRheinland-Pfalz
\n", "
" ], "text/plain": [ " user_id name handle \\\n", "0 1032561433102434304 Polizei Wittlich PolizeiWittlich \n", "1 1143867545226764293 Bayerisches Landeskriminalamt LKA_Bayern \n", "2 1169206134189830145 Polizei Stendal Polizei_SDL \n", "3 1184024283342950401 Polizei Ravensburg PolizeiRV \n", "4 1232548941889228808 Polizei Bad Nenndorf Polizei_BadN \n", "5 1295978598034284546 Polizei ZPD NI Polizei_ZPD_NI \n", "6 1487390240 Polizei Koblenz Polizei_KO \n", "7 1968267000 Polizei Mainz PolizeiMainz \n", "8 1979596724 Polizei NRW OB polizei_nrw_ob \n", "9 2176104583 Polizei Rheinpfalz PP_Rheinpfalz \n", "\n", " stadt bundesland \n", "0 Wittlich Rheinland-Pfalz \n", "1 München Bayern \n", "2 Stendal Sachsen-Anhalt \n", "3 Ravensburg Baden-Württemberg \n", "4 Bad Nenndorf Niedersachsen \n", "5 Hannover Niedersachsen \n", "6 Koblenz Rheinland-Pfalz \n", "7 Mainz Rheinland-Pfalz \n", "8 Oberhausen Nordrhein-Westfalen \n", "9 Ludwigshafen am Rhein Rheinland-Pfalz " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "add_state_to_user_df()[:10]" ] }, { "cell_type": "markdown", "id": "f7f32d78", "metadata": {}, "source": [ "## insert state data into tweet data" ] }, { "cell_type": "code", "execution_count": 43, "id": "20d4a15a", "metadata": {}, "outputs": [], "source": [ "def add_state_to_tweets_df():\n", " tw_tweets_ext = pd.merge(tw_tweets, add_state_to_user_df()[['user_id', 'stadt', 'bundesland', 'name', 'handle'\n", " ]], on='user_id', how='left')\n", " return tw_tweets_ext[['id', 'tweet_text', 'created_at', 'user_id', 'name', 'handle', 'stadt', 'bundesland'\n", " ]].rename(columns={'id': 'tweet_id', 'name': 'user_name'})" ] }, { "cell_type": "code", "execution_count": 44, "id": "7c9f4add", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_idtweet_textcreated_atuser_iduser_namehandlestadtbundesland
01321021123463663616@mahanna196 Da die Stadt keine Ausnahme für Ra...2020-10-27 09:29:13778895426007203840Polizei Oldenburg-Stadt/AmmerlPolizei_OLNaNNaN
11321023114071969792#Zeugengesucht\\nDie Hintergründe zu dem Tötung...2020-10-27 09:37:082397974054Polizei BerlinpolizeiberlinBerlinBerlin
21321025127388188673RT @bka: EUROPE´S MOST WANTED – Sexualstraftät...2020-10-27 09:45:082397974054Polizei BerlinpolizeiberlinBerlinBerlin
31321028108665950208@StrupeitVolker Wir verstehen nicht so recht w...2020-10-27 09:56:592810902381Polizei MünchenPolizeiMuenchenMünchenBayern
41321029199998656513Wir unterstützen das @bka bei der #Öffentlichk...2020-10-27 10:01:19223758384Polizei SachsenPolizeiSachsenDresdenSachsen
51321029204964745218Heute früh brannte es auf unserem Sicherstellu...2020-10-27 10:01:202397974054Polizei BerlinpolizeiberlinBerlinBerlin
61321031650483019776Als ein Supermarktmitarbeiter in #Fürstenwalde...2020-10-27 10:11:03720244303566483456Polizei BrandenburgPolizeiBBPotsdamBrandenburg
71321031765209829379Auf der Waldecker Straße haben bislang unbekan...2020-10-27 10:11:312389222849Polizei NRW DUpolizei_nrw_duDuisburgNordrhein-Westfalen
81321032307277443072@Sitewinder Es handelte sich um einen 22-jähri...2020-10-27 10:13:40720244303566483456Polizei BrandenburgPolizeiBBPotsdamBrandenburg
91321033945467834368@GunstickULM Es bestand der Verdacht, dass Pla...2020-10-27 10:20:10720244303566483456Polizei BrandenburgPolizeiBBPotsdamBrandenburg
\n", "
" ], "text/plain": [ " tweet_id tweet_text \\\n", "0 1321021123463663616 @mahanna196 Da die Stadt keine Ausnahme für Ra... \n", "1 1321023114071969792 #Zeugengesucht\\nDie Hintergründe zu dem Tötung... \n", "2 1321025127388188673 RT @bka: EUROPE´S MOST WANTED – Sexualstraftät... \n", "3 1321028108665950208 @StrupeitVolker Wir verstehen nicht so recht w... \n", "4 1321029199998656513 Wir unterstützen das @bka bei der #Öffentlichk... \n", "5 1321029204964745218 Heute früh brannte es auf unserem Sicherstellu... \n", "6 1321031650483019776 Als ein Supermarktmitarbeiter in #Fürstenwalde... \n", "7 1321031765209829379 Auf der Waldecker Straße haben bislang unbekan... \n", "8 1321032307277443072 @Sitewinder Es handelte sich um einen 22-jähri... \n", "9 1321033945467834368 @GunstickULM Es bestand der Verdacht, dass Pla... \n", "\n", " created_at user_id user_name \\\n", "0 2020-10-27 09:29:13 778895426007203840 Polizei Oldenburg-Stadt/Ammerl \n", "1 2020-10-27 09:37:08 2397974054 Polizei Berlin \n", "2 2020-10-27 09:45:08 2397974054 Polizei Berlin \n", "3 2020-10-27 09:56:59 2810902381 Polizei München \n", "4 2020-10-27 10:01:19 223758384 Polizei Sachsen \n", "5 2020-10-27 10:01:20 2397974054 Polizei Berlin \n", "6 2020-10-27 10:11:03 720244303566483456 Polizei Brandenburg \n", "7 2020-10-27 10:11:31 2389222849 Polizei NRW DU \n", "8 2020-10-27 10:13:40 720244303566483456 Polizei Brandenburg \n", "9 2020-10-27 10:20:10 720244303566483456 Polizei Brandenburg \n", "\n", " handle stadt bundesland \n", "0 Polizei_OL NaN NaN \n", "1 polizeiberlin Berlin Berlin \n", "2 polizeiberlin Berlin Berlin \n", "3 PolizeiMuenchen München Bayern \n", "4 PolizeiSachsen Dresden Sachsen \n", "5 polizeiberlin Berlin Berlin \n", "6 PolizeiBB Potsdam Brandenburg \n", "7 polizei_nrw_du Duisburg Nordrhein-Westfalen \n", "8 PolizeiBB Potsdam Brandenburg \n", "9 PolizeiBB Potsdam Brandenburg " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "add_state_to_tweets_df()[:10]" ] }, { "cell_type": "markdown", "id": "327f77a1", "metadata": {}, "source": [ "## Mapping citys/police stations from Pressemitteilungen and twitter-data" ] }, { "cell_type": "markdown", "id": "de1410ce", "metadata": {}, "source": [ "### Extract cities, user_id from Pressemitteilungen and Tweets" ] }, { "cell_type": "code", "execution_count": 49, "id": "387de0f4", "metadata": {}, "outputs": [], "source": [ "def map_bl_tw_citys():\n", " import re\n", " df_blaulicht = pd.read_csv(r'..\\mod_data\\2020-12_2021-05_presseportal.csv')\n", " df_blaulicht.sort_index(inplace=True)\n", " tw_locations = list(df_tw_user[['stadt', 'user_id']].itertuples(index=False, name=None))\n", " tw_locations = [(loc, id) for loc, id in tw_locations if len(str(loc)) > 1]\n", " bl_locations = list(set([str(city) for city in df_blaulicht['location'].values]))\n", " bl_tw_locations = {}\n", " for bl_loc in bl_locations:\n", " for tw_loc, tw_id in tw_locations:\n", " if re.search(r'\\b' + re.escape(str(tw_loc).lower()) + r'\\b', str(bl_loc).lower()):\n", " bl_tw_locations[bl_loc] = [tw_loc, tw_id]\n", " return bl_tw_locations" ] }, { "cell_type": "code", "execution_count": 50, "id": "cd8b388a", "metadata": {}, "outputs": [], "source": [ "def find_location(txt, mp):\n", " mapped_blaulicht = mp.get(txt, \"\")\n", " return mapped_blaulicht[1] if mapped_blaulicht != \"\" else \"\"" ] }, { "cell_type": "markdown", "id": "5c29d275", "metadata": {}, "source": [ "### Add col tw_user_id to Blaulichtmeldungen" ] }, { "cell_type": "code", "execution_count": 51, "id": "5ee5a263", "metadata": {}, "outputs": [], "source": [ "def extend_blaulicht_data():\n", " df_blaulicht = concat_blaulicht_dfs()\n", " mapping = map_bl_tw_citys()\n", " df_blaulicht['tw_user_id'] = df_blaulicht['location'].apply(lambda x: find_location(x, mapping))\n", " return df_blaulicht" ] }, { "cell_type": "code", "execution_count": 52, "id": "f869435d", "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: '..\\\\mod_data\\x820-12_2021-05_presseportal.csv'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mextend_blaulicht_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m\u001b[0m in \u001b[0;36mextend_blaulicht_data\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mextend_blaulicht_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mdf_blaulicht\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconcat_blaulicht_dfs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mmapping\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmap_bl_tw_citys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mdf_blaulicht\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tw_user_id'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf_blaulicht\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'location'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mfind_location\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf_blaulicht\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36mmap_bl_tw_citys\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmap_bl_tw_citys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mre\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdf_blaulicht\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'..\\mod_data\\2020-12_2021-05_presseportal.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mdf_blaulicht\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mtw_locations\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_tw_user\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'stadt'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'user_id'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitertuples\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 608\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 610\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 611\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 460\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 461\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 462\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 463\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 464\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 817\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 821\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1048\u001b[0m )\n\u001b[0;32m 1049\u001b[0m \u001b[1;31m# error: Too many arguments for \"ParserBase\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1050\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# type: ignore[call-arg]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1051\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1052\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 1865\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1866\u001b[0m \u001b[1;31m# open handles\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1867\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_open_handles\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1868\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1869\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m\"storage_options\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"encoding\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"memory_map\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"compression\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_open_handles\u001b[1;34m(self, src, kwds)\u001b[0m\n\u001b[0;32m 1360\u001b[0m \u001b[0mLet\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mreaders\u001b[0m \u001b[0mopen\u001b[0m \u001b[0mIOHanldes\u001b[0m \u001b[0mafter\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtheir\u001b[0m \u001b[0mpotential\u001b[0m \u001b[0mraises\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1361\u001b[0m \"\"\"\n\u001b[1;32m-> 1362\u001b[1;33m self.handles = get_handle(\n\u001b[0m\u001b[0;32m 1363\u001b[0m \u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1364\u001b[0m \u001b[1;34m\"r\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\users\\simon\\programming\\hackathons\\copbird-group-16\\copbird-venv\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"replace\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 641\u001b[0m \u001b[1;31m# Encoding\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 642\u001b[1;33m handle = open(\n\u001b[0m\u001b[0;32m 643\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 644\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '..\\\\mod_data\\x820-12_2021-05_presseportal.csv'" ] } ], "source": [ "extend_blaulicht_data()[:10]" ] }, { "cell_type": "markdown", "id": "2b756af2", "metadata": {}, "source": [ "## Extract tweets from specific topic by keywords" ] }, { "cell_type": "code", "execution_count": 53, "id": "89fb5ba0", "metadata": {}, "outputs": [], "source": [ "def get_topics_by_str_lst(topic, df, col_name):\n", " df_topiced = df[df[col_name].str.contains('|'.join(topic))]\n", " return df_topiced" ] }, { "cell_type": "code", "execution_count": 54, "id": "7b545748", "metadata": {}, "outputs": [], "source": [ "topic_1 = ['demonstr', 'kundgeb']\n", "topic_2 = ['drogen', 'weed', 'graas', 'lsd', 'cannabis', 'ecstasy', 'kokain', 'meth', 'crystal']\n", "topic_3 = ['rassis', 'diskriminier', 'ausländerfeindlich', 'fremdenfeindlich', 'fremdenhass']\n", "topic_4 = ['antisem', 'juden', 'synagoge', 'judenhass', 'judenfeindlich', 'holocaust']" ] }, { "cell_type": "code", "execution_count": 55, "id": "9bb43006", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_idtweet_textcreated_atuser_iduser_namehandlestadtbundesland
10311322543875169333250Freitagabend beleidigten zwei Männer eine 20-j...2020-10-31 14:20:06808666671468658688Polizei BremenBremenPolizeiBremenBremen
12391323187306698858499@hin_fort Die verwendeten Begriffe spielen für...2020-11-02 08:56:52808666671468658688Polizei BremenBremenPolizeiBremenBremen
14291323269343183015938#Zeugensuche\\r\\n\\r\\n#Frankfurt-#Kalbach: Am 30...2020-11-02 14:22:512272909014Polizei FrankfurtPolizei_FfmFrankfurt am MainHessen
53001328341355626721282@MeineMe53821610 rassistischen Motiven? 🤔2020-11-16 14:17:134201961439Polizei MannheimPolizeiMannheimMannheimBaden-Württemberg
81491331972789801267201@werner42726904 Wenn Sie konkrete Hinweise auf...2020-11-26 14:47:142904886151Polizei HamburgPolizeiHamburgHamburgHamburg
91761333452133623078914@Fuchsbau_22 @ARD_Recht Wie bereits zuvor nich...2020-11-30 16:45:373029998264Polizei KarlsruhePolizei_KAKarlsruheBaden-Württemberg
97631334075361416581120@Initiative_PKD @Fuchsbau_22 @ARD_Recht Wie wi...2020-12-02 10:02:063029998264Polizei KarlsruhePolizei_KAKarlsruheBaden-Württemberg
117021335957493365813250👉Auto eines behinderten Menschen in #Willich #...2020-12-07 14:41:012389266775Polizei NRW VIEpolizei_nrw_vieViersenNordrhein-Westfalen
117521335978008457392129Gestern Abend erschien auf der #Davidwache in ...2020-12-07 16:02:332397974054Polizei BerlinpolizeiberlinBerlinBerlin
150821339960301815537665Eine Kollegin unserer #Dir3 bemerkte, dass ein...2020-12-18 15:46:452397974054Polizei BerlinpolizeiberlinBerlinBerlin
166751343918555495084032#Frankfurt - #Gallus #Griesheim #Nied \\r\\n\\r\\...2020-12-29 13:55:272272909014Polizei FrankfurtPolizei_FfmFrankfurt am MainHessen
173881349742682885775365Zwei Ladendetektive erwischten einen 37-Jährig...2021-01-14 15:38:27808666671468658688Polizei BremenBremenPolizeiBremenBremen
174061349757570114846721@ArizonaStranger Ob die Beleidigung als \"auslä...2021-01-14 16:37:36808666671468658688Polizei BremenBremenPolizeiBremenBremen
218011356540977343836166#Zeugengesucht \\r\\n\\r\\nMontagnachmittag hat ei...2021-02-02 09:52:26808666671468658688Polizei BremenBremenPolizeiBremenBremen
267721364958369426133012#Tagesticker 📝 No. 1\\r\\n\\r\\n➡️ Magdeburg \\r\\n⤵...2021-02-25 15:20:092849730251Polizei MagdeburgPolizei_MDMagdeburgSachsen-Anhalt
274211367063451483340801Eine 41-jähr. Deutsche aus #Fürstenfeldbruck g...2021-03-03 10:45:003169867654Bundespolizei Bayernbpol_byMünchenBayern
292121370091362536853508@amzdo @Polizei_NRW @RN_DORTMUND @Nordstadtblo...2021-03-11 19:16:50769128278Polizei NRW DOpolizei_nrw_doDortmundNordrhein-Westfalen
304751372110500742434816Wir beteiligen uns an den Internationalen Woch...2021-03-17 09:00:102389375698Polizei NRW HXpolizei_nrw_hxHöxterNordrhein-Westfalen
306221372171253809876996Ein klares Statement der #PolizeiSaarland gege...2021-03-17 13:01:35821727790374469632Polizei SaarlandPolizeiSaarlandSaarbrückenSaarland
308011372471378650234882Wir schließen uns dem Beitrag der #PolizeiBrau...2021-03-18 08:54:10773438463068766208Polizei NienburgPolizei_NBGNienburg (Weser)Niedersachsen
308041372476310208061442+++Solidarität-Grenzenlos+++\\r\\n\\r\\nDie diesjä...2021-03-18 09:13:46773132811238768640Polizei HildesheimPolizei_HIHildesheimNiedersachsen
308061372477266383532034Zum heutigen #Aktionstag \"Vorsicht, Vorurteile...2021-03-18 09:17:341169206134189830145Polizei StendalPolizei_SDLStendalSachsen-Anhalt
308211372483689221853187Weiter ging es in die Innenstadt von #Stendal,...2021-03-18 09:43:051169206134189830145Polizei StendalPolizei_SDLStendalSachsen-Anhalt
308451372491481294987264Letzte Station für uns war vor dem Landratsamt...2021-03-18 10:14:031169206134189830145Polizei StendalPolizei_SDLStendalSachsen-Anhalt
308661372497013946847236#DemokratieLeben! – Die PI #Cuxhaven unterstüt...2021-03-18 10:36:02948208635448094720Polizei CuxhavenPolizei_CUXCuxhavenNiedersachsen
316091373545951684665344👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:081232548941889228808Polizei Bad NenndorfPolizei_BadNBad NenndorfNiedersachsen
316101373545975726415876👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:14772751356230823936Polizei GöttingenPolizei_GOEGöttingenNiedersachsen
316111373545977727045635👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:14773132811238768640Polizei HildesheimPolizei_HIHildesheimNiedersachsen
316121373545985532645384👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:16773785655340048384Polizei HolzmindenPolizei_HOLHolzmindenNiedersachsen
316131373545986056916993👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:16773438463068766208Polizei NienburgPolizei_NBGNienburg (Weser)Niedersachsen
316141373545990226067458👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:17773805850687340544Polizei OsterodePolizei_OHAOsterode am HarzNiedersachsen
316151373545990339366912👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:18773811779013083136Polizei StadthagenPolizei_STHStadthagenNiedersachsen
316161373545994428817410👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:18775277344705609732Polizei BückeburgPolizei_BBGBückeburgNiedersachsen
316171373546003354300417👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:21773413449267281920Polizei HamelnPolizei_HMHameln-PyrmontNiedersachsen
316181373546006219010049👉Internationaler Tag gegen Rassismus #Solidari...2021-03-21 08:04:21773778874979287040Polizei NortheimPolizei_NOMNortheimNiedersachsen
316211373552485424635904In unserer Gesellschaft ist weder Platz für Ha...2021-03-21 08:30:062389342951Polizei NRW PBpolizei_nrw_pbPaderbornNordrhein-Westfalen
316641373627984574345217Wir nahmen am Do einen 53-jähirgen Paderborner...2021-03-21 13:30:062389146030Polizei NRW BIpolizei_nrw_biBielefeldNordrhein-Westfalen
318261373939038982397953Am Freitagabend ist ein Mann an der Endhaltest...2021-03-22 10:06:08223758384Polizei SachsenPolizeiSachsenDresdenSachsen
320811374039903827333120Zwei unbekannte Männer haben heute Morgen auf ...2021-03-22 16:46:56223758384Polizei SachsenPolizeiSachsenDresdenSachsen
320981374060177591271425Zeugen nach gefährlicher Körperverletzung gesu...2021-03-22 18:07:29769128278Polizei NRW DOpolizei_nrw_doDortmundNordrhein-Westfalen
323651374382966768472075Fremdenfeindliches Graffiti - #Polizei #Aachen...2021-03-23 15:30:082389142300Polizei NRW ACPolizei_NRW_ACAachenNordrhein-Westfalen
324711374628552499466240Fremdenfeindliches Graffiti - #Polizei #Aachen...2021-03-24 07:46:002389142300Polizei NRW ACPolizei_NRW_ACAachenNordrhein-Westfalen
332101375372215303208961Hass im Netz - Handeln gegen Hater\\r\\nHass, Be...2021-03-26 09:01:03783322939580092418Polizei UlmPolizeiULUlmBaden-Württemberg
336661376021420564693000Gemeinsam stark gegen Rassismus\\r\\n\\r\\nUnser S...2021-03-28 04:00:46821727790374469632Polizei SaarlandPolizeiSaarlandSaarbrückenSaarland
366121379754045099945984#FriedrichsGymnasium #StadttheaterHerford #Rav...2021-04-07 11:12:532389146030Polizei NRW BIpolizei_nrw_biBielefeldNordrhein-Westfalen
367441379876164932616197@Hase_Zwo @Einzelfallinfos Wie Sie der Pressem...2021-04-07 19:18:09808666671468658688Polizei BremenBremenPolizeiBremenBremen
377361381566100052312066Am Samstagabend skandierte eine Gruppe von etw...2021-04-12 11:13:21808666671468658688Polizei BremenBremenPolizeiBremenBremen
398781384459042496593920Unbekannte Täter beschmierten im Zeitraum zwis...2021-04-20 10:48:522389142300Polizei NRW ACPolizei_NRW_ACAachenNordrhein-Westfalen
405251387051636627894285Unser #LKA sucht 2 Männer, die letztes Jahr im...2021-04-27 14:30:552397974054Polizei BerlinpolizeiberlinBerlinBerlin
420851389136356211892226Zwei Festnahmen nach Beleidigung mit fremdenfe...2021-05-03 08:34:502904886151Polizei HamburgPolizeiHamburgHamburgHamburg
422171389182069499105280BürgerInnen in Sömmerda machten am vergangenen...2021-05-03 11:36:293064348636Polizei ThüringenPolizei_ThuerErfurtThüringen
422201389182634836832257BürgerInnen in Sömmerda machten am vergangenen...2021-05-03 11:38:443064348636Polizei ThüringenPolizei_ThuerErfurtThüringen
422371389190825406513158🔵 44-Jähriger bedroht Minderjährige mit Schrec...2021-05-03 12:11:17770652658566852608Polizei HannoverPolizei_HHannoverNiedersachsen
\n", "
" ], "text/plain": [ " tweet_id tweet_text \\\n", "1031 1322543875169333250 Freitagabend beleidigten zwei Männer eine 20-j... \n", "1239 1323187306698858499 @hin_fort Die verwendeten Begriffe spielen für... \n", "1429 1323269343183015938 #Zeugensuche\\r\\n\\r\\n#Frankfurt-#Kalbach: Am 30... \n", "5300 1328341355626721282 @MeineMe53821610 rassistischen Motiven? 🤔 \n", "8149 1331972789801267201 @werner42726904 Wenn Sie konkrete Hinweise auf... \n", "9176 1333452133623078914 @Fuchsbau_22 @ARD_Recht Wie bereits zuvor nich... \n", "9763 1334075361416581120 @Initiative_PKD @Fuchsbau_22 @ARD_Recht Wie wi... \n", "11702 1335957493365813250 👉Auto eines behinderten Menschen in #Willich #... \n", "11752 1335978008457392129 Gestern Abend erschien auf der #Davidwache in ... \n", "15082 1339960301815537665 Eine Kollegin unserer #Dir3 bemerkte, dass ein... \n", "16675 1343918555495084032 #Frankfurt - #Gallus #Griesheim #Nied \\r\\n\\r\\... \n", "17388 1349742682885775365 Zwei Ladendetektive erwischten einen 37-Jährig... \n", "17406 1349757570114846721 @ArizonaStranger Ob die Beleidigung als \"auslä... \n", "21801 1356540977343836166 #Zeugengesucht \\r\\n\\r\\nMontagnachmittag hat ei... \n", "26772 1364958369426133012 #Tagesticker 📝 No. 1\\r\\n\\r\\n➡️ Magdeburg \\r\\n⤵... \n", "27421 1367063451483340801 Eine 41-jähr. Deutsche aus #Fürstenfeldbruck g... \n", "29212 1370091362536853508 @amzdo @Polizei_NRW @RN_DORTMUND @Nordstadtblo... \n", "30475 1372110500742434816 Wir beteiligen uns an den Internationalen Woch... \n", "30622 1372171253809876996 Ein klares Statement der #PolizeiSaarland gege... \n", "30801 1372471378650234882 Wir schließen uns dem Beitrag der #PolizeiBrau... \n", "30804 1372476310208061442 +++Solidarität-Grenzenlos+++\\r\\n\\r\\nDie diesjä... \n", "30806 1372477266383532034 Zum heutigen #Aktionstag \"Vorsicht, Vorurteile... \n", "30821 1372483689221853187 Weiter ging es in die Innenstadt von #Stendal,... \n", "30845 1372491481294987264 Letzte Station für uns war vor dem Landratsamt... \n", "30866 1372497013946847236 #DemokratieLeben! – Die PI #Cuxhaven unterstüt... \n", "31609 1373545951684665344 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31610 1373545975726415876 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31611 1373545977727045635 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31612 1373545985532645384 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31613 1373545986056916993 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31614 1373545990226067458 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31615 1373545990339366912 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31616 1373545994428817410 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31617 1373546003354300417 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31618 1373546006219010049 👉Internationaler Tag gegen Rassismus #Solidari... \n", "31621 1373552485424635904 In unserer Gesellschaft ist weder Platz für Ha... \n", "31664 1373627984574345217 Wir nahmen am Do einen 53-jähirgen Paderborner... \n", "31826 1373939038982397953 Am Freitagabend ist ein Mann an der Endhaltest... \n", "32081 1374039903827333120 Zwei unbekannte Männer haben heute Morgen auf ... \n", "32098 1374060177591271425 Zeugen nach gefährlicher Körperverletzung gesu... \n", "32365 1374382966768472075 Fremdenfeindliches Graffiti - #Polizei #Aachen... \n", "32471 1374628552499466240 Fremdenfeindliches Graffiti - #Polizei #Aachen... \n", "33210 1375372215303208961 Hass im Netz - Handeln gegen Hater\\r\\nHass, Be... \n", "33666 1376021420564693000 Gemeinsam stark gegen Rassismus\\r\\n\\r\\nUnser S... \n", "36612 1379754045099945984 #FriedrichsGymnasium #StadttheaterHerford #Rav... \n", "36744 1379876164932616197 @Hase_Zwo @Einzelfallinfos Wie Sie der Pressem... \n", "37736 1381566100052312066 Am Samstagabend skandierte eine Gruppe von etw... \n", "39878 1384459042496593920 Unbekannte Täter beschmierten im Zeitraum zwis... \n", "40525 1387051636627894285 Unser #LKA sucht 2 Männer, die letztes Jahr im... \n", "42085 1389136356211892226 Zwei Festnahmen nach Beleidigung mit fremdenfe... \n", "42217 1389182069499105280 BürgerInnen in Sömmerda machten am vergangenen... \n", "42220 1389182634836832257 BürgerInnen in Sömmerda machten am vergangenen... \n", "42237 1389190825406513158 🔵 44-Jähriger bedroht Minderjährige mit Schrec... \n", "\n", " created_at user_id user_name \\\n", "1031 2020-10-31 14:20:06 808666671468658688 Polizei Bremen \n", "1239 2020-11-02 08:56:52 808666671468658688 Polizei Bremen \n", "1429 2020-11-02 14:22:51 2272909014 Polizei Frankfurt \n", "5300 2020-11-16 14:17:13 4201961439 Polizei Mannheim \n", "8149 2020-11-26 14:47:14 2904886151 Polizei Hamburg \n", "9176 2020-11-30 16:45:37 3029998264 Polizei Karlsruhe \n", "9763 2020-12-02 10:02:06 3029998264 Polizei Karlsruhe \n", "11702 2020-12-07 14:41:01 2389266775 Polizei NRW VIE \n", "11752 2020-12-07 16:02:33 2397974054 Polizei Berlin \n", "15082 2020-12-18 15:46:45 2397974054 Polizei Berlin \n", "16675 2020-12-29 13:55:27 2272909014 Polizei Frankfurt \n", "17388 2021-01-14 15:38:27 808666671468658688 Polizei Bremen \n", "17406 2021-01-14 16:37:36 808666671468658688 Polizei Bremen \n", "21801 2021-02-02 09:52:26 808666671468658688 Polizei Bremen \n", "26772 2021-02-25 15:20:09 2849730251 Polizei Magdeburg \n", "27421 2021-03-03 10:45:00 3169867654 Bundespolizei Bayern \n", "29212 2021-03-11 19:16:50 769128278 Polizei NRW DO \n", "30475 2021-03-17 09:00:10 2389375698 Polizei NRW HX \n", "30622 2021-03-17 13:01:35 821727790374469632 Polizei Saarland \n", "30801 2021-03-18 08:54:10 773438463068766208 Polizei Nienburg \n", "30804 2021-03-18 09:13:46 773132811238768640 Polizei Hildesheim \n", "30806 2021-03-18 09:17:34 1169206134189830145 Polizei Stendal \n", "30821 2021-03-18 09:43:05 1169206134189830145 Polizei Stendal \n", "30845 2021-03-18 10:14:03 1169206134189830145 Polizei Stendal \n", "30866 2021-03-18 10:36:02 948208635448094720 Polizei Cuxhaven \n", "31609 2021-03-21 08:04:08 1232548941889228808 Polizei Bad Nenndorf \n", "31610 2021-03-21 08:04:14 772751356230823936 Polizei Göttingen \n", "31611 2021-03-21 08:04:14 773132811238768640 Polizei Hildesheim \n", "31612 2021-03-21 08:04:16 773785655340048384 Polizei Holzminden \n", "31613 2021-03-21 08:04:16 773438463068766208 Polizei Nienburg \n", "31614 2021-03-21 08:04:17 773805850687340544 Polizei Osterode \n", "31615 2021-03-21 08:04:18 773811779013083136 Polizei Stadthagen \n", "31616 2021-03-21 08:04:18 775277344705609732 Polizei Bückeburg \n", "31617 2021-03-21 08:04:21 773413449267281920 Polizei Hameln \n", "31618 2021-03-21 08:04:21 773778874979287040 Polizei Northeim \n", "31621 2021-03-21 08:30:06 2389342951 Polizei NRW PB \n", "31664 2021-03-21 13:30:06 2389146030 Polizei NRW BI \n", "31826 2021-03-22 10:06:08 223758384 Polizei Sachsen \n", "32081 2021-03-22 16:46:56 223758384 Polizei Sachsen \n", "32098 2021-03-22 18:07:29 769128278 Polizei NRW DO \n", "32365 2021-03-23 15:30:08 2389142300 Polizei NRW AC \n", "32471 2021-03-24 07:46:00 2389142300 Polizei NRW AC \n", "33210 2021-03-26 09:01:03 783322939580092418 Polizei Ulm \n", "33666 2021-03-28 04:00:46 821727790374469632 Polizei Saarland \n", "36612 2021-04-07 11:12:53 2389146030 Polizei NRW BI \n", "36744 2021-04-07 19:18:09 808666671468658688 Polizei Bremen \n", "37736 2021-04-12 11:13:21 808666671468658688 Polizei Bremen \n", "39878 2021-04-20 10:48:52 2389142300 Polizei NRW AC \n", "40525 2021-04-27 14:30:55 2397974054 Polizei Berlin \n", "42085 2021-05-03 08:34:50 2904886151 Polizei Hamburg \n", "42217 2021-05-03 11:36:29 3064348636 Polizei Thüringen \n", "42220 2021-05-03 11:38:44 3064348636 Polizei Thüringen \n", "42237 2021-05-03 12:11:17 770652658566852608 Polizei Hannover \n", "\n", " handle stadt bundesland \n", "1031 BremenPolizei Bremen Bremen \n", "1239 BremenPolizei Bremen Bremen \n", "1429 Polizei_Ffm Frankfurt am Main Hessen \n", "5300 PolizeiMannheim Mannheim Baden-Württemberg \n", "8149 PolizeiHamburg Hamburg Hamburg \n", "9176 Polizei_KA Karlsruhe Baden-Württemberg \n", "9763 Polizei_KA Karlsruhe Baden-Württemberg \n", "11702 polizei_nrw_vie Viersen Nordrhein-Westfalen \n", "11752 polizeiberlin Berlin Berlin \n", "15082 polizeiberlin Berlin Berlin \n", "16675 Polizei_Ffm Frankfurt am Main Hessen \n", "17388 BremenPolizei Bremen Bremen \n", "17406 BremenPolizei Bremen Bremen \n", "21801 BremenPolizei Bremen Bremen \n", "26772 Polizei_MD Magdeburg Sachsen-Anhalt \n", "27421 bpol_by München Bayern \n", "29212 polizei_nrw_do Dortmund Nordrhein-Westfalen \n", "30475 polizei_nrw_hx Höxter Nordrhein-Westfalen \n", "30622 PolizeiSaarland Saarbrücken Saarland \n", "30801 Polizei_NBG Nienburg (Weser) Niedersachsen \n", "30804 Polizei_HI Hildesheim Niedersachsen \n", "30806 Polizei_SDL Stendal Sachsen-Anhalt \n", "30821 Polizei_SDL Stendal Sachsen-Anhalt \n", "30845 Polizei_SDL Stendal Sachsen-Anhalt \n", "30866 Polizei_CUX Cuxhaven Niedersachsen \n", "31609 Polizei_BadN Bad Nenndorf Niedersachsen \n", "31610 Polizei_GOE Göttingen Niedersachsen \n", "31611 Polizei_HI Hildesheim Niedersachsen \n", "31612 Polizei_HOL Holzminden Niedersachsen \n", "31613 Polizei_NBG Nienburg (Weser) Niedersachsen \n", "31614 Polizei_OHA Osterode am Harz Niedersachsen \n", "31615 Polizei_STH Stadthagen Niedersachsen \n", "31616 Polizei_BBG Bückeburg Niedersachsen \n", "31617 Polizei_HM Hameln-Pyrmont Niedersachsen \n", "31618 Polizei_NOM Northeim Niedersachsen \n", "31621 polizei_nrw_pb Paderborn Nordrhein-Westfalen \n", "31664 polizei_nrw_bi Bielefeld Nordrhein-Westfalen \n", "31826 PolizeiSachsen Dresden Sachsen \n", "32081 PolizeiSachsen Dresden Sachsen \n", "32098 polizei_nrw_do Dortmund Nordrhein-Westfalen \n", "32365 Polizei_NRW_AC Aachen Nordrhein-Westfalen \n", "32471 Polizei_NRW_AC Aachen Nordrhein-Westfalen \n", "33210 PolizeiUL Ulm Baden-Württemberg \n", "33666 PolizeiSaarland Saarbrücken Saarland \n", "36612 polizei_nrw_bi Bielefeld Nordrhein-Westfalen \n", "36744 BremenPolizei Bremen Bremen \n", "37736 BremenPolizei Bremen Bremen \n", "39878 Polizei_NRW_AC Aachen Nordrhein-Westfalen \n", "40525 polizeiberlin Berlin Berlin \n", "42085 PolizeiHamburg Hamburg Hamburg \n", "42217 Polizei_Thuer Erfurt Thüringen \n", "42220 Polizei_Thuer Erfurt Thüringen \n", "42237 Polizei_H Hannover Niedersachsen " ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tw = pd.read_csv('..\\mod_data\\copbird_table_tweet_ext_state.csv', na_filter=False)\n", "df_tw_col = 'tweet_text'\n", "get_topics_by_str_lst(topic=topic_3, df=df_tw, col_name=df_tw_col)" ] }, { "cell_type": "code", "execution_count": 56, "id": "8d0a53cc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
article_idtimestamplocationbundeslandcontenttw_user_id
76934970-48012222020-12-29 14:42:00FrankfurthessenFrankfurt (ots) - (em) Freitagnachmittag (25. ...
1396235235-48125572021-01-14 16:24:00BremenbremenBremen (ots) - - Ort: Bremen-Mitte, Bahnhofsvo...808666671468658688
19445104234-48235022021-01-27 17:12:00LingenniedersachsenLingen (ots) - Lingen/Brögbern - Am Sonntagnac...773160525324808193
20919117687-48259872021-01-30 13:38:00Bad Dürkheimrheinland-pfalzBad Dürkheim (ots) - Am 30.01.2021 um 13 Uhr b...
22245126723-48048012021-01-05 11:25:00MühlhausenthueringenMühlhausen (ots) - Mit ausländerfeindlichen Pa...
231176013-48072452021-01-08 12:08:00NürnbergbayernNürnberg (ots) - In den vergangenen Tagen wurd...800718568572612608
2423943526-48363652021-02-11 15:10:00Nordwaldenordrhein-westfalenNordwalde (ots) - Gemeinsame Pressemitteilung ...
2601824843-48398062021-02-16 12:17:00Isselburgnordrhein-westfalenIsselburg (ots) - Einige Konsequenzen eingehan...
2982611187-48459462021-02-23 11:43:00Münsternordrhein-westfalenMünster (ots) - Nach rassistischen Beleidigung...2284811875
3124135235-48492622021-02-26 11:53:00BremenbremenBremen (ots) - - Ort: Bremen-Hemelingen, OT He...808666671468658688
31719108747-48498052021-02-27 02:30:00Pasewalkmecklenburg-vorpommernPasewalk (ots) - Am 26.02.2021 wurden die Beam...
3213735235-48284452021-02-02 14:34:00BremenbremenBremen (ots) - - Ort: Bremen-Findorff, OT Weid...808666671468658688
3214035235-48279772021-02-02 10:45:00BremenbremenBremen (ots) - - Ort: Bremen-Findorff, OT Weid...808666671468658688
3264135235-48287772021-02-03 09:34:00BremenbremenBremen (ots) - - Ort: Bremen-Findorff Zeit: 02...808666671468658688
37060119277-48634182021-03-14 14:35:00OldenburgniedersachsenOldenburg (ots) - Am 13.02.21, um 16:20 Uhr, w...
3954755625-48686262021-03-19 13:35:00Paderbornnordrhein-westfalenPaderborn (ots) - HC - Ein Paderborner beleidi...2389342951
3957512522-48685142021-03-19 12:39:00Bielefeldnordrhein-westfalenBielefeld (ots) - HC/ Bielefeld- Paderborn - E...2389146030
411624971-48706622021-03-22 17:13:00Dortmundnordrhein-westfalenDortmund (ots) - Lfd. Nr.: 0314 In der Nacht v...769128278
4177711559-48714072021-03-23 12:53:00Aachennordrhein-westfalenAachen (ots) - Unbekannte Täter haben - vermut...2389142300
44966117683-48526652021-03-02 14:13:00Kaiserslauternrheinland-pfalzKaiserslautern (ots) - Unbekannte Schmierfinke...886595382
48417104236-48583022021-03-08 14:58:00OsnabrückniedersachsenOsnabrück (ots) - Unbekannte machten sich zwis...760752388454113280
4986435235-48866612021-04-12 12:46:00BremenbremenBremen (ots) - - Ort: Bremen-Blumenthal, OT Lü...808666671468658688
5347965846-48803172021-04-01 20:30:00Herfordnordrhein-westfalenHerford (ots) - (mmb) Am Donnerstag (01.04.202...2389393237
5410911559-48937102021-04-20 11:41:00Aachennordrhein-westfalenAachen (ots) - Unbekannte Täter beschmierten i...2389142300
5642843648-48995392021-04-26 14:43:00Marburg-BiedenkopfhessenMarburg-Biedenkopf (ots) - Mahnmal beschädigt-...
6741066841-49051622021-05-03 12:26:00HannoverniedersachsenHannover (ots) - Am Freitagabend, 30.04.2021, ...770652658566852608
67743126719-49051632021-05-03 12:27:00SömmerdathueringenSömmerda (ots) - Am Freitagnachmittag pöbelte ...
69663126723-49094192021-05-07 11:34:00NordhausenthueringenNordhausen (ots) - Bisherigen Ermittlungen zuf...
\n", "
" ], "text/plain": [ " article_id timestamp location \\\n", "7693 4970-4801222 2020-12-29 14:42:00 Frankfurt \n", "13962 35235-4812557 2021-01-14 16:24:00 Bremen \n", "19445 104234-4823502 2021-01-27 17:12:00 Lingen \n", "20919 117687-4825987 2021-01-30 13:38:00 Bad Dürkheim \n", "22245 126723-4804801 2021-01-05 11:25:00 Mühlhausen \n", "23117 6013-4807245 2021-01-08 12:08:00 Nürnberg \n", "24239 43526-4836365 2021-02-11 15:10:00 Nordwalde \n", "26018 24843-4839806 2021-02-16 12:17:00 Isselburg \n", "29826 11187-4845946 2021-02-23 11:43:00 Münster \n", "31241 35235-4849262 2021-02-26 11:53:00 Bremen \n", "31719 108747-4849805 2021-02-27 02:30:00 Pasewalk \n", "32137 35235-4828445 2021-02-02 14:34:00 Bremen \n", "32140 35235-4827977 2021-02-02 10:45:00 Bremen \n", "32641 35235-4828777 2021-02-03 09:34:00 Bremen \n", "37060 119277-4863418 2021-03-14 14:35:00 Oldenburg \n", "39547 55625-4868626 2021-03-19 13:35:00 Paderborn \n", "39575 12522-4868514 2021-03-19 12:39:00 Bielefeld \n", "41162 4971-4870662 2021-03-22 17:13:00 Dortmund \n", "41777 11559-4871407 2021-03-23 12:53:00 Aachen \n", "44966 117683-4852665 2021-03-02 14:13:00 Kaiserslautern \n", "48417 104236-4858302 2021-03-08 14:58:00 Osnabrück \n", "49864 35235-4886661 2021-04-12 12:46:00 Bremen \n", "53479 65846-4880317 2021-04-01 20:30:00 Herford \n", "54109 11559-4893710 2021-04-20 11:41:00 Aachen \n", "56428 43648-4899539 2021-04-26 14:43:00 Marburg-Biedenkopf \n", "67410 66841-4905162 2021-05-03 12:26:00 Hannover \n", "67743 126719-4905163 2021-05-03 12:27:00 Sömmerda \n", "69663 126723-4909419 2021-05-07 11:34:00 Nordhausen \n", "\n", " bundesland \\\n", "7693 hessen \n", "13962 bremen \n", "19445 niedersachsen \n", "20919 rheinland-pfalz \n", "22245 thueringen \n", "23117 bayern \n", "24239 nordrhein-westfalen \n", "26018 nordrhein-westfalen \n", "29826 nordrhein-westfalen \n", "31241 bremen \n", "31719 mecklenburg-vorpommern \n", "32137 bremen \n", "32140 bremen \n", "32641 bremen \n", "37060 niedersachsen \n", "39547 nordrhein-westfalen \n", "39575 nordrhein-westfalen \n", "41162 nordrhein-westfalen \n", "41777 nordrhein-westfalen \n", "44966 rheinland-pfalz \n", "48417 niedersachsen \n", "49864 bremen \n", "53479 nordrhein-westfalen \n", "54109 nordrhein-westfalen \n", "56428 hessen \n", "67410 niedersachsen \n", "67743 thueringen \n", "69663 thueringen \n", "\n", " content tw_user_id \n", "7693 Frankfurt (ots) - (em) Freitagnachmittag (25. ... \n", "13962 Bremen (ots) - - Ort: Bremen-Mitte, Bahnhofsvo... 808666671468658688 \n", "19445 Lingen (ots) - Lingen/Brögbern - Am Sonntagnac... 773160525324808193 \n", "20919 Bad Dürkheim (ots) - Am 30.01.2021 um 13 Uhr b... \n", "22245 Mühlhausen (ots) - Mit ausländerfeindlichen Pa... \n", "23117 Nürnberg (ots) - In den vergangenen Tagen wurd... 800718568572612608 \n", "24239 Nordwalde (ots) - Gemeinsame Pressemitteilung ... \n", "26018 Isselburg (ots) - Einige Konsequenzen eingehan... \n", "29826 Münster (ots) - Nach rassistischen Beleidigung... 2284811875 \n", "31241 Bremen (ots) - - Ort: Bremen-Hemelingen, OT He... 808666671468658688 \n", "31719 Pasewalk (ots) - Am 26.02.2021 wurden die Beam... \n", "32137 Bremen (ots) - - Ort: Bremen-Findorff, OT Weid... 808666671468658688 \n", "32140 Bremen (ots) - - Ort: Bremen-Findorff, OT Weid... 808666671468658688 \n", "32641 Bremen (ots) - - Ort: Bremen-Findorff Zeit: 02... 808666671468658688 \n", "37060 Oldenburg (ots) - Am 13.02.21, um 16:20 Uhr, w... \n", "39547 Paderborn (ots) - HC - Ein Paderborner beleidi... 2389342951 \n", "39575 Bielefeld (ots) - HC/ Bielefeld- Paderborn - E... 2389146030 \n", "41162 Dortmund (ots) - Lfd. Nr.: 0314 In der Nacht v... 769128278 \n", "41777 Aachen (ots) - Unbekannte Täter haben - vermut... 2389142300 \n", "44966 Kaiserslautern (ots) - Unbekannte Schmierfinke... 886595382 \n", "48417 Osnabrück (ots) - Unbekannte machten sich zwis... 760752388454113280 \n", "49864 Bremen (ots) - - Ort: Bremen-Blumenthal, OT Lü... 808666671468658688 \n", "53479 Herford (ots) - (mmb) Am Donnerstag (01.04.202... 2389393237 \n", "54109 Aachen (ots) - Unbekannte Täter beschmierten i... 2389142300 \n", "56428 Marburg-Biedenkopf (ots) - Mahnmal beschädigt-... \n", "67410 Hannover (ots) - Am Freitagabend, 30.04.2021, ... 770652658566852608 \n", "67743 Sömmerda (ots) - Am Freitagnachmittag pöbelte ... \n", "69663 Nordhausen (ots) - Bisherigen Ermittlungen zuf... " ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pm = pd.read_csv(r'..\\mod_data\\2020-12_2021-05_presseportal.csv', na_filter=False)\n", "df_pm_col = 'content'\n", "get_topics_by_str_lst(topic=topic_3, df=df_pm, col_name=df_pm_col)" ] }, { "cell_type": "code", "execution_count": null, "id": "a205693e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "6fb4345e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "python-scientific kernel", "language": "python", "name": "python-scientific" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }