{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "847f9651",
   "metadata": {},
   "source": [
    "Pakete laden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0a01b63b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm  # progress bar integration for pandas\n",
    "tqdm.pandas()\n",
    "import folium\n",
    "from folium import plugins as fpg"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b515c823",
   "metadata": {},
   "source": [
    "Daten laden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0eec2734",
   "metadata": {},
   "outputs": [],
   "source": [
    "tweet_csv = '../data/copbird_table_tweet.csv'\n",
    "entity_csv = '../data/copbird_table_entity.csv'\n",
    "user_csv = '../data/copbird_table_user.csv'\n",
    "cop_geolocations_csv = '../geolocations/polizei_accounts_geo.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fbe080bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "limit = None  # maximum number of rows read per CSV; None loads the complete files\n",
    "tweets = pd.read_csv(tweet_csv, nrows=limit)\n",
    "entities = pd.read_csv(entity_csv, nrows=limit)\n",
    "users = pd.read_csv(user_csv, nrows=limit)\n",
    "geolocs = pd.read_csv(cop_geolocations_csv, sep=r'\\t', engine='python', nrows=limit)\n",
    "# Skip the first two rows since they do not contain GPS data. The explicit copy\n",
    "# makes geolocs an independent frame, so the column assignment further down\n",
    "# does not write into a slice of the parsed CSV (SettingWithCopyWarning).\n",
    "geolocs = geolocs.iloc[2:, :].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86a35bfb",
   "metadata": {},
   "source": [
    "Map für Deutschland initialisieren"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "46da62ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "map_ = folium.Map(location=[51.423, 9.03], zoom_start=6)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c426c02",
   "metadata": {},
   "source": [
    "Kombination von Tweets mit GPS-Stempel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b70fd2b2",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Normalise the handles on both sides so that the join is insensitive to case\n",
    "# and surrounding whitespace.\n",
    "users['handle'] = users['handle'].str.lower().str.strip()\n",
    "geolocs['Polizei Account'] = geolocs['Polizei Account'].str.lower().str.strip()\n",
    "\n",
    "# Attach the GPS record to every account that has one. The handle column is\n",
    "# only needed once, so the right-hand join key is dropped again.\n",
    "users_geolocs = (\n",
    "    users\n",
    "    .merge(geolocs, left_on='handle', right_on='Polizei Account', how='inner')\n",
    "    .drop(columns='Polizei Account')\n",
    ")\n",
    "\n",
    "# Attach the tweets of those accounts. Both frames carry an 'id' column, so the\n",
    "# merge suffixes them: 'id_x' (account id, identical to 'user_id') is dropped\n",
    "# and 'id_y' (tweet id) becomes 'id' again.\n",
    "us_ge_tw = (\n",
    "    users_geolocs\n",
    "    .merge(tweets, left_on='id', right_on='user_id', how='inner')\n",
    "    .drop(columns='id_x')\n",
    "    .rename(columns={'id_y': 'id'})\n",
    ")"
   ]
  },
  {
   "cell_type": "raw",
   "id": "d4d9325e",
   "metadata": {},
   "source": [
    "Heat = Tweet mit maximaler Metrik bezogen auf den Account"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6122decc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "146\n",
      "161\n"
     ]
    }
   ],
   "source": [
    "metric = 'like_count'\n",
    "grouped = us_ge_tw.groupby(['LAT', 'LONG'])\n",
    "\n",
    "number_of_groups = grouped.ngroups\n",
    "print(number_of_groups)\n",
    "print(len(geolocs))\n",
    "max_all = us_ge_tw[metric].max()\n",
    "\n",
    "# One heat point per location: the highest value of the metric among all tweets\n",
    "# sent from that location. The groups are aggregated directly instead of being\n",
    "# looked up row by row from geolocs, because not every row of geolocs has a\n",
    "# group (accounts without a matching user or without tweets were removed by\n",
    "# the inner joins) and get_group() raises a KeyError for those. Locations whose\n",
    "# metric is entirely missing are left out, since HeatMap rejects NaN values.\n",
    "max_per_location = grouped[metric].max().dropna()\n",
    "max_counts = [\n",
    "    [float(lat), float(lon), float(peak)]\n",
    "    for (lat, lon), peak in max_per_location.items()\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c944621",
   "metadata": {},
   "source": [
    "Experiment with the handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "78d607bc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " id name handle\n",
      "57 2397974054 Polizei Berlin polizeiberlin\n",
      " Polizei Account Name Typ Bundesland Stadt LAT \\\n",
      "25 polizeiberlin Polizei Berlin Polizei Berlin Berlin 52.520007 \n",
      "\n",
      " LONG \n",
      "25 13.404954 \n"
     ]
    }
   ],
   "source": [
    "# Look up the same account in both tables by its normalised handle.\n",
    "print(users.loc[users['handle']=='polizeiberlin'])\n",
    "print(geolocs.loc[geolocs['Polizei Account']=='polizeiberlin'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cd9698b3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Polizei Account polizeibhv\n",
      "Name Polizei Bremerhaven\n",
      "Typ Polizei\n",
      "Bundesland Bremen\n",
      "Stadt Bremerhaven\n",
      "LAT 53.539584\n",
      "LONG 8.580942\n",
      "Name: 27, dtype: object\n",
      "polizeiberlin\n",
      "polizeibhv\n"
     ]
    }
   ],
   "source": [
    "# iloc is positional: geolocs lost its first two rows after loading, so position 25\n",
    "# holds the row labelled 27 and not the 'polizeiberlin' row labelled 25.\n",
    "u = users.iloc[57]\n",
    "g = geolocs.iloc[25]\n",
    "print(g)\n",
    "print(u['handle'])\n",
    "print(g['Polizei Account'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "3ac1d3e3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "19498.0\n"
     ]
    }
   ],
   "source": [
    "# Like counts scaled relative to the most-liked tweet.\n",
    "max_likes = us_ge_tw['like_count'].max()\n",
    "likes = us_ge_tw['like_count'] / max_likes\n",
    "print(max_likes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e862ec51",
   "metadata": {},
   "outputs": [],
   "source": [
    "locs = us_ge_tw[['LAT','LONG']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "400543a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-tweet heat points [LAT, LONG, scaled like count]; rows with missing values are dropped.\n",
    "wlocs = pd.concat([locs, likes], axis=1).dropna()\n",
    "wloclist = wlocs.values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "29af903a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\nMake this Notebook Trusted to load map: File -> Trust Notebook\n"
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw one heat point per location and show the map as the cell result.\n",
    "fpg.HeatMap(data=max_counts, radius=15, min_opacity=.5, blur=5).add_to(map_)\n",
    "map_"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}