{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5eecbeeb", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from tqdm import tqdm # Fortschrittsanzeige für pandas\n", "tqdm.pandas()\n", "tweet_csv = '../data/copbird_table_tweet.csv'\n", "entity_csv = '../data/copbird_table_entity.csv'\n", "user_csv = '../data/copbird_table_user.csv'\n", "geo_csv = '../geolocations/polizei_accounts_geo.csv'\n", "\n", "limit = None\n", "tweets = pd.read_csv(tweet_csv, nrows=limit)\n", "entities = pd.read_csv(entity_csv, nrows=limit)\n", "users = pd.read_csv(user_csv, nrows=limit)\n", "geo = pd.read_csv(geo_csv, nrows=limit, sep=\"\\t\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "2fbcec52", "metadata": {}, "outputs": [], "source": [ "#tweets[\"contains\"] = tweets[\"tweet_text\"].str.contains(r'(([@])\\w)')\n", "#tweets_subset = tweets[tweets[\"contains\"]]\n", "#tweets_subset[\"mention\"] = tweets_subset[\"tweet_text\"].str.extract(r'(([@])\\w)')\n", "mentions = entities[entities[\"entity_type\"]==\"mention\"]" ] }, { "cell_type": "code", "execution_count": 3, "id": "3e1d2876", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_idtagentity_type
01321021123463663616mahanna196mention
11321025127388188673bkamention
21321028108665950208StrupeitVolkermention
31321029199998656513bkamention
41321032307277443072Sitewindermention
51321033945467834368GunstickULMmention
61321034866377543680Korbi161224mention
71321035908146233344GunstickULMmention
81321036834579894272retikulum383mention
91321036834579894272FPolitikermention
\n", "
" ], "text/plain": [ " tweet_id tag entity_type\n", "0 1321021123463663616 mahanna196 mention\n", "1 1321025127388188673 bka mention\n", "2 1321028108665950208 StrupeitVolker mention\n", "3 1321029199998656513 bka mention\n", "4 1321032307277443072 Sitewinder mention\n", "5 1321033945467834368 GunstickULM mention\n", "6 1321034866377543680 Korbi161224 mention\n", "7 1321035908146233344 GunstickULM mention\n", "8 1321036834579894272 retikulum383 mention\n", "9 1321036834579894272 FPolitiker mention" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mentions.head(10)" ] }, { "cell_type": "code", "execution_count": 4, "id": "944f2535", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
iduser_id
01321021123463663616778895426007203840
113210231140719697922397974054
213210251273881886732397974054
313210281086659502082810902381
41321029199998656513223758384
\n", "
" ], "text/plain": [ " id user_id\n", "0 1321021123463663616 778895426007203840\n", "1 1321023114071969792 2397974054\n", "2 1321025127388188673 2397974054\n", "3 1321028108665950208 2810902381\n", "4 1321029199998656513 223758384" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tweets_stripped = tweets[[\"id\", \"user_id\"]]\n", "tweets_stripped.head(5)" ] }, { "cell_type": "code", "execution_count": 5, "id": "d2440bee", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " merged[\"connection\"] = merged[\"handle\"]+ \"->\"+ merged[\"tag\"]\n" ] } ], "source": [ "mentions_inter = mentions.merge(tweets_stripped, left_on=\"tweet_id\", right_on=\"id\")\n", "mentions_inter2 = mentions_inter.merge(users, left_on=\"user_id\", right_on=\"id\")\n", "merged = mentions_inter2[[\"tweet_id\", \"user_id\", \"handle\", \"tag\"]]\n", "merged[\"connection\"] = merged[\"handle\"]+ \"->\"+ merged[\"tag\"]\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "95d3a09d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "23796" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged2 = merged.groupby('connection').size().reset_index(name='count').sort_values(by='count', ascending=False)\n", "merged2.size" ] }, { "cell_type": "code", "execution_count": 7, "id": "aea02d5d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idhandletagconnection
01321021123463663616778895426007203840Polizei_OLmahanna196Polizei_OL->mahanna196
11321037834246066181778895426007203840Polizei_OLmahanna196Polizei_OL->mahanna196
21321073940199100416778895426007203840Polizei_OLEmma36166433Polizei_OL->Emma36166433
31321096827937185793778895426007203840Polizei_OLline9600Polizei_OL->line9600
41321338058600570880778895426007203840Polizei_OLKeroth_KatlatasPolizei_OL->Keroth_Katlatas
\n", "
" ], "text/plain": [ " tweet_id user_id handle tag \\\n", "0 1321021123463663616 778895426007203840 Polizei_OL mahanna196 \n", "1 1321037834246066181 778895426007203840 Polizei_OL mahanna196 \n", "2 1321073940199100416 778895426007203840 Polizei_OL Emma36166433 \n", "3 1321096827937185793 778895426007203840 Polizei_OL line9600 \n", "4 1321338058600570880 778895426007203840 Polizei_OL Keroth_Katlatas \n", "\n", " connection \n", "0 Polizei_OL->mahanna196 \n", "1 Polizei_OL->mahanna196 \n", "2 Polizei_OL->Emma36166433 \n", "3 Polizei_OL->line9600 \n", "4 Polizei_OL->Keroth_Katlatas " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged.head(5)" ] }, { "cell_type": "markdown", "id": "75f75a62", "metadata": {}, "source": [ "connection count ist mit \"duplicates\" weil mit tweet_ids, cc_wo_id (connection count without id) ist darum bereingt" ] }, { "cell_type": "code", "execution_count": 8, "id": "375fc61c", "metadata": {}, "outputs": [], "source": [ "connection_count = pd.merge(merged2, merged, how=\"left\", on=\"connection\")\n", "cc_wo_id = connection_count[[\"connection\", \"count\", \"user_id\", \"handle\", \"tag\"]]\n", "cc_wo_id = cc_wo_id.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 9, "id": "d11693e8", "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdst\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_edge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mneighbor_map\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_adj_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.8/dist-packages/pyvis/network.py\u001b[0m in \u001b[0;36madd_edge\u001b[0;34m(self, source, to, **options)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0mdest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'to'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 347\u001b[0m if (\n\u001b[0;32m--> 348\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdest\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mfrm\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 349\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mfrm\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 350\u001b[0m ):\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "from pyvis.network import Network\n", "\n", "data = cc_wo_id.head(15000)\n", "\n", "cop_net = Network(height='500px', width='100%', bgcolor='#222222', font_color='white', notebook=True)\n", "\n", "cop_net.barnes_hut()\n", "\n", "sources = data['handle']\n", "targets = data['tag']\n", "weights = data['count']\n", "\n", "edge_data = zip(sources, targets, weights)\n", "\n", "for e in edge_data:\n", " src = e[0]\n", " dst = e[1]\n", " w = e[2]\n", "\n", " cop_net.add_node(src, src, title=src)\n", " cop_net.add_node(dst, dst, title=dst)\n", " cop_net.add_edge(src, dst, value=w)\n", "\n", "neighbor_map = cop_net.get_adj_list()\n", "\n", "# add neighbor data to node hover data\n", "for node in cop_net.nodes:\n", " node['title'] += ' Neighbors:
' + '
'.join(neighbor_map[node['id']])\n", " node['value'] = len(neighbor_map[node['id']])\n", "\n", "#cop_net.show('copnet.html')" ] }, { "cell_type": "code", "execution_count": 10, "id": "f706b956", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
connectioncountuser_idhandletag
0Polizei_Ffm->Stadt_FFM1372272909014Polizei_FfmStadt_FFM
137polizei_nrw_me->KreisMettmann1322389359068polizei_nrw_meKreisMettmann
269Polizei_OHA->Polizei_GOE82773805850687340544Polizei_OHAPolizei_GOE
351LkaBaWue->LkaBaWue78814455464394182656LkaBaWueLkaBaWue
429Polizei_nrw_ms->Feuerwehr_MS692284811875Polizei_nrw_msFeuerwehr_MS
498Polizei_KA->Natenom653029998264Polizei_KANatenom
563Polizei_KA->axel_gutmann613029998264Polizei_KAaxel_gutmann
624polizei_nrw_do->stadtdortmund61769128278polizei_nrw_dostadtdortmund
685Polizei_nrw_ms->muenster_de592284811875Polizei_nrw_msmuenster_de
744polizei_nrw_bo->FW_Bochum572389155192polizei_nrw_boFW_Bochum
\n", "
" ], "text/plain": [ " connection count user_id handle \\\n", "0 Polizei_Ffm->Stadt_FFM 137 2272909014 Polizei_Ffm \n", "137 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n", "269 Polizei_OHA->Polizei_GOE 82 773805850687340544 Polizei_OHA \n", "351 LkaBaWue->LkaBaWue 78 814455464394182656 LkaBaWue \n", "429 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 Polizei_nrw_ms \n", "498 Polizei_KA->Natenom 65 3029998264 Polizei_KA \n", "563 Polizei_KA->axel_gutmann 61 3029998264 Polizei_KA \n", "624 polizei_nrw_do->stadtdortmund 61 769128278 polizei_nrw_do \n", "685 Polizei_nrw_ms->muenster_de 59 2284811875 Polizei_nrw_ms \n", "744 polizei_nrw_bo->FW_Bochum 57 2389155192 polizei_nrw_bo \n", "\n", " tag \n", "0 Stadt_FFM \n", "137 KreisMettmann \n", "269 Polizei_GOE \n", "351 LkaBaWue \n", "429 Feuerwehr_MS \n", "498 Natenom \n", "563 axel_gutmann \n", "624 stadtdortmund \n", "685 muenster_de \n", "744 FW_Bochum " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cc_wo_id.head(10)" ] }, { "cell_type": "code", "execution_count": 11, "id": "6de04c9f", "metadata": {}, "outputs": [], "source": [ "geo_neu = geo[[\"Polizei Account\", \"LAT\", \"LONG\"]]\n", "geo_neu2 = geo[[\"Polizei Account\", \"LAT\", \"LONG\"]]" ] }, { "cell_type": "code", "execution_count": 12, "id": "237cf51b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Polizei AccountLATLONG
0bpol_11-NaN
\n", "
" ], "text/plain": [ " Polizei Account LAT LONG\n", "0 bpol_11 - NaN" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_neu.head(1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "c50a74a7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.lower()\n", ":6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.strip()\n" ] } ], "source": [ "cc_wo_id['handle'] = cc_wo_id['handle'].str.lower()\n", "cc_wo_id['handle'] = cc_wo_id['handle'].str.strip()\n", "cc_wo_id['tag'] = cc_wo_id['tag'].str.lower()\n", "cc_wo_id['tag'] = cc_wo_id['tag'].str.strip()\n", "geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.lower()\n", "geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.strip()\n", "merged_geo = pd.merge(cc_wo_id, geo_neu, how=\"left\", left_on=\"handle\", right_on=\"Polizei Account\")\n", "merged_geo = merged_geo.rename(columns={\"LAT\": \"LAT1\", \"LONG\": \"LONG1\"})\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "2f85d26d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
connectioncountuser_idhandletagPolizei AccountLAT1LONG1
0Polizei_Ffm->Stadt_FFM1372272909014polizei_ffmstadt_ffmpolizei_ffm50.1109228.682127
1polizei_nrw_me->KreisMettmann1322389359068polizei_nrw_mekreismettmannpolizei_nrw_me51.25277786.9777778
2Polizei_OHA->Polizei_GOE82773805850687340544polizei_ohapolizei_goepolizei_oha51.7278410.2508204
3LkaBaWue->LkaBaWue78814455464394182656lkabawuelkabawuelkabawue48.7758469.182932
4Polizei_nrw_ms->Feuerwehr_MS692284811875polizei_nrw_msfeuerwehr_mspolizei_nrw_ms51.96251017.6251879
\n", "
" ], "text/plain": [ " connection count user_id handle \\\n", "0 Polizei_Ffm->Stadt_FFM 137 2272909014 polizei_ffm \n", "1 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n", "2 Polizei_OHA->Polizei_GOE 82 773805850687340544 polizei_oha \n", "3 LkaBaWue->LkaBaWue 78 814455464394182656 lkabawue \n", "4 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 polizei_nrw_ms \n", "\n", " tag Polizei Account LAT1 LONG1 \n", "0 stadt_ffm polizei_ffm 50.110922 8.682127 \n", "1 kreismettmann polizei_nrw_me 51.2527778 6.9777778 \n", "2 polizei_goe polizei_oha 51.72784 10.2508204 \n", "3 lkabawue lkabawue 48.775846 9.182932 \n", "4 feuerwehr_ms polizei_nrw_ms 51.9625101 7.6251879 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_geo.head(5)" ] }, { "cell_type": "code", "execution_count": 15, "id": "8442525b", "metadata": {}, "outputs": [], "source": [ "merged_geo2 = pd.merge(merged_geo, geo_neu, how=\"left\", left_on=\"tag\", right_on=\"Polizei Account\")\n", "merged_geo2 = merged_geo2.rename(columns={\"LAT\": \"LAT2\", \"LONG\": \"LONG2\"})" ] }, { "cell_type": "code", "execution_count": 16, "id": "3defbeb9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
connectioncountuser_idhandletagPolizei Account_xLAT1LONG1Polizei Account_yLAT2LONG2
0Polizei_Ffm->Stadt_FFM1372272909014polizei_ffmstadt_ffmpolizei_ffm50.1109228.682127NaNNaNNaN
1polizei_nrw_me->KreisMettmann1322389359068polizei_nrw_mekreismettmannpolizei_nrw_me51.25277786.9777778NaNNaNNaN
2Polizei_OHA->Polizei_GOE82773805850687340544polizei_ohapolizei_goepolizei_oha51.7278410.2508204polizei_goe51.5412809.915804
3LkaBaWue->LkaBaWue78814455464394182656lkabawuelkabawuelkabawue48.7758469.182932lkabawue48.7758469.182932
4Polizei_nrw_ms->Feuerwehr_MS692284811875polizei_nrw_msfeuerwehr_mspolizei_nrw_ms51.96251017.6251879NaNNaNNaN
\n", "
" ], "text/plain": [ " connection count user_id handle \\\n", "0 Polizei_Ffm->Stadt_FFM 137 2272909014 polizei_ffm \n", "1 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n", "2 Polizei_OHA->Polizei_GOE 82 773805850687340544 polizei_oha \n", "3 LkaBaWue->LkaBaWue 78 814455464394182656 lkabawue \n", "4 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 polizei_nrw_ms \n", "\n", " tag Polizei Account_x LAT1 LONG1 Polizei Account_y \\\n", "0 stadt_ffm polizei_ffm 50.110922 8.682127 NaN \n", "1 kreismettmann polizei_nrw_me 51.2527778 6.9777778 NaN \n", "2 polizei_goe polizei_oha 51.72784 10.2508204 polizei_goe \n", "3 lkabawue lkabawue 48.775846 9.182932 lkabawue \n", "4 feuerwehr_ms polizei_nrw_ms 51.9625101 7.6251879 NaN \n", "\n", " LAT2 LONG2 \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 51.541280 9.915804 \n", "3 48.775846 9.182932 \n", "4 NaN NaN " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_geo2.head(5)" ] }, { "cell_type": "code", "execution_count": 17, "id": "4dc51158", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LAT1LONG1LAT2LONG2count
251.7278410.250820451.5412809.91580482
348.7758469.18293248.7758469.18293278
2248.39740039.993433648.7758469.18293236
2352.12053311.62762451.049328613.738143735
2452.131588911.639960952.605078211.859427934
\n", "
" ], "text/plain": [ " LAT1 LONG1 LAT2 LONG2 count\n", "2 51.72784 10.2508204 51.541280 9.915804 82\n", "3 48.775846 9.182932 48.775846 9.182932 78\n", "22 48.3974003 9.9934336 48.775846 9.182932 36\n", "23 52.120533 11.627624 51.0493286 13.7381437 35\n", "24 52.1315889 11.6399609 52.6050782 11.8594279 34" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_stripped = merged_geo2[[\"LAT1\", \"LONG1\", \"LAT2\", \"LONG2\", \"count\"]]\n", "geo_stripped = geo_stripped.dropna()\n", "geo_stripped = geo_stripped[geo_stripped.LAT1 != \"-\"]\n", "geo_stripped = geo_stripped[geo_stripped.LAT2 != \"-\"]\n", "geo_stripped.head(5)" ] }, { "cell_type": "code", "execution_count": 18, "id": "7267cf7f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LAT1 object\n", "LONG1 object\n", "LAT2 object\n", "LONG2 object\n", "count int64\n", "dtype: object" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_stripped.dtypes" ] }, { "cell_type": "code", "execution_count": 19, "id": "9bf0c75e", "metadata": {}, "outputs": [], "source": [ "geo_stripped[\"LAT1\"] = geo_stripped[\"LAT1\"].astype(float)\n", "geo_stripped[\"LONG1\"] = geo_stripped[\"LONG1\"].astype(float)\n", "geo_stripped[\"LAT2\"] = geo_stripped[\"LAT2\"].astype(float)\n", "geo_stripped[\"LONG2\"] = geo_stripped[\"LONG2\"].astype(float)\n", "geo_stripped[\"count\"] = geo_stripped[\"count\"].astype(int)" ] }, { "cell_type": "code", "execution_count": 20, "id": "7206e2d7", "metadata": {}, "outputs": [], "source": [ "geo_stripped = geo_stripped.reset_index()" ] }, { "cell_type": "code", "execution_count": 21, "id": "67612718", "metadata": {}, "outputs": [], "source": [ "import folium\n", "\n", "point = []\n", "\n", "\n", "for i in geo_stripped.index:\n", " point.append(([geo_stripped[\"LAT1\"][i], geo_stripped[\"LONG1\"][i]], \n", " [geo_stripped[\"LAT2\"][i], geo_stripped[\"LONG2\"][i]]))\n", " " ] }, { "cell_type": "code", "execution_count": 22, "id": "625c4037", "metadata": {}, "outputs": [], "source": [ "#ave_lat = sum(p[0] for p in point)/len(point)\n", "#ave_lon = sum(p[1] for p in point)/len(point)" ] }, { "cell_type": "code", "execution_count": 81, "id": "944eafa2", "metadata": {}, "outputs": [], "source": [ "# Load map centred on average coordinates\n" ] }, { "cell_type": "code", "execution_count": 82, "id": "9e05a2ee", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexLAT1LONG1LAT2LONG2count
0251.72784010.25082051.5412809.91580482
1348.7758469.18293248.7758469.18293278
22248.3974009.99343448.7758469.18293236
32352.12053311.62762451.04932913.73814435
42452.13158911.63996152.60507811.85942834
\n", "
" ], "text/plain": [ " index LAT1 LONG1 LAT2 LONG2 count\n", "0 2 51.727840 10.250820 51.541280 9.915804 82\n", "1 3 48.775846 9.182932 48.775846 9.182932 78\n", "2 22 48.397400 9.993434 48.775846 9.182932 36\n", "3 23 52.120533 11.627624 51.049329 13.738144 35\n", "4 24 52.131589 11.639961 52.605078 11.859428 34" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_stripped.head(5)" ] }, { "cell_type": "code", "execution_count": 83, "id": "480d4bd8", "metadata": {}, "outputs": [], "source": [ "thickness = geo_stripped[\"count\"][0]" ] }, { "cell_type": "code", "execution_count": 96, "id": "ac0dcc25", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " geo_handle[\"LAT\"] = geo_handle[\"LAT\"].astype(float)\n", ":5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " geo_handle[\"LONG\"] = geo_handle[\"LONG\"].astype(float)\n" ] } ], "source": [ "geo_handle_pre = geo[[\"Polizei Account\", \"Name\", \"LAT\", \"LONG\"]]\n", "geo_handle_pre = geo_handle_pre.dropna()\n", "geo_handle = geo_handle_pre[geo_handle_pre.LAT != \"-\"]\n", "geo_handle[\"LAT\"] = geo_handle[\"LAT\"].astype(float)\n", "geo_handle[\"LONG\"] = geo_handle[\"LONG\"].astype(float)\n", "geo_handle = geo_handle.reset_index()" ] }, { "cell_type": "code", "execution_count": 99, "id": "c2694f60", "metadata": {}, "outputs": [], "source": [ "import colorsys\n", "import branca\n", "import branca.colormap as cm\n", "\n", "my_map = folium.Map(location=[geo_stripped[\"LAT1\"][0], geo_stripped[\"LONG1\"][0],], zoom_start=8)\n", "\n", "i = 0\n", "for i in geo_handle.index:\n", " text = geo_handle[\"Name\"].iloc[i]\n", " name_lat = geo_handle[\"LAT\"].iloc[i]\n", " name_long = geo_handle[\"LONG\"].iloc[i]\n", " #print(i)\n", " folium.map.Marker(\n", " [name_lat, name_long],\n", " icon=folium.DivIcon(icon_size=(20,10),\n", " icon_anchor=(0,0), html='
%s
'%text)).add_to(my_map)\n", "\n", "colormap = cm.LinearColormap(colors=['lightblue', 'orange', 'red'], index=[0,5,60],vmin=0,vmax=90)\n", "i = 0\n", "for j in point:\n", " thickness = int(geo_stripped[\"count\"][i])\n", " #folium.PolyLine(j, color=rgb, weight=thickness, opacity=1).add_to(my_map)\n", " folium.PolyLine(j, color=colormap(thickness), weight=thickness, opacity=0.8).add_to(my_map)\n", " i = i+1\n", "\n" ] }, { "cell_type": "code", "execution_count": 100, "id": "4865f7d4", "metadata": {}, "outputs": [], "source": [ "# Save map\n", "my_map.save(\"./connections_with_name.html\")" ] }, { "cell_type": "code", "execution_count": 101, "id": "45ad95d5", "metadata": {}, "outputs": [], "source": [ "my_map_ohne = folium.Map(location=[geo_stripped[\"LAT1\"][0], geo_stripped[\"LONG1\"][0],], zoom_start=8)\n", "\n", "colormap = cm.LinearColormap(colors=['lightblue', 'orange', 'red'], index=[0,5,60],vmin=0,vmax=90)\n", "i = 0\n", "for j in point:\n", " thickness = int(geo_stripped[\"count\"][i])\n", " #folium.PolyLine(j, color=rgb, weight=thickness, opacity=1).add_to(my_map)\n", " folium.PolyLine(j, color=colormap(thickness), weight=thickness, opacity=0.8).add_to(my_map_ohne)\n", " i = i+1\n" ] }, { "cell_type": "code", "execution_count": 102, "id": "42212a74", "metadata": {}, "outputs": [], "source": [ "# Save map\n", "my_map_ohne.save(\"./connections_without_name.html\")" ] }, { "cell_type": "code", "execution_count": null, "id": "d1d73f6d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 5 }