init

2023-03-26 18:36:49 +02:00 · 2023-03-26 18:36:49 +02:00 · 8d3c8b3974
commit 8d3c8b3974
77 changed files with 682928 additions and 0 deletions
--- a/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/Topic
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/Topic
@ -0,0 +1,394 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "7158ac22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import pyLDAvis.sklearn\n",
+    "import spacy\n",
+    "from multiprocess import Pool\n",
+    "from sklearn.decomposition import LatentDirichletAllocation\n",
+    "from sklearn.feature_extraction.text import CountVectorizer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69f33a46",
+   "metadata": {},
+   "source": [
+    "Funktionen zur Vorverarbeitung"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "1c66c06c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def filterSentencesByMinWordCount(text, minWordCount):\n",
+    "    \"\"\"Split ``text`` into sentences and keep the ones that are long enough.\n",
+    "\n",
+    "    Args:\n",
+    "        text: Raw text; it is parsed with the global ``nlp`` pipeline.\n",
+    "        minWordCount: Minimum number of tokens a sentence needs to be kept.\n",
+    "            Every token of the sentence span is counted.\n",
+    "\n",
+    "    Returns:\n",
+    "        list of str: Kept sentence texts with trailing whitespace removed.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): no visible cell defines a global `nlp` (get_tweets only\n",
+    "    # creates a local one) - make sure it exists before calling this.\n",
+    "    doc = nlp(text)\n",
+    "    # len(sent) is the span's token count, so no temporary word list is needed.\n",
+    "    return [sent.text.rstrip() for sent in doc.sents if len(sent) >= minWordCount]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3b9d084d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def extractSentences(document, minWordCount=4):\n",
+    "    \"\"\"Return the sufficiently long sentences of ``document``'s body text.\n",
+    "\n",
+    "    Args:\n",
+    "        document: Object handed to ``extractBodyContent`` to obtain the text.\n",
+    "        minWordCount (int, default=4): Minimum token count per sentence,\n",
+    "            forwarded to ``filterSentencesByMinWordCount``.\n",
+    "\n",
+    "    Returns:\n",
+    "        list of str: The sentences that passed the length filter.\n",
+    "    \"\"\"\n",
+    "    logging.debug('Extracting Sentences')\n",
+    "    # NOTE(review): extractBodyContent is not defined in the visible cells.\n",
+    "    text = extractBodyContent(document)\n",
+    "    return filterSentencesByMinWordCount(text, minWordCount)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7d85891e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def _scTokenFields(token):\n",
+    "    \"\"\"Return the positional ScToken arguments shared by a token and its children.\"\"\"\n",
+    "    return (token.text,\n",
+    "            token.lemma_,\n",
+    "            token.pos_, str(spacy.explain(token.pos_)),\n",
+    "            token.tag_, str(spacy.explain(token.tag_)),\n",
+    "            token.dep_, str(spacy.explain(token.dep_)),\n",
+    "            token.shape_, token.is_alpha, token.is_stop)\n",
+    "\n",
+    "\n",
+    "def tokenizeSentence(doc):\n",
+    "    \"\"\"Convert every token of ``doc`` into an ``ScToken``.\n",
+    "\n",
+    "    The direct children of each token are converted as well (without their\n",
+    "    own children) and passed to the parent's ``ScToken`` as the final\n",
+    "    constructor argument.\n",
+    "\n",
+    "    Args:\n",
+    "        doc: Iterable of spaCy tokens, e.g. a parsed sentence.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: One ``ScToken`` per token, in document order.\n",
+    "    \"\"\"\n",
+    "    logging.debug('Tokenizing')\n",
+    "    tokenList = []\n",
+    "    for token in doc:\n",
+    "        childrenList = [ScToken(*_scTokenFields(child)) for child in token.children]\n",
+    "        tokenList.append(ScToken(*_scTokenFields(token), childrenList))\n",
+    "    return tokenList"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7564c883",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def chunkSentence(doc):\n",
+    "    \"\"\"Wrap every noun chunk of ``doc`` in an ``ScChunk``.\n",
+    "\n",
+    "    Each ``ScChunk`` receives, in this order: the chunk text, the text of the\n",
+    "    chunk's root token, the root's dependency label and the text of the\n",
+    "    root's head token.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: One ``ScChunk`` per entry of ``doc.noun_chunks``.\n",
+    "    \"\"\"\n",
+    "    logging.debug('Chunking')\n",
+    "    return [\n",
+    "        ScChunk(nounChunk.text, nounChunk.root.text,\n",
+    "                nounChunk.root.dep_, nounChunk.root.head.text)\n",
+    "        for nounChunk in doc.noun_chunks\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5db74302",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def findEntitiesInSentence(doc):\n",
+    "    \"\"\"Wrap every named entity of ``doc`` in an ``ScEntity``.\n",
+    "\n",
+    "    Each ``ScEntity`` receives the entity text, its start and end character\n",
+    "    offsets, its label and the stringified ``spacy.explain`` text of that label.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: One ``ScEntity`` per entry of ``doc.ents``.\n",
+    "    \"\"\"\n",
+    "    logging.debug('Extracting Named Entities')\n",
+    "    return [\n",
+    "        ScEntity(found.text, found.start_char, found.end_char,\n",
+    "                 found.label_, str(spacy.explain(found.label_)))\n",
+    "        for found in doc.ents\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "b6753a90",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def fillSentences(document):\n",
+    "    \"\"\"Build one ``ScSentence`` per extracted sentence of ``document``.\n",
+    "\n",
+    "    Every sentence returned by ``extractSentences`` is parsed with the global\n",
+    "    ``nlp`` pipeline; its tokens, noun chunks and named entities are attached.\n",
+    "    The sentence's position in the list is used as its id.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: The ``ScSentence`` objects in sentence order.\n",
+    "    \"\"\"\n",
+    "    logging.info(\n",
+    "        'Building Sentences (Tokenizing, Chunking, Named Entity Recognition)')\n",
+    "    sentenceList = []\n",
+    "    # `sentenceId` instead of `id`, so the builtin id() is not shadowed.\n",
+    "    for sentenceId, sentence in enumerate(extractSentences(document)):\n",
+    "        doc = nlp(sentence)\n",
+    "        tokens = tokenizeSentence(doc)\n",
+    "        chunks = chunkSentence(doc)\n",
+    "        entities = findEntitiesInSentence(doc)\n",
+    "        sentenceList.append(\n",
+    "            ScSentence(sentenceId, sentence, tokens, chunks, entities))\n",
+    "\n",
+    "    return sentenceList"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "9af7a5c0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def fillSentencesAsOneString(sentences):\n",
+    "    \"\"\"Concatenate the ``text`` of all ``sentences`` into a single string.\n",
+    "\n",
+    "    Args:\n",
+    "        sentences: Iterable of objects with a ``text`` attribute.\n",
+    "\n",
+    "    Returns:\n",
+    "        str: All texts joined without any separator ('' for no sentences).\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): no separator is inserted between sentences - confirm that\n",
+    "    # running them together is intended.\n",
+    "    return \"\".join(sentence.text for sentence in sentences)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "8f952c82",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def removeStopwords(text):\n",
+    "    \"\"\"Return ``text`` without the tokens flagged as stopwords.\n",
+    "\n",
+    "    The text is parsed with the global ``nlp`` pipeline and converted by\n",
+    "    ``tokenizeSentence``; tokens whose ``isStopword`` is falsy are kept.\n",
+    "\n",
+    "    Returns:\n",
+    "        str: The kept token texts joined by single spaces.\n",
+    "    \"\"\"\n",
+    "    keptWords = []\n",
+    "    for token in tokenizeSentence(nlp(text)):\n",
+    "        if token.isStopword:\n",
+    "            continue\n",
+    "        keptWords.append(token.text)\n",
+    "    return \" \".join(keptWords)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "28910141",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'ScToken' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-14-7209b5cec518>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mdef\u001b[0m \u001b[0mnumberOfStopwords\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokens\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mScToken\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m     \u001b[0mcount\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtokens\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misStopword\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m             \u001b[0mcount\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'ScToken' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "def numberOfStopwords(tokens: \"list[ScToken]\"):\n",
+    "    \"\"\"Count the tokens whose ``isStopword`` attribute is truthy.\n",
+    "\n",
+    "    The annotation is a string so that defining this function does not raise\n",
+    "    ``NameError`` while ``ScToken`` is not (yet) defined in the kernel.\n",
+    "\n",
+    "    Args:\n",
+    "        tokens: Iterable of objects with an ``isStopword`` attribute.\n",
+    "\n",
+    "    Returns:\n",
+    "        int: Number of stopword tokens.\n",
+    "    \"\"\"\n",
+    "    return sum(1 for t in tokens if t.isStopword)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ce7cc9c1",
+   "metadata": {},
+   "source": [
+    "## Analyse"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "1adb09b7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Path of the tweet table CSV, relative to the notebook folder.\n",
+    "tweet_csv = '../data/copbird_table_tweet.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "c0936ecb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Tunable analysis settings. They are not referenced by the visible cells;\n",
+    "# presumably they parameterize the LDA topic model - TODO confirm.\n",
+    "NUM_TOPICS = 10\n",
+    "NUM_FEATURES = 1000\n",
+    "NUM_TOP_WORDS = 25"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "dbf0281f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\chris\\pycharmprojects\\copbird-group-16\\copbird-venv\\lib\\site-packages\\ipykernel\\ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
+      "  and should_run_async(code)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def get_tweets(path, limit=None):\n",
+    "    \"\"\"Load tweet texts from a CSV file and run them through spaCy.\n",
+    "\n",
+    "    Args:\n",
+    "        path: CSV file containing a ``tweet_text`` column.\n",
+    "        limit (int, default=None): Maximum number of rows to read\n",
+    "            (passed to ``nrows``); ``None`` reads the whole file.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: The results of ``nlp.pipe`` over the ``tweet_text`` column.\n",
+    "    \"\"\"\n",
+    "    # Only tweet_text is used below, so read just that column instead of\n",
+    "    # loading (and date-parsing) columns that were dropped right away.\n",
+    "    df_csv = pd.read_csv(path, nrows=limit, usecols=['tweet_text'],\n",
+    "                         encoding='utf-8-sig')\n",
+    "\n",
+    "    nlp = spacy.load(\"de_core_news_lg\")\n",
+    "    nlp.Defaults.stop_words |= {\"&amp\", \"amp\"}\n",
+    "    # NOTE(review): the 'emoji' factory is not provided by any import of this\n",
+    "    # notebook (presumably the spacymoji package) - confirm it is installed.\n",
+    "    nlp.add_pipe('emoji', first=True)\n",
+    "    return list(\n",
+    "        nlp.pipe(df_csv['tweet_text'], disable=[\"tok2vec\", \"tagger\", \"parser\", \"attribute_ruler\"], n_process=-1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2c39f658",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "copbird-env",
+   "language": "python",
+   "name": "copbird-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/keywords-tweets-checkpoint.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/keywords-tweets-checkpoint.ipynb
--- a/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/pressemitteilung-selfmade-api-checkpoint.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/pressemitteilung-selfmade-api-checkpoint.ipynb
@ -0,0 +1,490 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cce66876",
+   "metadata": {},
+   "source": [
+    "# Interface Presseportal"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f12d7022",
+   "metadata": {},
+   "source": [
+    "Das Presseportal bietet eine Plattform, bei der mittels GET-Requests die Pressemitteilungen verschiedener Institutionen (Polizei, Feuerwehr, ...) in bestimmten Zeiträumen und gegebenen Gebieten extrahiert werden können. Dafür gibt es auch eine API."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b07aef9f",
+   "metadata": {},
+   "source": [
+    "Beispiel URL: `https://www.presseportal.de/blaulicht/d/polizei/l/hessen/30?startDate=2021-05-04&endDate=2021-05-04`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "258338d0",
+   "metadata": {},
+   "source": [
+    "Da eine große Menge an Pressemitteilungen angefragt wird und Requests ziemlich lange benötigen, muss die Anfrage optimiert werden:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b07fac3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import calendar\n",
+    "import time\n",
+    "import os\n",
+    "import csv\n",
+    "\n",
+    "from tqdm.notebook import tqdm\n",
+    "from datetime import datetime\n",
+    "from bs4 import BeautifulSoup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0dfce15a",
+   "metadata": {},
+   "source": [
+    "Um Pressemitteilungen sinnvoll zu speichern, werden sie als Klasse dargestellt:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6c0b30a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Pressemitteilung:\n",
+    "    \"\"\"Plain record for one press release.\"\"\"\n",
+    "\n",
+    "    def __init__(self, article_id, timestamp, location, text, bundesland):\n",
+    "        self.article_id, self.timestamp = article_id, timestamp\n",
+    "        self.location, self.text = location, text\n",
+    "        self.bundesland = bundesland\n",
+    "\n",
+    "    def __str__(self):\n",
+    "        # One-line summary; the text is cut to its first six whitespace-separated words.\n",
+    "        return f\"[{self.article_id}] {self.timestamp} {self.location} | {' '.join(self.text.split()[:6])}\"\n",
+    "\n",
+    "    def to_row(self):\n",
+    "        \"\"\"Return the fields as a list: id, timestamp, location, bundesland, text.\"\"\"\n",
+    "        row = [self.article_id, self.timestamp, self.location]\n",
+    "        row += [self.bundesland, self.text]\n",
+    "        return row"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "63cceebe",
+   "metadata": {},
+   "source": [
+    "**Konstanten und Pfade**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8bcc877f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Headers sent with every request: a browser-like User-Agent string.\n",
+    "REQUEST_HEADERS = {\n",
+    "    \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) \"\n",
+    "                  \"AppleWebKit/537.36 (KHTML, like Gecko) \"\n",
+    "                  \"Chrome/50.0.2661.102 Safari/537.36\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c637ac38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Folder for the CSV files: the sibling \"data\" directory of the notebook folder.\n",
+    "DATA_FOLDER = os.path.join(os.pardir, \"data\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f094dee0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Region names as they are inserted into the request URL\n",
+    "# (Berlin and Brandenburg form the single entry \"berlin-brandenburg\").\n",
+    "BUNDESLAENDER = [\n",
+    "    \"baden-wuerttemberg\", \"bayern\", \"berlin-brandenburg\",\n",
+    "    \"bremen\", \"hamburg\", \"hessen\",\n",
+    "    \"mecklenburg-vorpommern\", \"niedersachsen\", \"nordrhein-westfalen\",\n",
+    "    \"rheinland-pfalz\", \"saarland\", \"sachsen\",\n",
+    "    \"sachsen-anhalt\", \"schleswig-holstein\", \"thueringen\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "84632391",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def requests_get(request, timeout=30):\n",
+    "    \"\"\"Send a GET request with the notebook's ``REQUEST_HEADERS``.\n",
+    "\n",
+    "    Args:\n",
+    "        request (str): URL to fetch.\n",
+    "        timeout (float or tuple, default=30): Seconds to wait for the server.\n",
+    "            Without a timeout ``requests`` can block indefinitely on a\n",
+    "            stalled connection.\n",
+    "\n",
+    "    Returns:\n",
+    "        :obj:`Response`: The response; its status code is not checked here.\n",
+    "\n",
+    "    Raises:\n",
+    "        requests.exceptions.Timeout: If the server does not answer in time.\n",
+    "    \"\"\"\n",
+    "    return requests.get(request, headers=REQUEST_HEADERS, timeout=timeout)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "1af0bdbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_response(response, bundesland=None):\n",
+    "    \"\"\"Extract all press releases from the response of a request.\n",
+    "\n",
+    "    Args:\n",
+    "        response (:obj:`Response`): Response whose ``content`` is parsed as HTML.\n",
+    "        bundesland (:obj:`str`): Can be passed along if it was relevant for\n",
+    "            the search; it is stored on every result. Default = None\n",
+    "\n",
+    "    Returns:\n",
+    "        list of :obj:`Pressemitteilung`: One entry per ``<article>`` element\n",
+    "        that carries a ``data-url`` attribute.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If a date is present but does not match '%d.%m.%Y – %H:%M'.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    mitteilungen = []\n",
+    "\n",
+    "    soup = BeautifulSoup(response.content, 'html.parser')\n",
+    "    for article in soup.find_all('article'):\n",
+    "        data_url = article.get('data-url')\n",
+    "        if not data_url:\n",
+    "            # No URL means no id can be built - skip instead of raising KeyError.\n",
+    "            continue\n",
+    "        # The id consists of the last two path segments of the article URL.\n",
+    "        article_id = '-'.join(data_url.split('/')[-2:])\n",
+    "\n",
+    "        # Date and topic are looked up inside the article's first <div>;\n",
+    "        # a missing <div> yields None for both instead of an AttributeError.\n",
+    "        meta = article.find('div')\n",
+    "        date_tag = meta.find(class_=\"date\") if meta is not None else None\n",
+    "        topic_tag = meta.find(class_=\"news-topic\") if meta is not None else None\n",
+    "\n",
+    "        timestamp = None\n",
+    "        if date_tag is not None:\n",
+    "            # strip(): surrounding whitespace would make strptime fail.\n",
+    "            timestamp = datetime.strptime(date_tag.text.strip(), '%d.%m.%Y – %H:%M')\n",
+    "\n",
+    "        location_str = topic_tag.text if topic_tag is not None else None\n",
+    "\n",
+    "        # The second <p> of the article is used as text; '' if there is none.\n",
+    "        p_texts = article.find_all('p')\n",
+    "        text = p_texts[1].text if len(p_texts) > 1 else ''\n",
+    "\n",
+    "        mitteilungen.append(Pressemitteilung(article_id, timestamp, location_str, text, bundesland))\n",
+    "\n",
+    "    return mitteilungen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c62c06c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_get_request(*, site=1, location=None, start_date=None, end_date=None):\n",
+    "    \"\"\"Simulate an API: build a Presseportal URL from parameters.\n",
+    "\n",
+    "    Args:\n",
+    "        site (int, default=1): 1-based result page. The URL carries it as an\n",
+    "            entry offset in steps of 30: page 1 has no suffix, page 2 is\n",
+    "            \"/30\", page 3 is \"/60\", ...\n",
+    "        location (:obj:`str`, default=None): Federal state or city\n",
+    "        start_date (:obj:`str`, default=None)\n",
+    "        end_date (:obj:`str`, default=None)\n",
+    "    Returns:\n",
+    "        str: URL\n",
+    "    \"\"\"\n",
+    "    url = \"https://www.presseportal.de/blaulicht/d/polizei\"\n",
+    "\n",
+    "    if location is not None:\n",
+    "        url += f\"/l/{location}\"\n",
+    "\n",
+    "    if site > 1:\n",
+    "        # Offset of the first entry on this page. Using site*30 would jump\n",
+    "        # from no suffix straight to \"/60\" and never request the \"/30\" page.\n",
+    "        url += f\"/{(site - 1) * 30}\"\n",
+    "\n",
+    "    # Query string: only the dates that were actually given, joined by '&'.\n",
+    "    params = []\n",
+    "    if start_date is not None:\n",
+    "        params.append(f\"startDate={start_date}\")\n",
+    "    if end_date is not None:\n",
+    "        params.append(f\"endDate={end_date}\")\n",
+    "    if params:\n",
+    "        url += \"?\" + \"&\".join(params)\n",
+    "\n",
+    "    return url"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c67c9bc",
+   "metadata": {},
+   "source": [
+    "## Beispiel: Hamburg "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "aff924d6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'https://www.presseportal.de/blaulicht/d/polizei/l/hamburg/90?startDate=2021-01-13&endDate=2021-03-20'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Example URL: Hamburg, site 3, 2021-01-13 to 2021-03-20.\n",
+    "url = create_get_request(\n",
+    "    site=3,\n",
+    "    location=\"hamburg\",\n",
+    "    start_date=\"2021-01-13\",\n",
+    "    end_date=\"2021-03-20\",\n",
+    ")\n",
+    "url"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "6e2b9091",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[6337-4840243] 2021-02-16 17:41:00 Hamburg | Hamburg (ots) - Tatzeit: 15.02.2021, 08:15\n",
+      "[6337-4839937] 2021-02-16 13:14:00 Hamburg | Hamburg (ots) - Tatzeiten: a. 15.02.2021,\n",
+      "[6337-4839709] 2021-02-16 11:33:00 Hamburg | Hamburg (ots) - Tatzeit: 15.02.2021, 18:25\n",
+      "[6337-4839544] 2021-02-16 10:31:00 Hamburg | Hamburg (ots) - Zeit: 15.02.2021, 01:34\n",
+      "[6337-4838489] 2021-02-15 11:48:00 Hamburg | Hamburg (ots) - Tatzeit: 14.02.2021; 19:17\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fetch the example URL and print the first five extracted press releases.\n",
+    "for mitteilung in extract_response(requests_get(url))[:5]:\n",
+    "    print(mitteilung)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e50af557",
+   "metadata": {},
+   "source": [
+    "## Effizientes Einlesen"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b4a9580a",
+   "metadata": {},
+   "source": [
+    "Um die Daten sinnvoll zu extrahieren, ohne auf einen Schlag zu viele Anfragen zu tätigen, läuft das Programm synchron mit Pausen (1 Sek. / Anfrage). Die Hauptfunktion sucht für einen gegebenen Tag alle Pressemeldungen der Polizei und sortiert diese nach Bundesland bzw. Stadt."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "da927e30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _get_meldungen_for_date_and_bundesland(year, month, day, bundesland):\n",
+    "    \"\"\"Find all press releases of one Bundesland for one specific day.\n",
+    "\n",
+    "    Result pages are requested one after another until a page yields no\n",
+    "    entries, pausing one second between requests.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int), month (int), day (int): The date to query.\n",
+    "        bundesland (str): Location part of the URL; also stored on each result.\n",
+    "\n",
+    "    Returns:\n",
+    "        list of :obj:`Pressemitteilung`: Entries of all non-empty pages.\n",
+    "    \"\"\"\n",
+    "    # Start and end of the queried range are the same day.\n",
+    "    date_str = datetime(year, month, day).strftime(\"%Y-%m-%d\")\n",
+    "\n",
+    "    meldungen = []\n",
+    "    site = 1\n",
+    "\n",
+    "    # The context manager closes the progress bar even if a request fails.\n",
+    "    with tqdm(desc=bundesland) as pbar:\n",
+    "        while True:\n",
+    "            request = create_get_request(\n",
+    "                site=site, location=bundesland, start_date=date_str, end_date=date_str,\n",
+    "            )\n",
+    "            new_meldungen = extract_response(requests_get(request), bundesland=bundesland)\n",
+    "            if len(new_meldungen) == 0:\n",
+    "                break\n",
+    "\n",
+    "            meldungen.extend(new_meldungen)\n",
+    "            pbar.update(1)\n",
+    "\n",
+    "            # Pause before asking for the next page.\n",
+    "            time.sleep(1)\n",
+    "            site += 1\n",
+    "\n",
+    "    return meldungen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "85508758",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_meldungen_for_date(year, month, day):\n",
+    "    \"\"\"Extract all press releases for one day.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int): Year\n",
+    "        month (int): Month\n",
+    "        day (int): Day\n",
+    "\n",
+    "    Returns:\n",
+    "        dict: Maps every entry of ``BUNDESLAENDER`` to the list returned by\n",
+    "        ``_get_meldungen_for_date_and_bundesland`` for it.\n",
+    "    \"\"\"\n",
+    "    return {\n",
+    "        land: _get_meldungen_for_date_and_bundesland(year, month, day, land)\n",
+    "        for land in BUNDESLAENDER\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f938d8a9",
+   "metadata": {},
+   "source": [
+    "## Speichern der Daten in CSV-Dateien"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67374d3b",
+   "metadata": {},
+   "source": [
+    "Zur sinnvollen Speicherung werden alle Daten eines Tages in genau einer CSV-Datei gespeichert. Diese können danach (manuell) als ZIP des Monats zusammengefasst werden. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "276e700d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_meldungen_in_csv(year, month, day):\n",
+    "    \"\"\"Store all press releases of one date in a CSV whose name contains the date.\n",
+    "\n",
+    "    The file ``{year}-{month}-{day}_presseportal.csv`` is written to\n",
+    "    ``DATA_FOLDER`` (created if it does not exist) with the header\n",
+    "    article_id, timestamp, location, bundesland, content.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    filename = f\"{year}-{month}-{day}_presseportal.csv\"\n",
+    "    path = os.path.join(DATA_FOLDER, filename)\n",
+    "\n",
+    "    # Make sure the target folder exists before the slow download starts.\n",
+    "    os.makedirs(DATA_FOLDER, exist_ok=True)\n",
+    "\n",
+    "    # The download runs before the file is opened, so a failed request\n",
+    "    # leaves no partial CSV behind.\n",
+    "    meldungen_per_bundesland = get_meldungen_for_date(year, month, day)\n",
+    "\n",
+    "    with open(path, 'w', newline='', encoding='UTF8') as f:\n",
+    "        writer = csv.writer(f)\n",
+    "        writer.writerow(['article_id', 'timestamp', 'location', 'bundesland', 'content'])\n",
+    "\n",
+    "        # The Bundesland is part of every row already, only the values are needed.\n",
+    "        for meldungen in meldungen_per_bundesland.values():\n",
+    "            writer.writerows(meldung.to_row() for meldung in meldungen)\n",
+    "\n",
+    "    print(f\"File '{filename}' created\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "c5d0bdbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_month(year, month):\n",
+    "    \"\"\"Call ``store_meldungen_in_csv`` for every day of the given month.\"\"\"\n",
+    "    # monthrange returns (weekday of the 1st, number of days in the month).\n",
+    "    _, days_in_month = calendar.monthrange(year, month)\n",
+    "\n",
+    "    for day in range(1, days_in_month + 1):\n",
+    "        store_meldungen_in_csv(year, month, day)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d9f3e24b",
+   "metadata": {},
+   "source": [
+    "## Auswertung: Wie viele Einträge pro Bundesland?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f600d3c",
+   "metadata": {},
+   "source": [
+    "Für fortführende Visualisierung und um zu testen, ob der Algorithmus richtig funktioniert, werden hier alle Pressemitteilungen aller Bundesländer ausgezählt:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "b7c85078",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Count the stored press releases per Bundesland over all daily CSV files.\n",
+    "counter = {}\n",
+    "\n",
+    "for filename in os.listdir(DATA_FOLDER):\n",
+    "    if not filename.endswith(\"_presseportal.csv\"):\n",
+    "        continue\n",
+    "    path = os.path.join(DATA_FOLDER, filename)\n",
+    "\n",
+    "    # newline='' lets the csv module handle line breaks inside quoted fields.\n",
+    "    with open(path, 'r', newline='', encoding='UTF8') as f_in:\n",
+    "        reader = csv.reader(f_in)\n",
+    "        next(reader, None)  # skip the header row; tolerate an empty file\n",
+    "        for row in reader:\n",
+    "            if len(row) < 4:\n",
+    "                continue  # blank or truncated line without a bundesland column\n",
+    "            bundesland = row[3]\n",
+    "            counter[bundesland] = counter.get(bundesland, 0) + 1\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "copbird-env",
+   "language": "python",
+   "name": "copbird-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/simons-notebook-checkpoint.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/simons-notebook-checkpoint.ipynb
--- a/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/tweet-pm-counts-checkpoint.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/.ipynb_checkpoints/tweet-pm-counts-checkpoint.ipynb
--- a/ergebnisse_hackathon_repo/team-16/notebooks/keywords-tweets.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/keywords-tweets.ipynb
--- a/ergebnisse_hackathon_repo/team-16/notebooks/pressemitteilung-selfmade-api.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/pressemitteilung-selfmade-api.ipynb
@ -0,0 +1,490 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cce66876",
+   "metadata": {},
+   "source": [
+    "# Interface Presseportal"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f12d7022",
+   "metadata": {},
+   "source": [
+    "Das Presseportal bietet eine Plattform, bei der mittels GET-Requests die Pressemitteilungen verschiedener Institutionen (Polizei, Feuerwehr, ...) in bestimmten Zeiträumen in gegebenen Gebieten extrahiert werden können. Dafür gibt es auch eine API."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b07aef9f",
+   "metadata": {},
+   "source": [
+    "Beispiel URL: `https://www.presseportal.de/blaulicht/d/polizei/l/hessen/30?startDate=2021-05-04&endDate=2021-05-04`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "258338d0",
+   "metadata": {},
+   "source": [
+    "Da eine große Menge an Pressemitteilungen angefragt wird und Requests ziemlich lange benötigen, muss die Anfrage optimiert werden:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b07fac3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import calendar\n",
+    "import time\n",
+    "import os\n",
+    "import csv\n",
+    "\n",
+    "from tqdm.notebook import tqdm\n",
+    "from datetime import datetime\n",
+    "from bs4 import BeautifulSoup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0dfce15a",
+   "metadata": {},
+   "source": [
+    "Um Pressemitteilungen sinnvoll zu speichern, werden sie als Klasse dargestellt:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6c0b30a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Pressemitteilung:\n",
+    "    def __init__(self, article_id, timestamp, location, text, bundesland):\n",
+    "        self.article_id = article_id\n",
+    "        self.timestamp = timestamp\n",
+    "        self.location = location\n",
+    "        self.text = text\n",
+    "        self.bundesland=bundesland\n",
+    "    \n",
+    "    def __str__(self):\n",
+    "        return f\"[{self.article_id}] {self.timestamp} {self.location} | {' '.join(self.text.split()[:6])}\"\n",
+    "    \n",
+    "    def to_row(self):\n",
+    "        return [self.article_id, self.timestamp, self.location, self.bundesland, self.text]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "63cceebe",
+   "metadata": {},
+   "source": [
+    "**Konstanten und Pfade**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8bcc877f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "REQUEST_HEADERS = {\n",
+    "    \"User-Agent\": (\n",
+    "        \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 \"\n",
+    "        \"(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36\"\n",
+    "    )\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c637ac38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DATA_FOLDER = os.path.join(\"..\", \"data\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f094dee0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "BUNDESLAENDER = [\n",
+    "    \"baden-wuerttemberg\",\n",
+    "    \"bayern\",\n",
+    "    \"berlin-brandenburg\",\n",
+    "    \"bremen\",\n",
+    "    \"hamburg\",\n",
+    "    \"hessen\",\n",
+    "    \"mecklenburg-vorpommern\",\n",
+    "    \"niedersachsen\",\n",
+    "    \"nordrhein-westfalen\",\n",
+    "    \"rheinland-pfalz\",\n",
+    "    \"saarland\",\n",
+    "    \"sachsen\",\n",
+    "    \"sachsen-anhalt\",\n",
+    "    \"schleswig-holstein\",\n",
+    "    \"thueringen\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "84632391",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def requests_get(request, timeout=30):\n",
+    "    \"\"\"Send a GET request with the shared REQUEST_HEADERS.\n",
+    "\n",
+    "    Args:\n",
+    "        request (:obj:`str`): URL to fetch.\n",
+    "        timeout (float or tuple, default=30): Seconds to wait for the server before\n",
+    "            `requests` raises a timeout error. Without a timeout a stalled\n",
+    "            connection would block the calling loop indefinitely.\n",
+    "\n",
+    "    Returns:\n",
+    "        :obj:`Response`: The response exactly as returned by `requests.get`;\n",
+    "        the HTTP status is not checked here.\n",
+    "    \"\"\"\n",
+    "    return requests.get(request, headers=REQUEST_HEADERS, timeout=timeout)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "1af0bdbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_response(response, bundesland=None):\n",
+    "    \"\"\"Extract all press releases from the response of a listing-page request.\n",
+    "\n",
+    "    Args:\n",
+    "        response (:obj:`Response`): Response whose `content` is parsed as HTML.\n",
+    "        bundesland (:obj:`str`): Stored on every result; can be passed when it was\n",
+    "            relevant for the search. Default = None\n",
+    "\n",
+    "    Returns:\n",
+    "        list of :obj:`Pressemitteilung`: One entry per <article> element that has a\n",
+    "        `data-url` attribute. Timestamp and location are None when not found.\n",
+    "    \"\"\"\n",
+    "    mitteilungen = []\n",
+    "\n",
+    "    soup = BeautifulSoup(response.content, 'html.parser')\n",
+    "    for article in soup.find_all('article'):\n",
+    "        data_url = article.get('data-url')\n",
+    "        if data_url is None:\n",
+    "            # Without a data-url no id can be built; skip this element instead of\n",
+    "            # aborting the whole page with a KeyError.\n",
+    "            continue\n",
+    "        # The id is built from the last two path segments of the article URL.\n",
+    "        article_id = '-'.join(data_url.split('/')[-2:])\n",
+    "\n",
+    "        # Date and topic are looked up inside the first <div>, which may be missing.\n",
+    "        meta = article.find('div')\n",
+    "        date_tag = meta.find(class_=\"date\") if meta is not None else None\n",
+    "        topic_tag = meta.find(class_=\"news-topic\") if meta is not None else None\n",
+    "\n",
+    "        timestamp = None\n",
+    "        if date_tag is not None:\n",
+    "            # The format string contains an en dash between date and time.\n",
+    "            timestamp = datetime.strptime(date_tag.text, '%d.%m.%Y – %H:%M')\n",
+    "\n",
+    "        location_str = topic_tag.text if topic_tag is not None else None\n",
+    "\n",
+    "        # The text is taken from the second <p>; '' when there are fewer than two.\n",
+    "        paragraphs = article.find_all('p')\n",
+    "        text = paragraphs[1].text if len(paragraphs) > 1 else ''\n",
+    "\n",
+    "        mitteilungen.append(Pressemitteilung(article_id, timestamp, location_str, text, bundesland))\n",
+    "\n",
+    "    return mitteilungen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c62c06c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_get_request(*, site=1, location=None, start_date=None, end_date=None):\n",
+    "    \"\"\"Simulation einer API: Erzeuge aus Parametern eine URL\n",
+    "    \n",
+    "    Args:\n",
+    "        site (int, default=1): Aktuelle Seite, auf der man sich befinden soll. Ist in der URL in 30er Schritten angegeben\n",
+    "        location (:obj:`str`, default=None): Bundesland bzw. Stadt\n",
+    "        start_date (:obj:`str`, default=None)\n",
+    "        end_date (:obj:`str`, default=None)\n",
+    "    Returns:\n",
+    "        str: URL\n",
+    "    \"\"\"\n",
+    "    url = f\"https://www.presseportal.de/blaulicht/d/polizei\"\n",
+    "    \n",
+    "    if location is not None:\n",
+    "        url += f\"/l/{location}\"\n",
+    "    \n",
+    "    if site > 1:\n",
+    "        url += f\"/{site*30}\"\n",
+    "    \n",
+    "    if start_date is not None or end_date is not None:\n",
+    "        url += \"?\"\n",
+    "    \n",
+    "        if start_date is not None:\n",
+    "            url += f\"startDate={start_date}\"\n",
+    "        \n",
+    "            if end_date is not None:\n",
+    "                url += \"&\"\n",
+    "        \n",
+    "        if end_date is not None:\n",
+    "            url += f\"endDate={end_date}\"\n",
+    "    \n",
+    "    return url"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c67c9bc",
+   "metadata": {},
+   "source": [
+    "## Beispiel: Hamburg "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "aff924d6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'https://www.presseportal.de/blaulicht/d/polizei/l/hamburg/90?startDate=2021-01-13&endDate=2021-03-20'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "url = create_get_request(location=\"hamburg\", site=3, start_date=\"2021-01-13\", end_date=\"2021-03-20\")\n",
+    "url"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "6e2b9091",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[6337-4840243] 2021-02-16 17:41:00 Hamburg | Hamburg (ots) - Tatzeit: 15.02.2021, 08:15\n",
+      "[6337-4839937] 2021-02-16 13:14:00 Hamburg | Hamburg (ots) - Tatzeiten: a. 15.02.2021,\n",
+      "[6337-4839709] 2021-02-16 11:33:00 Hamburg | Hamburg (ots) - Tatzeit: 15.02.2021, 18:25\n",
+      "[6337-4839544] 2021-02-16 10:31:00 Hamburg | Hamburg (ots) - Zeit: 15.02.2021, 01:34\n",
+      "[6337-4838489] 2021-02-15 11:48:00 Hamburg | Hamburg (ots) - Tatzeit: 14.02.2021; 19:17\n"
+     ]
+    }
+   ],
+   "source": [
+    "for mitteilung in extract_response(requests_get(url))[:5]:\n",
+    "    print(mitteilung)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e50af557",
+   "metadata": {},
+   "source": [
+    "## Effizientes Einlesen"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b4a9580a",
+   "metadata": {},
+   "source": [
+    "Um die Dateien sinnhaft zu extrahieren, ohne auf einen Schlag zu viele Anfragen zu tätigen, läuft das Programm synchron mit Pausen (1 Sek. pro Anfrage). Die Hauptfunktion sucht für einen gegebenen Tag alle Pressemeldungen der Polizei und sortiert diese nach Bundesland bzw. Stadt."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "da927e30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _get_meldungen_for_date_and_bundesland(year, month, day, bundesland):\n",
+    "    \"\"\"Collect all press releases of one Bundesland for one specific day.\n",
+    "\n",
+    "    Pages are requested one after another, with a one-second pause in between,\n",
+    "    until a page yields no press releases.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int): Year\n",
+    "        month (int): Month\n",
+    "        day (int): Day\n",
+    "        bundesland (:obj:`str`): Passed as `location` to create_get_request.\n",
+    "\n",
+    "    Returns:\n",
+    "        list of :obj:`Pressemitteilung`\n",
+    "    \"\"\"\n",
+    "    # Start and end of the search window are the same day.\n",
+    "    date_str = datetime(year, month, day).strftime(\"%Y-%m-%d\")\n",
+    "\n",
+    "    meldungen = []\n",
+    "    site = 1\n",
+    "\n",
+    "    # The context manager closes the progress bar even if a request raises.\n",
+    "    with tqdm(desc=bundesland) as pbar:\n",
+    "        while True:\n",
+    "            request = create_get_request(\n",
+    "                site=site, location=bundesland, start_date=date_str, end_date=date_str,\n",
+    "            )\n",
+    "            new_meldungen = extract_response(requests_get(request), bundesland=bundesland)\n",
+    "            if not new_meldungen:\n",
+    "                # An empty page marks the end of the results.\n",
+    "                break\n",
+    "\n",
+    "            meldungen.extend(new_meldungen)\n",
+    "            pbar.update(1)\n",
+    "            site += 1\n",
+    "            # Pause before the next request so the server is not flooded.\n",
+    "            time.sleep(1)\n",
+    "\n",
+    "    return meldungen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "85508758",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_meldungen_for_date(year, month, day):\n",
+    "    \"\"\"Extract all press releases for one day, grouped by Bundesland.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int): Year\n",
+    "        month (int): Month\n",
+    "        day (int): Day\n",
+    "\n",
+    "    Returns:\n",
+    "        dict: Maps each entry of BUNDESLAENDER to the list returned by\n",
+    "        _get_meldungen_for_date_and_bundesland for that entry.\n",
+    "    \"\"\"\n",
+    "    # Entries are fetched one after another, in the order of BUNDESLAENDER.\n",
+    "    return {\n",
+    "        bundesland: _get_meldungen_for_date_and_bundesland(year, month, day, bundesland)\n",
+    "        for bundesland in BUNDESLAENDER\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f938d8a9",
+   "metadata": {},
+   "source": [
+    "## Speichern der Daten in CSV-Dateien"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67374d3b",
+   "metadata": {},
+   "source": [
+    "Zur sinnvollen Speicherung werden alle Daten eines Tages in genau einer CSV-Datei gespeichert. Diese können danach (manuell) als ZIP des Monats zusammengefasst werden. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "276e700d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_meldungen_in_csv(year, month, day):\n",
+    "    \"\"\"Store all press releases of one date in a CSV file named after that date.\n",
+    "\n",
+    "    The file is written to DATA_FOLDER, which is created if it does not exist.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int): Year\n",
+    "        month (int): Month\n",
+    "        day (int): Day\n",
+    "    \"\"\"\n",
+    "    filename = f\"{year}-{month}-{day}_presseportal.csv\"\n",
+    "    path = os.path.join(DATA_FOLDER, filename)\n",
+    "    # Fetching happens before the file is opened, so a failing download\n",
+    "    # does not leave an empty file behind.\n",
+    "    meldungen_per_bundesland = get_meldungen_for_date(year, month, day)\n",
+    "\n",
+    "    # open() cannot create missing directories, so make sure the folder exists.\n",
+    "    os.makedirs(DATA_FOLDER, exist_ok=True)\n",
+    "\n",
+    "    with open(path, 'w', newline='', encoding='UTF8') as f:\n",
+    "        writer = csv.writer(f)\n",
+    "        writer.writerow(['article_id', 'timestamp', 'location', 'bundesland', 'content'])\n",
+    "\n",
+    "        # The dict keys are not needed: every row carries its own bundesland.\n",
+    "        for meldungen in meldungen_per_bundesland.values():\n",
+    "            writer.writerows(meldung.to_row() for meldung in meldungen)\n",
+    "\n",
+    "    print(f\"File '{filename}' created\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "c5d0bdbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_month(year, month):\n",
+    "    \"\"\"Call store_meldungen_in_csv for every day of the given month.\n",
+    "\n",
+    "    Args:\n",
+    "        year (int): Year\n",
+    "        month (int): Month (1-12)\n",
+    "    \"\"\"\n",
+    "    # monthrange returns (weekday of the first day, number of days in the month).\n",
+    "    _, days_in_month = calendar.monthrange(year, month)\n",
+    "\n",
+    "    for day in range(1, days_in_month + 1):\n",
+    "        store_meldungen_in_csv(year, month, day)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d9f3e24b",
+   "metadata": {},
+   "source": [
+    "## Auswertung: Wie viele Einträge pro Bundesland?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f600d3c",
+   "metadata": {},
+   "source": [
+    "Für fortführende Visualisierung und um zu testen, ob der Algorithmus richtig funktioniert, werden hier alle Pressemitteilungen aller Bundesländer ausgezählt:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "b7c85078",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Count the press releases per Bundesland across all daily CSV files in DATA_FOLDER.\n",
+    "counter = {}\n",
+    "\n",
+    "for filename in os.listdir(DATA_FOLDER):\n",
+    "    if not filename.endswith(\"_presseportal.csv\"):\n",
+    "        continue\n",
+    "\n",
+    "    path = os.path.join(DATA_FOLDER, filename)\n",
+    "    with open(path, 'r', encoding='UTF8') as f_in:\n",
+    "        reader = csv.reader(f_in)\n",
+    "        # Skip the header row; the None default keeps an empty file from raising StopIteration.\n",
+    "        next(reader, None)\n",
+    "        for row in reader:\n",
+    "            # Rows with fewer than four columns have no 'bundesland' value to count.\n",
+    "            if len(row) <= 3:\n",
+    "                continue\n",
+    "            bundesland = row[3]\n",
+    "            counter[bundesland] = counter.get(bundesland, 0) + 1"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python-scientific kernel",
+   "language": "python",
+   "name": "python-scientific"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/ergebnisse_hackathon_repo/team-16/notebooks/simons-notebook.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/simons-notebook.ipynb
--- a/ergebnisse_hackathon_repo/team-16/notebooks/tweet-pm-counts.ipynb
+++ b/ergebnisse_hackathon_repo/team-16/notebooks/tweet-pm-counts.ipynb