6 changed files with 6 additions and 484889 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -160,6 +160,3 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

-
-data/tfidf_matrix.pckl
-data/tfidf_vectorizer.pckl
--- a/app.py
+++ b/app.py
@@ -2,7 +2,7 @@ from pathlib import Path
 from typing import List
 from shiny import App, ui, Inputs, Outputs, Session
 from shiny.types import NavSetArg
-from src import mod_welcome, mod_searchable
+from src import mod_welcome
 from src.util import load_html_str_from_file

 import os
@@ -13,9 +13,8 @@ footer_html: str = load_html_str_from_file(os.path.join("www", "footer.html"))

 def nav_controls() -> List[NavSetArg]:
    return [
-        ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("intro"), value="intro"),
-        ui.nav(ui.h5("Analyse"), "Analyse"),
-        ui.nav(ui.h5("Suchmaschine"), mod_searchable.searchable_ui("search_engine"), value="search_engine"),
+        ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("Intro"), value="intro"),
+        ui.nav(ui.h5("Datensatz Analyse"), "Datensatz Analyse"),
        ui.nav_control(
            ui.a(
                ui.h5("AG-Link"),
@@ -58,8 +57,7 @@ app_ui = ui.page_navbar(


 def server(input: Inputs, output: Outputs, session: Session):
-    mod_welcome.welcome_server("intro")
-    mod_searchable.searchable_server("search_engine")
+    mod_welcome.welcome_server("Intro")


 static_dir = Path(__file__).parent / "www"
--- a/data/tweet_relevance.json
+++ b/data/tweet_relevance.json
--- a/data/tweets_all_combined.csv
+++ b/data/tweets_all_combined.csv
--- a/src/mod_searchable.py
+++ b/src/mod_searchable.py
@@ -1,93 +0,0 @@
-from shiny import module, ui, render, Inputs, Outputs, Session
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-import pandas as pd
-import numpy as np
-import pickle
-
-tfidf_matrix_path = "data/tfidf_matrix.pckl"
-tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
-relevance_score_path = "data/tweet_relevance.json"
-tweets_path = "data/tweets_all_combined.csv"
-
-
-print("Loading data from storage")
-tweets = pd.read_csv(tweets_path)
-relevance_score = pd.read_csv(relevance_score_path)
-
-tfidf_matrix = None
-with open(tfidf_matrix_path, "rb") as f:
-    tfidf_matrix = pickle.load(f)
-
-tfidf_vectorizer: TfidfVectorizer = None
-with open(tfidf_vectorizer_path, "rb") as f:
-    tfidf_vectorizer = pickle.load(f)
-
-
-tweets["relevance_score"] = relevance_score["relevance_score"]
-tweets = tweets.drop(["user_id", "measured_at", "tweet_id"], axis=1)
-
-
-def search_query(query: str, limit: int = 5) -> pd.DataFrame:
-    query_vec = tfidf_vectorizer.transform([query])
-    similarity = cosine_similarity(query_vec, tfidf_matrix).flatten()
-
-    filtered = np.where(similarity != 0)[0]
-    indices = np.argsort(-similarity[filtered])
-    correct_indices = filtered[indices]
-    result = tweets.iloc[correct_indices]
-
-    if not len(result):
-        return None
-
-    overall = result['relevance_score'] * similarity[correct_indices]
-    return result.loc[overall.sort_values(ascending=False).index].head(limit)
-
-
-@module.ui
-def searchable_ui():
-    return ui.div(
-        ui.h2("Tweet Suchmaschine"),
-        ui.input_text("search_input", "Suche:", placeholder="Gebe Suchterm ein"),
-        ui.HTML("<br>"),
-        ui.output_ui(id="searchable_tweet_ui"),
-    )
-
-
-@ module.server
-def searchable_server(input: Inputs, output: Outputs, session: Session):
-    @output
-    @render.ui
-    def searchable_tweet_ui():
-
-        query = input.search_input()
-
-        result_pd = search_query(query, 15)
-
-        style = "text-align: center; padding-top: 0.5em;"
-        tweet_ui = ui.page_fluid()
-
-        if result_pd is None:
-            return ui.div(
-                ui.h5("Keine Ergebnisse gefunden!")
-            )
-
-        # iterating over dataframe is bad but needed
-        for idx, row in result_pd.iterrows():
-            tweet_ui.append(
-                ui.div(
-                    ui.row(
-                        ui.column(9, ui.markdown(f"**{row['user_name']}** *@{row['handle']}*"), style=style),
-                        ui.column(3, ui.p(f"{row['created_at']}"), style=style),
-                    ),
-                    ui.row(
-                        ui.column(12, ui.HTML(str(row["tweet_text"]).replace("\\n", "<br>")), style=style + "font-size: 20px; padding:1em;"),
-                    ),
-                    ui.row(
-                        ui.column(3, ui.p(f"👍 {row['like_count']}"), style=style),
-                        ui.column(3, ui.p(f"⟲ {row['retweet_count']}"), style=style),
-                        ui.column(3, ui.p(f"↪ {row['reply_count']}"), style=style),
-                        ui.column(3, ui.p(f"💬 {row['quote_count']}"), style=style),
-                    ), style="border: 1px solid #954; margin-bottom: 1em;"))
-
-        return tweet_ui
--- a/src/mod_welcome.py
+++ b/src/mod_welcome.py
@@ -18,6 +18,7 @@ def welcome_ui():

            [0]: https://ag-link.xyz
            """),
+        # ui.output_text("dataset_infos"),
        ui.output_ui("dataset_infos"),
        ui.h3("Ursprung der Idee"),
        ui.markdown("""
@@ -51,7 +52,7 @@ with open("data/general_analysis_results.json", "r") as f:


@ module.server
-def welcome_server(input, output, session):
+def welcome_server(input, output, session, starting_value=0):
    @output
    @render.ui
    def dataset_infos():