Adds simple tweet search engine UI
This commit is contained in:
		
							parent
							
								
									e04a681d5a
								
							
						
					
					
						commit
						d05100d1f5
					
				
					 6 changed files with 484874 additions and 9 deletions
				
			
		
							
								
								
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -162,3 +162,4 @@ cython_debug/
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
data/tfidf_matrix.pckl
 | 
			
		||||
data/tfidf_vectorizer.pckl
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										7
									
								
								app.py
									
										
									
									
									
								
							
							
						
						
									
										7
									
								
								app.py
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -13,9 +13,9 @@ footer_html: str = load_html_str_from_file(os.path.join("www", "footer.html"))
 | 
			
		|||
 | 
			
		||||
def nav_controls() -> List[NavSetArg]:
 | 
			
		||||
    return [
 | 
			
		||||
        ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("Intro"), value="intro"),
 | 
			
		||||
        ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("intro"), value="intro"),
 | 
			
		||||
        ui.nav(ui.h5("Analyse"), "Analyse"),
 | 
			
		||||
        ui.nav(ui.h5("Suchmaschine"), mod_searchable.searchable_ui("Suchmaschine")),
 | 
			
		||||
        ui.nav(ui.h5("Suchmaschine"), mod_searchable.searchable_ui("search_engine"), value="search_engine"),
 | 
			
		||||
        ui.nav_control(
 | 
			
		||||
            ui.a(
 | 
			
		||||
                ui.h5("AG-Link"),
 | 
			
		||||
| 
						 | 
				
			
			@ -58,7 +58,8 @@ app_ui = ui.page_navbar(
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
def server(input: Inputs, output: Outputs, session: Session):
    """Start the server side of every UI module used by the app.

    Each id passed here has to be the same id that was given to the matching
    ``*_ui`` call in ``nav_controls``; otherwise the module's inputs and
    outputs are not connected to its UI.
    """
    # The former "Intro" registration is dropped: the welcome UI is now
    # created with the id "intro", so "Intro" no longer matches anything.
    mod_welcome.welcome_server("intro")
    mod_searchable.searchable_server("search_engine")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static_dir = Path(__file__).parent / "www"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										151691
									
								
								data/tweet_relevance.json
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										151691
									
								
								data/tweet_relevance.json
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										333095
									
								
								data/tweets_all_combined.csv
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										333095
									
								
								data/tweets_all_combined.csv
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
				
			
			@ -1,15 +1,93 @@
 | 
			
		|||
from shiny import module, ui, render, Inputs, Outputs, Session
 | 
			
		||||
from sklearn.feature_extraction.text import TfidfVectorizer
 | 
			
		||||
from sklearn.metrics.pairwise import cosine_similarity
 | 
			
		||||
import pandas as pd
 | 
			
		||||
import numpy as np
 | 
			
		||||
import pickle
 | 
			
		||||
 | 
			
		||||
# Locations of the precomputed TF-IDF artefacts and the tweet data.
tfidf_matrix_path = "data/tfidf_matrix.pckl"
tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
relevance_score_path = "data/tweet_relevance.json"
tweets_path = "data/tweets_all_combined.csv"


# Everything below runs once, when this module is imported.
print("Loading data from storage")
tweets = pd.read_csv(tweets_path)
# NOTE(review): the path ends in ".json" but the file is parsed as CSV —
# confirm the on-disk format.
relevance_score = pd.read_csv(relevance_score_path)

# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(tfidf_matrix_path, "rb") as matrix_file:
    tfidf_matrix = pickle.load(matrix_file)

with open(tfidf_vectorizer_path, "rb") as vectorizer_file:
    tfidf_vectorizer: TfidfVectorizer = pickle.load(vectorizer_file)


# Attach the relevance score to each tweet, then remove the id/bookkeeping
# columns from the frame that the search returns.
tweets["relevance_score"] = relevance_score["relevance_score"]
tweets = tweets.drop(columns=["user_id", "measured_at", "tweet_id"])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def search_query(query: str, limit: int = 5) -> "pd.DataFrame | None":
    """Return the tweets that best match *query*, best match first.

    The query is vectorised with the module-level ``tfidf_vectorizer`` and
    compared against every row of ``tfidf_matrix`` by cosine similarity.
    Tweets with a non-zero similarity are ranked by
    ``relevance_score * similarity`` in descending order.

    Args:
        query: Free-text search term.
        limit: Maximum number of rows to return.

    Returns:
        The top ``limit`` rows of ``tweets``, or ``None`` when the query is
        blank or no tweet has a non-zero similarity to it.
    """
    # A blank query cannot match anything; skip the vectoriser entirely.
    if not query or not query.strip():
        return None

    query_vec = tfidf_vectorizer.transform([query])
    similarity = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Row positions of all tweets with a non-zero similarity.
    matches = np.flatnonzero(similarity)
    if matches.size == 0:
        return None

    # NOTE(review): relies on row i of tfidf_matrix describing row i of
    # tweets — confirm against the code that builds the pickles.
    candidates = tweets.iloc[matches]
    scores = candidates["relevance_score"].to_numpy() * similarity[matches]

    # Rank by position rather than by index label, so the result does not
    # depend on the frame's index being unique. NaN scores sort last.
    ranking = np.argsort(-scores, kind="stable")
    return candidates.iloc[ranking[:limit]]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@module.ui
def searchable_ui():
    """Build the search page: heading, query text box and result area.

    The ids used here are read by ``searchable_server``: the text box is
    ``search_input`` and the result area is ``searchable_tweet_ui``.
    """
    # The old placeholder ``return ui.div("hello world")`` is removed; it
    # returned first and left the actual search UI unreachable.
    return ui.div(
        ui.h2("Tweet Suchmaschine"),
        ui.input_text("search_input", "Suche:", placeholder="Gebe Suchterm ein"),
        ui.HTML("<br>"),
        ui.output_ui(id="searchable_tweet_ui"),
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _render_tweet_card(row, style: str):
    """Build the bordered card that displays one search-result row.

    Args:
        row: One row of the search result; read by column name.
        style: Inline CSS applied to every column of the card.
    """
    import html  # local import keeps this block self-contained

    # Tweet text is external data, so it must not reach the page as raw
    # markup. Entities are decoded first so text that is already
    # entity-encoded is not escaped twice (presumably the case for text
    # delivered by the Twitter API — verify). The two-character sequence
    # backslash + "n" is then turned into a line break.
    safe_text = html.escape(html.unescape(str(row["tweet_text"])))
    safe_text = safe_text.replace("\\n", "<br>")

    return ui.div(
        ui.row(
            # Tag children are escaped automatically, unlike text that is
            # interpolated into a markdown string.
            ui.column(
                9,
                ui.p(
                    ui.tags.strong(str(row["user_name"])),
                    " ",
                    ui.tags.em(f"@{row['handle']}"),
                ),
                style=style,
            ),
            ui.column(3, ui.p(f"{row['created_at']}"), style=style),
        ),
        ui.row(
            ui.column(
                12,
                ui.HTML(safe_text),
                style=style + "font-size: 20px; padding:1em;",
            ),
        ),
        ui.row(
            ui.column(3, ui.p(f"👍 {row['like_count']}"), style=style),
            ui.column(3, ui.p(f"⟲ {row['retweet_count']}"), style=style),
            ui.column(3, ui.p(f"↪ {row['reply_count']}"), style=style),
            ui.column(3, ui.p(f"💬 {row['quote_count']}"), style=style),
        ),
        style="border: 1px solid #954; margin-bottom: 1em;",
    )


@module.server
def searchable_server(input: Inputs, output: Outputs, session: Session):
    """Server logic of the search module.

    Registers the ``searchable_tweet_ui`` output, which re-runs the search
    whenever the ``search_input`` text box changes.
    """
    @output
    @render.ui
    def searchable_tweet_ui():
        query = input.search_input()
        result_pd = search_query(query, 15)

        if result_pd is None:
            return ui.div(
                ui.h5("Keine Ergebnisse gefunden!")
            )

        style = "text-align: center; padding-top: 0.5em;"
        # Row-wise iteration is acceptable here: at most 15 rows are shown.
        cards = [
            _render_tweet_card(row, style)
            for _, row in result_pd.iterrows()
        ]
        return ui.page_fluid(*cards)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,7 +18,6 @@ def welcome_ui():
 | 
			
		|||
 | 
			
		||||
            [0]: https://ag-link.xyz
 | 
			
		||||
            """),
 | 
			
		||||
        # ui.output_text("dataset_infos"),
 | 
			
		||||
        ui.output_ui("dataset_infos"),
 | 
			
		||||
        ui.h3("Ursprung der Idee"),
 | 
			
		||||
        ui.markdown("""
 | 
			
		||||
| 
						 | 
				
			
			@ -52,7 +51,7 @@ with open("data/general_analysis_results.json", "r") as f:
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
@ module.server
 | 
			
		||||
def welcome_server(input, output, session, starting_value=0):
 | 
			
		||||
def welcome_server(input, output, session):
 | 
			
		||||
    @output
 | 
			
		||||
    @render.ui
 | 
			
		||||
    def dataset_infos():
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue