Completes search engine for tweets
This commit is contained in:
parent
5acc0642b6
commit
8787366ed4
1 changed file with 67 additions and 16 deletions
|
@@ -24,6 +24,7 @@ def replace_link(match):
|
||||||
url = match.group(0)
|
url = match.group(0)
|
||||||
return f'<a href="{url}">{url}</a>'
|
return f'<a href="{url}">{url}</a>'
|
||||||
|
|
||||||
|
|
||||||
def replace_hastag(match):
|
def replace_hastag(match):
|
||||||
hashtag = match.group(0)
|
hashtag = match.group(0)
|
||||||
name = str(hashtag).removeprefix("#")
|
name = str(hashtag).removeprefix("#")
|
||||||
|
@@ -44,10 +45,10 @@ with open(tfidf_vectorizer_path, "rb") as f:
|
||||||
|
|
||||||
|
|
||||||
tweets["relevance_score"] = relevance_score["relevance_score"]
|
tweets["relevance_score"] = relevance_score["relevance_score"]
|
||||||
tweets = tweets.drop(["user_id", "measured_at", "tweet_id"], axis=1)
|
tweets = tweets.drop(["user_id", "measured_at"], axis=1)
|
||||||
|
|
||||||
|
|
||||||
def search_query(query: str, limit: int = 5) -> pd.DataFrame:
|
def search_query(query: str, limit: int = 5, sorting_method: str = "score") -> (pd.DataFrame, int):
|
||||||
query_vec = tfidf_vectorizer.transform([query])
|
query_vec = tfidf_vectorizer.transform([query])
|
||||||
similarity = cosine_similarity(query_vec, tfidf_matrix).flatten()
|
similarity = cosine_similarity(query_vec, tfidf_matrix).flatten()
|
||||||
|
|
||||||
|
@@ -59,16 +60,35 @@ def search_query(query: str, limit: int = 5) -> pd.DataFrame:
|
||||||
if not len(result):
|
if not len(result):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if limit == -1:
|
||||||
|
limit = len(result)
|
||||||
|
|
||||||
|
results = None
|
||||||
|
if sorting_method == "score":
|
||||||
overall = (0.6 * result['relevance_score']) * similarity[correct_indices]
|
overall = (0.6 * result['relevance_score']) * similarity[correct_indices]
|
||||||
return result.loc[overall.sort_values(ascending=False).index].head(limit)
|
results = result.loc[overall.sort_values(ascending=False).index].head(limit)
|
||||||
|
elif sorting_method == "date_new":
|
||||||
|
results = result.sort_values(by="created_at", ascending=False).head(limit)
|
||||||
|
elif sorting_method == "date_old":
|
||||||
|
results = result.sort_values(by="created_at", ascending=True).head(limit)
|
||||||
|
|
||||||
|
return results, len(result)
|
||||||
|
|
||||||
|
|
||||||
@ module.ui
|
@ module.ui
|
||||||
def searchable_ui():
|
def searchable_ui():
|
||||||
return ui.div(
|
return ui.div(
|
||||||
ui.h2("Tweet Suchmaschine"),
|
ui.h2("Tweet Suchmaschine"),
|
||||||
ui.input_text("search_input", "Suche:", placeholder="Gebe Suchterm ein", value="Leipzig"),
|
ui.HTML("<hr>"),
|
||||||
ui.HTML("<br>"),
|
ui.row(
|
||||||
|
ui.column(6, ui.input_text("search_input", "Suche", placeholder="Gib Suchterm ein", value="Leipzig", width="100%")),
|
||||||
|
ui.column(3,
|
||||||
|
ui.input_select("sorting_method", "Sortierung", {"score": "Relevanz", "date_new": "Neuste Zuerst", "date_old": "Älteste Zuerst"}, selected="score", selectize=True, width="12em"),
|
||||||
|
ui.input_select("tweet_count", "Ergebnisse", {"5": "5", "20": "20", "50": "50", "all": "alle"}, selected="5", selectize=True, width="12em"),
|
||||||
|
style="display: flex; flex-direction: column; align-items: center; justify-content: center;"),
|
||||||
|
style="justify-content:space-between;"
|
||||||
|
|
||||||
|
),
|
||||||
ui.output_ui(id="searchable_tweet_ui"),
|
ui.output_ui(id="searchable_tweet_ui"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -80,11 +100,20 @@ def searchable_server(input: Inputs, output: Outputs, session: Session):
|
||||||
def searchable_tweet_ui():
|
def searchable_tweet_ui():
|
||||||
|
|
||||||
query = input.search_input()
|
query = input.search_input()
|
||||||
|
sorting_method = input.sorting_method()
|
||||||
|
result_count = str(input.tweet_count())
|
||||||
|
if result_count == "all":
|
||||||
|
result_count = -1
|
||||||
|
else:
|
||||||
|
result_count = int(result_count)
|
||||||
|
|
||||||
result_pd = search_query(query, 25)
|
result_pd, found_tweets = search_query(query, result_count, sorting_method=sorting_method)
|
||||||
|
|
||||||
style = "text-align: center;"
|
style = "text-align: center;"
|
||||||
tweet_ui = ui.page_fluid()
|
tweet_ui = ui.page_fluid(
|
||||||
|
ui.HTML(f"Gesamt gefundene Tweets: <b>{found_tweets}</b>"),
|
||||||
|
ui.HTML("<hr><br>"),
|
||||||
|
)
|
||||||
|
|
||||||
if result_pd is None:
|
if result_pd is None:
|
||||||
return ui.div(
|
return ui.div(
|
||||||
|
@@ -99,25 +128,47 @@ def searchable_server(input: Inputs, output: Outputs, session: Session):
|
||||||
tweet_text = re.sub(link_regex, replace_link, tweet_text)
|
tweet_text = re.sub(link_regex, replace_link, tweet_text)
|
||||||
tweet_text = re.sub(hashtag_regex, replace_hastag, tweet_text)
|
tweet_text = re.sub(hashtag_regex, replace_hastag, tweet_text)
|
||||||
|
|
||||||
|
tweet_link = f"https://twitter.com/{row['handle']}/status/{row['tweet_id']}"
|
||||||
|
|
||||||
|
user_handle = row['handle']
|
||||||
|
user_name = row['user_name']
|
||||||
|
|
||||||
tweet_ui.append(
|
tweet_ui.append(
|
||||||
ui.div(
|
ui.div(
|
||||||
ui.row(
|
ui.row(
|
||||||
ui.column(6, ui.markdown(
|
ui.column(6, ui.HTML(
|
||||||
f"**{row['user_name']}** *@{row['handle']}*"), style=style + "padding-top: 0.5em;"),
|
f"<b>{user_name}</b> <a href='https://twitter.com/{user_handle}' style='text-decoration: none;'>@{user_handle}</a>"), style=style + "padding-top: 1.5em; "),
|
||||||
ui.column(6, ui.p(f"{row['created_at']}"), style=style + "padding-top: 0.5em;"),
|
ui.column(6, ui.p(f"{row['created_at']}"), style=style + "padding-top: 1.5em;"),
|
||||||
),
|
),
|
||||||
ui.row(
|
ui.row(
|
||||||
ui.column(12, ui.div(ui.HTML(tweet_text), style=style + "font-size: 20px; margin: 1em; padding: 0.25em; border: 1px solid #bbb;")),
|
ui.column(12, ui.HTML("<hr>"),
|
||||||
|
ui.HTML(f"""
|
||||||
|
<style type="text/css">
|
||||||
|
.box {{
|
||||||
|
cursor: pointer;
|
||||||
|
text-align: center;
|
||||||
|
font-size: 20px;
|
||||||
|
padding-left: 1.5em;
|
||||||
|
padding-right: 1.5em;
|
||||||
|
}}
|
||||||
|
.box:hover {{
|
||||||
|
opacity: 0.7;
|
||||||
|
background: #eee;
|
||||||
|
}}
|
||||||
|
</style>
|
||||||
|
<div class="box"; onclick="location.href='{tweet_link}';">{tweet_text}</div></a>
|
||||||
|
"""),
|
||||||
|
ui.HTML("<hr>")),
|
||||||
),
|
),
|
||||||
ui.row(
|
ui.row(
|
||||||
ui.column(3, ui.HTML(reply_html_svg), ui.p(
|
ui.column(3, ui.HTML(reply_html_svg), ui.p(
|
||||||
f"{row['reply_count']}"), style=style, title="Anzahl Antworten"),
|
f"{row['reply_count']}"), style=style, title="Antworten"),
|
||||||
ui.column(3, ui.HTML(retweet_html_svg), ui.p(
|
ui.column(3, ui.HTML(retweet_html_svg), ui.p(
|
||||||
f"{row['retweet_count']}"), style=style, title="Anzahl Retweets"),
|
f"{row['retweet_count']}"), style=style, title="Retweets"),
|
||||||
ui.column(3, ui.HTML(like_html_svg), ui.p(
|
ui.column(3, ui.HTML(like_html_svg), ui.p(
|
||||||
f"{row['like_count']}"), style=style, title="Anzahl Likes"),
|
f"{row['like_count']}"), style=style, title="Likes"),
|
||||||
ui.column(3, ui.HTML(quote_count_svg), ui.p(
|
ui.column(3, ui.HTML(quote_count_svg), ui.p(
|
||||||
f"{row['quote_count']}"), style=style, title="Anzahl Quotes"),
|
f"{row['quote_count']}"), style=style, title="Quotes"),
|
||||||
), style="border: 2px solid #119; margin-bottom: 1.5em; border-radius: 10px;"))
|
), style="border: 2px solid #119; margin-bottom: 1.5em; border-radius: 10px;"))
|
||||||
|
|
||||||
return tweet_ui
|
return tweet_ui
|
||||||
|
|
Loading…
Reference in a new issue