Implement Nodes to compute text embeddings

This commit is contained in:
quorploop 2025-12-24 17:58:23 +01:00
parent 72765532d3
commit 49239e7e25
9 changed files with 505 additions and 25 deletions

View file

@@ -227,7 +227,9 @@ def main():
num_threads=num_threads,
)
postdf.to_sql("posts", con, if_exists="append")
# Drop category and tags columns as they're stored in separate tables
postdf = postdf.drop(columns=['category', 'tags'])
postdf.to_sql("posts", con, if_exists="append", index=False)
# Tags
tag_dim, tag_map = build_dimension_and_mapping(postdf, 'tags', 'tag')