Implement Nodes to compute text embeddings

This commit is contained in:
quorploop 2025-12-24 17:58:23 +01:00
parent 72765532d3
commit 49239e7e25
9 changed files with 505 additions and 25 deletions

View file

@@ -56,9 +56,9 @@ def main():
# Load posts data
logger.info("Loading posts from database")
sql = "SELECT id, author FROM posts WHERE author IS NOT NULL AND (is_cleaned IS NULL OR is_cleaned = 0) LIMIT ?"
MAX_CLEANED_POSTS = os.environ.get("MAX_CLEANED_POSTS", 100)
df = pd.read_sql(sql, con, params=[MAX_CLEANED_POSTS])
sql = "SELECT * FROM posts WHERE author IS NOT NULL AND (is_cleaned IS NULL OR is_cleaned = 0)"
# MAX_CLEANED_POSTS = os.environ.get("MAX_CLEANED_POSTS", 100)
df = pd.read_sql(sql, con)
logger.info(f"Loaded {len(df)} uncleaned posts with authors")
if df.empty: