Implements feature to clean up the authors' free-text field

This commit is contained in:
quorploop 2025-12-21 21:18:05 +01:00
parent bcd210ce01
commit 64df8fb328
14 changed files with 804 additions and 310 deletions

37
transform/base.py Normal file
View file

@ -0,0 +1,37 @@
"""Base transform node for data pipeline."""
from abc import ABC, abstractmethod
import sqlite3
import pandas as pd
class TransformContext:
    """Context object containing the dataframe for transformation.

    The context is a thin wrapper around a single pandas DataFrame. It is
    the type that ``TransformNode.run`` accepts and returns.
    """

    def __init__(self, df: pd.DataFrame):
        """Create a context wrapping *df*.

        Args:
            df: The dataframe to carry. It is stored by reference, not
                copied.
        """
        # Stored as-is: the context and the caller share the same object.
        self.df = df

    def get_dataframe(self) -> pd.DataFrame:
        """Get the pandas dataframe from the context.

        Returns:
            The same DataFrame object that was passed to the constructor
            (or later assigned to ``self.df``).
        """
        return self.df
class TransformNode(ABC):
    """Abstract base class for transformation nodes.

    Each transform node implements a single transformation step
    that takes data from the database, transforms it, and
    potentially writes results back.

    Subclasses must override ``run``; the class cannot be instantiated
    until they do.
    """

    @abstractmethod
    def run(self, con: sqlite3.Connection, context: TransformContext) -> TransformContext:
        """Execute the transformation.

        Args:
            con: SQLite database connection
            context: TransformContext containing the input dataframe

        Returns:
            TransformContext with the transformed dataframe
        """