# Knack-Scraper/transform/base.py
#
# 37 lines
# 1 KiB
# Python

"""Base transform node for data pipeline."""
from abc import ABC, abstractmethod
import sqlite3
import pandas as pd
class TransformContext:
    """Context object containing the dataframe for transformation.

    Attributes:
        df: The pandas dataframe held by this context.
    """

    def __init__(self, df: pd.DataFrame):
        """Store ``df`` on the context as-is (no copy is made)."""
        self.df = df

    def __repr__(self) -> str:
        """Return a short summary showing only the dataframe's shape."""
        # getattr keeps repr() usable even if something other than a
        # DataFrame was stored; a repr that raises makes debugging harder.
        shape = getattr(self.df, "shape", None)
        return f"{type(self).__name__}(df=<shape {shape}>)"

    def get_dataframe(self) -> pd.DataFrame:
        """Get the pandas dataframe from the context.

        Returns:
            The same object stored in ``self.df``.
        """
        return self.df
class TransformNode(ABC):
    """Abstract base class for transformation nodes.

    Each transform node implements a single transformation step
    that takes data from the database, transforms it, and
    potentially writes results back.
    """

    @abstractmethod
    def run(self, con: sqlite3.Connection, context: TransformContext) -> TransformContext:
        """Execute the transformation.

        Subclasses must override this method; the class cannot be
        instantiated until they do.

        Args:
            con: SQLite database connection
            context: TransformContext containing the input dataframe

        Returns:
            TransformContext with the transformed dataframe
        """