forked from lukaszett/Knack-Scraper
37 lines
1 KiB
Python
37 lines
1 KiB
Python
"""Base transform node for data pipeline."""
|
|
from abc import ABC, abstractmethod
|
|
import sqlite3
|
|
import pandas as pd
|
|
|
|
|
|
class TransformContext:
    """Context object containing the dataframe for transformation.

    Attributes:
        df: The pandas dataframe wrapped by this context.
    """

    def __init__(self, df: pd.DataFrame):
        """Create a context around an existing dataframe.

        Args:
            df: Dataframe to wrap. It is stored by reference; no copy is made.
        """
        self.df = df

    def get_dataframe(self) -> pd.DataFrame:
        """Get the pandas dataframe from the context.

        Returns:
            The same dataframe object that is stored on ``self.df``.
        """
        return self.df
|
|
|
|
|
|
class TransformNode(ABC):
    """Abstract base class for transformation nodes.

    Each transform node implements a single transformation step
    that takes data from the database, transforms it, and
    potentially writes results back.

    Subclasses must override ``run``; instantiating a class that leaves
    ``run`` abstract raises ``TypeError``.
    """

    # The context type is annotated as a forward-reference string so this
    # class definition does not depend on where TransformContext is defined.
    @abstractmethod
    def run(
        self, con: sqlite3.Connection, context: "TransformContext"
    ) -> "TransformContext":
        """Execute the transformation.

        Args:
            con: SQLite database connection
            context: TransformContext containing the input dataframe

        Returns:
            TransformContext with the transformed dataframe
        """
|