forked from lukaszett/Knack-Scraper
Adds TransformNode to FuzzyFind Author Names
This commit is contained in:
parent
64df8fb328
commit
72765532d3
11 changed files with 696 additions and 58 deletions
|
|
@@ -1,37 +0,0 @@
|
|||
"""Base transform node for data pipeline."""
|
||||
from abc import ABC, abstractmethod
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class TransformContext:
    """Context object containing the dataframe for transformation.

    Attributes:
        df: The pandas dataframe held by this context.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Store ``df`` on the context.

        Args:
            df: Dataframe to hold; the reference is kept as-is (no copy).
        """
        self.df = df

    def get_dataframe(self) -> pd.DataFrame:
        """Get the pandas dataframe from the context.

        Returns:
            The dataframe currently stored in ``self.df``.
        """
        return self.df
|
||||
|
||||
|
||||
class TransformNode(ABC):
    """Abstract base class for transformation nodes.

    Each transform node implements a single transformation step
    that takes data from the database, transforms it, and
    potentially writes results back.

    ``run`` is abstract, so only subclasses that override it can be
    instantiated.
    """

    # The TransformContext annotations are quoted so they are resolved lazily
    # and do not depend on definition order within the module.
    @abstractmethod
    def run(
        self, con: sqlite3.Connection, context: "TransformContext"
    ) -> "TransformContext":
        """Execute the transformation.

        Args:
            con: SQLite database connection
            context: TransformContext containing the input dataframe

        Returns:
            TransformContext with the transformed dataframe
        """
|
||||
Loading…
Add table
Add a link
Reference in a new issue