forked from lukaszett/Knack-Scraper
Makes transformer script executable via cli
This commit is contained in:
parent
8fae350b34
commit
7c2e34906e
11 changed files with 648 additions and 37 deletions
|
|
@@ -214,8 +214,15 @@ def create_default_pipeline(device: str = "cpu",
|
|||
"""
|
||||
from author_node import NerAuthorNode, FuzzyAuthorNode
|
||||
from embeddings_node import TextEmbeddingNode, UmapNode
|
||||
from url_node import URLNode
|
||||
|
||||
pipeline = ParallelPipeline(max_workers=max_workers, use_processes=False)
|
||||
|
||||
pipeline.add_node(NodeConfig(
|
||||
node_class=URLNode,
|
||||
dependencies=[],
|
||||
name='URLNode'
|
||||
))
|
||||
|
||||
# Add AuthorNode (no dependencies)
|
||||
pipeline.add_node(NodeConfig(
|
||||
|
|
@@ -243,7 +250,7 @@ def create_default_pipeline(device: str = "cpu",
|
|||
'device': device,
|
||||
'model_path': os.environ.get('GTE_MODEL_PATH')
|
||||
},
|
||||
- dependencies=[],
|
||||
+ dependencies=['AuthorNode'],
|
||||
name='TextEmbeddingNode'
|
||||
))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue