|
7 | 7 | from langchain_core.documents import Document |
8 | 8 | from .base_node import BaseNode |
9 | 9 | from ..utils.remover import remover |
| 10 | +from ..utils.proxy_generator import proxy_generator |
10 | 11 |
|
11 | 12 |
|
12 | 13 | class FetchNode(BaseNode): |
@@ -37,13 +38,16 @@ class FetchNode(BaseNode): |
37 | 38 | to succeed. |
38 | 39 | """ |
39 | 40 |
|
40 | | - def __init__(self, input: str, output: List[str], node_name: str = "Fetch"): |
| 41 | + def __init__(self, input: str, output: List[str], num_prox: int = True, |
| 42 | + node_name: str = "Fetch"): |
41 | 43 | """ |
42 | 44 | Initializes the FetchHTMLNode with a node name and node type. |
43 | 45 | Arguments: |
44 | 46 | node_name (str): name of the node |
| 47 | + num_prox (int): number of proxies to rotate through; proxies are used only if it is greater than 1 |
45 | 48 | """ |
46 | 49 | super().__init__(node_name, "node", input, output, 1) |
| 50 | + self.num_prox = num_prox |
47 | 51 |
|
48 | 52 | def execute(self, state): |
49 | 53 | """ |
@@ -78,7 +82,11 @@ def execute(self, state): |
78 | 82 |
|
79 | 83 | # if it is a URL |
80 | 84 | else: |
81 | | - loader = AsyncHtmlLoader(source) |
| 85 | + if self.num_prox > 1: |
| 86 | + loader = AsyncHtmlLoader( |
| 87 | + source, proxies=proxy_generator(self.num_prox)) |
| 88 | + else: |
| 89 | + loader = AsyncHtmlLoader(source) |
82 | 90 | document = loader.load() |
83 | 91 | compressed_document = [ |
84 | 92 | Document(page_content=remover(str(document)))] |
|
0 commit comments