Skip to content

Commit 6ea2cbf

Browse files
Add format key to LLM configuration to solve a bug.
The example does not work with the current configuration. Adding the json format solves the issue for llama3.2. The example is also currently long (1m26); small mods give the same results, down to 10 sec on an M4 MacBook. ---- Error with scrapegraphai v1.71.0, langchain-ollama v1.0.1 and langchain v1.2.1 ```python --------------------------------------------------------------------------- JSONDecodeError Traceback (most recent call last) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/output_parsers/json.py:84, in JsonOutputParser.parse_result(self, result, partial) 83 try: ---> 84 return parse_json_markdown(text) 85 except JSONDecodeError as e: File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/utils/json.py:164, in parse_json_markdown(json_string, parser) 163 json_str = json_string if match is None else match.group(2) --> 164 return _parse_json(json_str, parser=parser) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/utils/json.py:194, in _parse_json(json_str, parser) 193 # Parse the JSON string into a Python dictionary --> 194 return parser(json_str) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/utils/json.py:137, in parse_partial_json(s, strict) 134 # If we got here, we ran out of characters to remove 135 # and still couldn't parse the string as JSON, so return the parse error 136 # for the original string. 
--> 137 return json.loads(s, strict=strict) File ~/.local/share/uv/python/cpython-3.13.7-macos-aarch64-none/lib/python3.13/json/__init__.py:359, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw) 358 kw['parse_constant'] = parse_constant --> 359 return cls(**kw).decode(s) File ~/.local/share/uv/python/cpython-3.13.7-macos-aarch64-none/lib/python3.13/json/decoder.py:345, in JSONDecoder.decode(self, s, _w) 341 """Return the Python representation of ``s`` (a ``str`` instance 342 containing a JSON document). 343 344 """ --> 345 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 346 end = _w(s, end).end() File ~/.local/share/uv/python/cpython-3.13.7-macos-aarch64-none/lib/python3.13/json/decoder.py:363, in JSONDecoder.raw_decode(self, s, idx) 362 except StopIteration as err: --> 363 raise JSONDecodeError("Expecting value", s, err.value) from None 364 return obj, end JSONDecodeError: Expecting value: line 1 column 1 (char 0) The above exception was the direct cause of the following exception: OutputParserException Traceback (most recent call last) Cell In[11], line 22 15 smart_scraper_graph = SmartScraperGraph( 16 prompt="Extract useful information from the webpage, including a description of what the company does, founders and social media links", 17 source="https://scrapegraphai.com/", 18 config=graph_config 19 ) 21 # Run the pipeline ---> 22 result = smart_scraper_graph.run() 24 import json 25 print(json.dumps(result, indent=4)) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/graphs/smart_scraper_graph.py:303, in SmartScraperGraph.run(self) 295 """ 296 Executes the scraping process and returns the answer to the prompt. 297 298 Returns: 299 str: The answer to the prompt. 
300 """ 302 inputs = {"user_prompt": self.prompt, self.input_key: self.source} --> 303 self.final_state, self.execution_info = self.graph.execute(inputs) 305 return self.final_state.get("answer", "No answer found.") File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/graphs/base_graph.py:363, in BaseGraph.execute(self, initial_state) 361 state, exec_info = (result["_state"], []) 362 else: --> 363 state, exec_info = self._execute_standard(initial_state) 365 # Print the result first 366 if "answer" in state: File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/graphs/base_graph.py:308, in BaseGraph._execute_standard(self, initial_state) 295 graph_execution_time = time.time() - start_time 296 log_graph_execution( 297 graph_name=self.graph_name, 298 source=source, (...) 306 exception=str(e), 307 ) --> 308 raise e 310 exec_info.append( 311 { 312 "node_name": "TOTAL RESULT", (...) 319 } 320 ) 322 graph_execution_time = time.time() - start_time File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/graphs/base_graph.py:281, in BaseGraph._execute_standard(self, initial_state) 278 schema = self._get_schema(current_node) 280 try: --> 281 result, node_exec_time, cb_data = self._execute_node( 282 current_node, state, llm_model, llm_model_name 283 ) 284 total_exec_time += node_exec_time 286 if cb_data: File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/graphs/base_graph.py:205, in BaseGraph._execute_node(self, current_node, state, llm_model, llm_model_name) 200 curr_time = time.time() 202 with self.callback_manager.exclusive_get_callback( 203 llm_model, llm_model_name 204 ) as cb: --> 205 result = current_node.execute(state) 206 node_exec_time = time.time() - curr_time 208 cb_data = None File 
~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/nodes/generate_answer_node.py:193, in GenerateAnswerNode.execute(self, state) 190 chain = chain | output_parser 192 try: --> 193 answer = self.invoke_with_timeout( 194 chain, {"content": doc, "question": user_prompt}, self.timeout 195 ) 196 except (Timeout, json.JSONDecodeError) as e: 197 error_msg = ( 198 "Response timeout exceeded" 199 if isinstance(e, Timeout) 200 else "Invalid JSON response format" 201 ) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/scrapegraphai/nodes/generate_answer_node.py:79, in GenerateAnswerNode.invoke_with_timeout(self, chain, inputs, timeout) 77 try: 78 start_time = time.time() ---> 79 response = chain.invoke(inputs) 80 if time.time() - start_time > timeout: 81 raise Timeout(f"Response took longer than {timeout} seconds") File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/runnables/base.py:3151, in RunnableSequence.invoke(self, input, config, **kwargs) 3149 input_ = context.run(step.invoke, input_, config, **kwargs) 3150 else: -> 3151 input_ = context.run(step.invoke, input_, config) 3152 # finish the root run 3153 except BaseException as e: File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/output_parsers/base.py:201, in BaseOutputParser.invoke(self, input, config, **kwargs) 193 @OverRide 194 def invoke( 195 self, (...) 
198 **kwargs: Any, 199 ) -> T: 200 if isinstance(input, BaseMessage): --> 201 return self._call_with_config( 202 lambda inner_input: self.parse_result( 203 [ChatGeneration(message=inner_input)] 204 ), 205 input, 206 config, 207 run_type="parser", 208 ) 209 return self._call_with_config( 210 lambda inner_input: self.parse_result([Generation(text=inner_input)]), 211 input, 212 config, 213 run_type="parser", 214 ) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/runnables/base.py:2058, in Runnable._call_with_config(self, func, input_, config, run_type, serialized, **kwargs) 2054 child_config = patch_config(config, callbacks=run_manager.get_child()) 2055 with set_config_context(child_config) as context: 2056 output = cast( 2057 "Output", -> 2058 context.run( 2059 call_func_with_variable_args, # type: ignore[arg-type] 2060 func, 2061 input_, 2062 config, 2063 run_manager, 2064 **kwargs, 2065 ), 2066 ) 2067 except BaseException as e: 2068 run_manager.on_chain_error(e) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/runnables/config.py:435, in call_func_with_variable_args(func, input, config, run_manager, **kwargs) 433 if run_manager is not None and accepts_run_manager(func): 434 kwargs["run_manager"] = run_manager --> 435 return func(input, **kwargs) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/output_parsers/base.py:202, in BaseOutputParser.invoke.<locals>.<lambda>(inner_input) 193 @OverRide 194 def invoke( 195 self, (...) 
198 **kwargs: Any, 199 ) -> T: 200 if isinstance(input, BaseMessage): 201 return self._call_with_config( --> 202 lambda inner_input: self.parse_result( 203 [ChatGeneration(message=inner_input)] 204 ), 205 input, 206 config, 207 run_type="parser", 208 ) 209 return self._call_with_config( 210 lambda inner_input: self.parse_result([Generation(text=inner_input)]), 211 input, 212 config, 213 run_type="parser", 214 ) File ~/Projects/weekend_projects/coffee/coffee_llm_crawl/.venv/lib/python3.13/site-packages/langchain_core/output_parsers/json.py:87, in JsonOutputParser.parse_result(self, result, partial) 85 except JSONDecodeError as e: 86 msg = f"Invalid json output: {text}" ---> 87 raise OutputParserException(msg, llm_output=text) from e OutputParserException: Invalid json output: This text is a web page for the company ScrapeGraphAI, which provides a web scraping API. The page includes information about the product, its features, and customer testimonials. Here's a breakdown of the different sections: 1. **Introduction**: A brief introduction to ScrapeGraphAI, its mission, and its unique approach to web data extraction. 2. **Testimonials**: Quotes from satisfied customers who have used ScrapeGraphAI for their web scraping needs. 3. **Team**: Information about the team behind ScrapeGraphAI, including their backgrounds and expertise. 4. **Give your AI Agent superpowers with lightning-fast web data!**: A call to action to get started with ScrapeGraphAI's API, which promises to provide fast and reliable web data for AI agents. 5. **ScrapeGraphAI**: A link to the company's GitHub page, where users can find more information about the project. 6. **Contact Us**: Information on how to contact ScrapeGraphAI, including a contact email address. 7. **Legal Pages**: Links to the company's privacy policy, terms of service, and manifesto. 
The text also includes several links to external websites, such as LinkedIn profiles, GitHub repositories, and Reddit communities, where users can find more information about ScrapeGraphAI and its community. For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ```
1 parent 95e430b commit 6ea2cbf

1 file changed

Lines changed: 2 additions & 1 deletion

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ from scrapegraphai.graphs import SmartScraperGraph
7373
graph_config = {
7474
"llm": {
7575
"model": "ollama/llama3.2",
76-
"model_tokens": 8192
76+
"model_tokens": 8192,
77+
"format": "json",
7778
},
7879
"verbose": True,
7980
"headless": False,

0 commit comments

Comments
 (0)