diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 05710dd9..5d558ec4 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -5,7 +5,7 @@ from decimal import Decimal from logging import getLogger from pathlib import Path -from typing import Annotated, Any, Self +from typing import TYPE_CHECKING, Annotated, Any, Self from pydantic import AliasChoices, BeforeValidator, Field, model_validator from typing_extensions import TypedDict @@ -23,6 +23,9 @@ ) from apify._utils import docs_group +if TYPE_CHECKING: + from collections.abc import Callable + logger = getLogger(__name__) @@ -34,6 +37,20 @@ def _transform_to_list(value: Any) -> list[str] | None: return value if isinstance(value, list) else str(value).split(',') +def _default_if_empty(*, default: Any) -> Callable[[Any], Any]: + """Build a validator that substitutes `default` for an empty-string env var. + + The Apify platform sometimes sets an env var to an empty string instead of leaving it unset. For fields whose + target type cannot parse `''` (datetimes, numbers, booleans, ...), passing the value straight through would crash + validation and, in turn, `Actor.init()`. Treat `''` as "not provided" and fall back to the field default instead. + """ + + def transform(value: Any) -> Any: + return default if value == '' else value + + return transform + + class ActorStorages(TypedDict): """Mapping of storage aliases to their IDs, grouped by storage type. @@ -59,9 +76,9 @@ def _load_storage_keys(data: None | str | ActorStorages) -> ActorStorages | None `ActorStorages` dict when set programmatically. Returns: - Normalized storage mapping, or `None` if the input is `None`. + Normalized storage mapping, or `None` if the input is `None` or an empty string. """ - if data is None: + if data is None or data == '': return None storage_mapping = json.loads(data) if isinstance(data, str) else data return { @@ -206,6 +223,7 @@ class Configuration(CrawleeConfiguration): alias='apify_dedicated_cpus', description='Number of CPU cores reserved for the actor, based on allocated memory', ), + BeforeValidator(_default_if_empty(default=None)), ] = None default_dataset_id: Annotated[ @@ -286,6 +304,7 @@ class Configuration(CrawleeConfiguration): alias='apify_is_at_home', description='True if the Actor is running on Apify servers', ), + BeforeValidator(_default_if_empty(default=False)), ] = False max_paid_dataset_items: Annotated[ @@ -294,7 +313,7 @@ class Configuration(CrawleeConfiguration): alias='actor_max_paid_dataset_items', description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit', ), - BeforeValidator(lambda val: val if val != '' else None), + BeforeValidator(_default_if_empty(default=None)), ] = None max_total_charge_usd: Annotated[ @@ -303,7 +322,7 @@ class Configuration(CrawleeConfiguration): alias='actor_max_total_charge_usd', description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit', ), - BeforeValidator(lambda val: val if val != '' else None), + BeforeValidator(_default_if_empty(default=None)), ] = None test_pay_per_event: Annotated[ @@ -312,6 +331,7 @@ class Configuration(CrawleeConfiguration): alias='actor_test_pay_per_event', description='Enable pay-per-event functionality for local development', ), + BeforeValidator(_default_if_empty(default=False)), ] = False meta_origin: Annotated[ @@ -328,6 +348,7 @@ class Configuration(CrawleeConfiguration): alias='apify_metamorph_after_sleep_millis', description='How long the Actor needs to wait before exiting after triggering a metamorph', ), + BeforeValidator(_default_if_empty(default=timedelta(minutes=5))), ] = timedelta(minutes=5) proxy_hostname: Annotated[ @@ -352,6 +373,7 @@ class Configuration(CrawleeConfiguration): alias='apify_proxy_port', description='Port to communicate with the Apify proxy', ), + BeforeValidator(_default_if_empty(default=8000)), ] = 8000 proxy_status_url: Annotated[ @@ -371,6 +393,7 @@ class Configuration(CrawleeConfiguration): ), description='Date when the Actor was started', ), + BeforeValidator(_default_if_empty(default=None)), ] = None timeout_at: Annotated[ @@ -382,7 +405,7 @@ class Configuration(CrawleeConfiguration): ), description='Date when the Actor will time out', ), - BeforeValidator(lambda val: val if val != '' else None), # We should accept empty environment variables as well + BeforeValidator(_default_if_empty(default=None)), ] = None standby_url: Annotated[ @@ -416,7 +439,7 @@ class Configuration(CrawleeConfiguration): alias='apify_user_is_paying', description='True if the user calling the Actor is paying user', ), - BeforeValidator(lambda val: False if val == '' else val), + BeforeValidator(_default_if_empty(default=False)), ] = False web_server_port: Annotated[ @@ -429,6 +452,7 @@ class Configuration(CrawleeConfiguration): description='TCP port for the Actor to start an HTTP server on' 'This server can be used to receive external messages or expose monitoring and control interfaces', ), + BeforeValidator(_default_if_empty(default=4321)), ] = 4321 web_server_url: Annotated[ @@ -470,7 +494,7 @@ class Configuration(CrawleeConfiguration): alias='apify_charged_actor_event_counts', description='Counts of events that were charged for the actor', ), - BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), + BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) and data else data or None), ] = None actor_storages: Annotated[ diff --git a/tests/unit/actor/test_configuration.py b/tests/unit/actor/test_configuration.py index 0fd686dd..689823f1 100644 --- a/tests/unit/actor/test_configuration.py +++ b/tests/unit/actor/test_configuration.py @@ -1,4 +1,5 @@ import json +from datetime import timedelta from decimal import Decimal from pathlib import Path @@ -373,6 +374,13 @@ def test_actor_pricing_info_env_var_empty_becomes_none(monkeypatch: pytest.Monke assert config.actor_pricing_info is None +def test_charged_event_counts_env_var_empty_string_becomes_none(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that an empty env var for charged_event_counts is converted to None instead of crashing.""" + monkeypatch.setenv('APIFY_CHARGED_ACTOR_EVENT_COUNTS', '') + config = ApifyConfiguration() + assert config.charged_event_counts is None + + def test_actor_storage_json_env_var(monkeypatch: pytest.MonkeyPatch) -> None: """Test that actor_storages_json is parsed from JSON env var.""" datasets = {'default': 'default_dataset_id', 'custom': 'custom_dataset_id'} @@ -392,3 +400,35 @@ def test_actor_storage_json_env_var(monkeypatch: pytest.MonkeyPatch) -> None: assert config.actor_storages['datasets'] == datasets assert config.actor_storages['request_queues'] == request_queues assert config.actor_storages['key_value_stores'] == key_value_stores + + +def test_actor_storages_env_var_empty_string_becomes_none(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that an empty env var for actor_storages is converted to None instead of crashing.""" + monkeypatch.setenv('ACTOR_STORAGES_JSON', '') + config = ApifyConfiguration() + assert config.actor_storages is None + + +@pytest.mark.parametrize( + ('env_var', 'attr', 'expected'), + [ + ('APIFY_STARTED_AT', 'started_at', None), + ('APIFY_TIMEOUT_AT', 'timeout_at', None), + ('APIFY_DEDICATED_CPUS', 'dedicated_cpus', None), + ('ACTOR_MAX_PAID_DATASET_ITEMS', 'max_paid_dataset_items', None), + ('ACTOR_MAX_TOTAL_CHARGE_USD', 'max_total_charge_usd', None), + ('APIFY_PROXY_PORT', 'proxy_port', 8000), + ('ACTOR_WEB_SERVER_PORT', 'web_server_port', 4321), + ('APIFY_METAMORPH_AFTER_SLEEP_MILLIS', 'metamorph_after_sleep', timedelta(minutes=5)), + ('APIFY_IS_AT_HOME', 'is_at_home', False), + ('ACTOR_TEST_PAY_PER_EVENT', 'test_pay_per_event', False), + ('APIFY_USER_IS_PAYING', 'user_is_paying', False), + ], +) +def test_typed_env_var_empty_string_falls_back_to_default( + monkeypatch: pytest.MonkeyPatch, env_var: str, attr: str, expected: object +) -> None: + """Platform may set a typed env var to '' instead of leaving it unset; that must not crash `Actor.init()`.""" + monkeypatch.setenv(env_var, '') + config = ApifyConfiguration() + assert getattr(config, attr) == expected