diff --git a/pyproject.toml b/pyproject.toml index bc05d589..c3add790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Topic :: Software Development :: Libraries", + "Typing :: Typed", ] keywords = [ "apify", @@ -239,7 +240,6 @@ exclude-newer = "24 hours" [tool.uv.exclude-newer-package] # Allow internal Apify packages to install immediately. apify-client = false -apify-shared = false apify_fingerprint_datapoints = false crawlee = false diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 23423296..f3ecad83 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -186,7 +186,6 @@ async def __aenter__(self) -> Self: # Update the global Actor proxy to refer to this instance. cast('Proxy', Actor).__wrapped__ = self # ty: ignore[invalid-assignment] self._is_exiting = False - self._was_final_persist_state_emitted = False # Initialize the storage client and register it in the service locator. _ = self._storage_client diff --git a/src/apify/_consts.py b/src/apify/_consts.py index 2d2b62ac..7bbee006 100644 --- a/src/apify/_consts.py +++ b/src/apify/_consts.py @@ -21,9 +21,6 @@ ) """Regex matching encrypted input values with base64-encoded components.""" -EXIT_CODE_SUCCESS = 0 -"""Exit code indicating that the Actor finished successfully.""" - EXIT_CODE_ERROR_USER_FUNCTION_THREW = 91 """Exit code indicating that the Actor's main function raised an exception.""" diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index c7c2bc16..6065c45c 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -109,8 +109,6 @@ def private_decrypt( decipher_bytes = decryptor.update(encrypted_data_bytes) + decryptor.finalize() except InvalidTagException as exc: raise ValueError('Decryption failed, malformed encrypted value or password.') from exc - except Exception: - raise return decipher_bytes.decode('utf-8') diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py index 023f25e3..fec4d4dd 100644 --- a/src/apify/_proxy_configuration.py +++ b/src/apify/_proxy_configuration.py @@ -1,12 +1,10 @@ from __future__ import annotations -import ipaddress import json import re from dataclasses import dataclass, field from re import Pattern from typing import TYPE_CHECKING, Any -from urllib.parse import urljoin, urlparse import impit from yarl import URL @@ -38,24 +36,6 @@ SESSION_ID_MAX_LENGTH = 50 -def is_url(url: str) -> bool: - """Check if the given string is a valid URL.""" - try: - parsed_url = urlparse(urljoin(url, '/')) - has_all_parts = all([parsed_url.scheme, parsed_url.netloc, parsed_url.path]) - is_domain = '.' in parsed_url.netloc - is_localhost = parsed_url.netloc == 'localhost' - try: - ipaddress.ip_address(parsed_url.netloc) - is_ip_address = True - except Exception: - is_ip_address = False - - return has_all_parts and any([is_domain, is_localhost, is_ip_address]) - except Exception: - return False - - def _check( value: Any, *, diff --git a/src/apify/_utils.py b/src/apify/_utils.py index ca099f70..8469ae97 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -6,7 +6,6 @@ import sys from collections.abc import Callable from contextlib import asynccontextmanager -from enum import Enum from functools import wraps from importlib import metadata from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast @@ -123,13 +122,6 @@ def wrapper(func: T) -> T: return wrapper -def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any: - """Extract the value of an enumeration member if it is an Enum, otherwise return the original value.""" - if isinstance(maybe_enum_member, Enum): - return maybe_enum_member.value - return maybe_enum_member - - class ReentrantLock: """A reentrant lock implementation for asyncio using asyncio.Lock.""" diff --git a/src/apify/scrapy/_logging_config.py b/src/apify/scrapy/_logging_config.py index 2f51074a..19d28c3d 100644 --- a/src/apify/scrapy/_logging_config.py +++ b/src/apify/scrapy/_logging_config.py @@ -10,7 +10,7 @@ # Define logger names. _PRIMARY_LOGGERS = ['apify', 'apify_client', 'scrapy'] -_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted'] +_SUPPLEMENTAL_LOGGERS = ['filelock', 'protego', 'twisted'] _ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS # Mutable state shared with the Scrapy monkey-patch below. `initialize_logging` refreshes diff --git a/src/apify/storage_clients/_apify/_alias_resolving.py b/src/apify/storage_clients/_apify/_alias_resolving.py index fd4c23fe..1bfff85e 100644 --- a/src/apify/storage_clients/_apify/_alias_resolving.py +++ b/src/apify/storage_clients/_apify/_alias_resolving.py @@ -8,7 +8,7 @@ from apify_client import ApifyClientAsync -from ._utils import hash_api_base_url_and_token +from ._utils import hash_api_public_base_url_and_token if TYPE_CHECKING: from collections.abc import Callable @@ -177,7 +177,7 @@ async def _get_alias_init_lock(cls) -> Lock: async def _get_alias_map(cls, configuration: Configuration) -> dict[str, str]: """Get the aliases and storage ids mapping from the default kvs. - Mapping is loaded from kvs only once and is shared for all instances of the _AliasResolver class. + Mapping is loaded from kvs only once and is shared for all instances of the AliasResolver class. Args: configuration: Configuration object to use for accessing the default KVS. @@ -222,12 +222,11 @@ async def store_mapping(self, storage_id: str) -> None: if not self._configuration.is_at_home: logging.getLogger(__name__).debug( - '_AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.' + 'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.' ) return default_kvs_client = await self._get_default_kvs_client(self._configuration) - await default_kvs_client.get() try: record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY) @@ -246,7 +245,7 @@ def _storage_key(self) -> str: [ self._storage_type, self._alias, - hash_api_base_url_and_token(self._configuration), + hash_api_public_base_url_and_token(self._configuration), ] ) diff --git a/src/apify/storage_clients/_apify/_dataset_client.py b/src/apify/storage_clients/_apify/_dataset_client.py index 1d3351d8..de9634fd 100644 --- a/src/apify/storage_clients/_apify/_dataset_client.py +++ b/src/apify/storage_clients/_apify/_dataset_client.py @@ -144,6 +144,8 @@ async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) -> async with self._charge_lock(), self._lock: items = data if self._is_sequence_of_items(data) else [data] + if not items: + return limit = self._compute_limit_for_push(len(items)) items = items[:limit] diff --git a/src/apify/storage_clients/_apify/_request_queue_shared_client.py b/src/apify/storage_clients/_apify/_request_queue_shared_client.py index 496ec4f1..ea4965c4 100644 --- a/src/apify/storage_clients/_apify/_request_queue_shared_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_shared_client.py @@ -198,7 +198,9 @@ async def fetch_next_request(self) -> Request | None: ) return None - # Use get request to ensure we have the full request object. + # `_get_or_hydrate_request` may return a request from the queue-head cache, which is populated by + # `list_and_lock_head` and only holds a partial request (no user data, no headers). Re-fetch it by id to + # guarantee the caller gets the full request object. request = await self._get_request_by_id(next_request_id) if request is None: logger.debug( diff --git a/src/apify/storage_clients/_apify/_storage_client.py b/src/apify/storage_clients/_apify/_storage_client.py index 02c06258..b0c569b5 100644 --- a/src/apify/storage_clients/_apify/_storage_client.py +++ b/src/apify/storage_clients/_apify/_storage_client.py @@ -9,7 +9,7 @@ from ._dataset_client import ApifyDatasetClient from ._key_value_store_client import ApifyKeyValueStoreClient from ._request_queue_client import ApifyRequestQueueClient -from ._utils import hash_api_base_url_and_token +from ._utils import hash_api_public_base_url_and_token from apify._configuration import Configuration as ApifyConfiguration from apify._utils import docs_group @@ -126,7 +126,9 @@ def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> H if isinstance(configuration, ApifyConfiguration): # It is not supported to open exactly same queue with 'single' and 'shared' client at the same time. # Whichever client variation gets used first, wins. - return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration) + return super().get_storage_client_cache_key(configuration), hash_api_public_base_url_and_token( + configuration + ) config_class = type(configuration) raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}')) diff --git a/src/apify/storage_clients/_apify/_utils.py b/src/apify/storage_clients/_apify/_utils.py index c9bcd54d..92127549 100644 --- a/src/apify/storage_clients/_apify/_utils.py +++ b/src/apify/storage_clients/_apify/_utils.py @@ -39,7 +39,7 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> return url_safe_key[:request_id_length] -def hash_api_base_url_and_token(configuration: Configuration) -> str: +def hash_api_public_base_url_and_token(configuration: Configuration) -> str: """Hash configuration.api_public_base_url and configuration.token in deterministic way.""" if configuration.api_public_base_url is None or configuration.token is None: raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.") diff --git a/tests/unit/storage_clients/test_alias_resolver.py b/tests/unit/storage_clients/test_alias_resolver.py index 707b6a56..2dc3e8ba 100644 --- a/tests/unit/storage_clients/test_alias_resolver.py +++ b/tests/unit/storage_clients/test_alias_resolver.py @@ -105,7 +105,6 @@ async def test_store_mapping_uses_injected_configuration_is_at_home() -> None: fake_kvs_client = AsyncMock() fake_kvs_client.get_record = AsyncMock(return_value=None) fake_kvs_client.set_record = AsyncMock(return_value=None) - fake_kvs_client.get = AsyncMock(return_value={'id': 'default-kvs-id'}) with patch.object(AliasResolver, '_get_default_kvs_client', return_value=fake_kvs_client): await resolver.store_mapping(storage_id='new-id-789') diff --git a/tests/unit/test_proxy_configuration.py b/tests/unit/test_proxy_configuration.py index 29ac2876..2d6ed74c 100644 --- a/tests/unit/test_proxy_configuration.py +++ b/tests/unit/test_proxy_configuration.py @@ -13,7 +13,7 @@ from apify_client import ApifyClientAsync from apify._consts import ApifyEnvVars -from apify._proxy_configuration import ProxyConfiguration, is_url +from apify._proxy_configuration import ProxyConfiguration if TYPE_CHECKING: from pytest_httpserver import HTTPServer @@ -588,37 +588,6 @@ def request_handler(request: Request, response: Response) -> Response: assert call_mock.call_count == 0 -def test_is_url_validation() -> None: - assert is_url('http://dummy-proxy.com:8000') is True - assert is_url('https://example.com') is True - assert is_url('http://localhost') is True - assert is_url('https://12.34.56.78') is True - assert is_url('http://12.34.56.78:9012') is True - assert is_url('http://::1') is True - assert is_url('https://2f45:4da6:8f56:af8c:5dce:c1de:14d2:8661') is True - - assert is_url('dummy-proxy.com:8000') is False - assert is_url('gyfwgfhkjhljkfhdsf') is False - assert is_url('http://') is False - assert is_url('http://example') is False - assert is_url('http:/example.com') is False - assert is_url('12.34.56.78') is False - assert is_url('::1') is False - assert is_url('https://4da6:8f56:af8c:5dce:c1de:14d2:8661') is False - - -@pytest.mark.parametrize( - 'value', - [ - pytest.param('', id='empty_string'), - pytest.param(None, id='none'), - ], -) -def test_is_url_with_completely_unparsable_input(value: str | None) -> None: - """Test is_url with input that causes urlparse to fail.""" - assert is_url(value) is False # ty: ignore[invalid-argument-type] - - def test_check_min_length_raises() -> None: """Test that _check raises ValueError for values shorter than min_length.""" from apify._proxy_configuration import _check diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 526fd5d2..fcc5a4f4 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,12 +1,11 @@ from __future__ import annotations -from enum import Enum from typing import TYPE_CHECKING from unittest.mock import patch import pytest -from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython, maybe_extract_enum_member_value +from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython def test_ipython_detection_when_active() -> None: @@ -39,24 +38,6 @@ def test_get_system_info_ipython_flag(*, ipython_running: bool) -> None: assert info['is_running_in_ipython'] is ipython_running -def test_maybe_extract_enum_member_value_with_enum() -> None: - """Test that enum members are unwrapped to their values.""" - - class Color(Enum): - RED = 'red' - BLUE = 'blue' - - assert maybe_extract_enum_member_value(Color.RED) == 'red' - assert maybe_extract_enum_member_value(Color.BLUE) == 'blue' - - -def test_maybe_extract_enum_member_value_with_non_enum() -> None: - """Test that non-enum values are returned unchanged.""" - assert maybe_extract_enum_member_value('hello') == 'hello' - assert maybe_extract_enum_member_value(42) == 42 - assert maybe_extract_enum_member_value(None) is None - - if TYPE_CHECKING: # Regression guard: if `docs_group`/`docs_name` stop being identity-typed (`Callable[[T], T]`), # the decorated classes degrade to `Unknown`, the accesses below stop erroring, and the diff --git a/uv.lock b/uv.lock index 08991e4e..8816cc39 100644 --- a/uv.lock +++ b/uv.lock @@ -7,10 +7,9 @@ exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for exclude-newer-span = "PT24H" [options.exclude-newer-package] -apify-fingerprint-datapoints = false -crawlee = false -apify-shared = false apify-client = false +crawlee = false +apify-fingerprint-datapoints = false [[package]] name = "annotated-types"