Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Topic :: Software Development :: Libraries",
"Typing :: Typed",
]
keywords = [
"apify",
Expand Down Expand Up @@ -239,7 +240,6 @@ exclude-newer = "24 hours"
[tool.uv.exclude-newer-package]
# Allow internal Apify packages to install immediately.
apify-client = false
apify-shared = false
apify_fingerprint_datapoints = false
crawlee = false

Expand Down
1 change: 0 additions & 1 deletion src/apify/_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ async def __aenter__(self) -> Self:
# Update the global Actor proxy to refer to this instance.
cast('Proxy', Actor).__wrapped__ = self # ty: ignore[invalid-assignment]
self._is_exiting = False
self._was_final_persist_state_emitted = False

# Initialize the storage client and register it in the service locator.
_ = self._storage_client
Expand Down
3 changes: 0 additions & 3 deletions src/apify/_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
)
"""Regex matching encrypted input values with base64-encoded components."""

EXIT_CODE_SUCCESS = 0
"""Exit code indicating that the Actor finished successfully."""

EXIT_CODE_ERROR_USER_FUNCTION_THREW = 91
"""Exit code indicating that the Actor's main function raised an exception."""

Expand Down
2 changes: 0 additions & 2 deletions src/apify/_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ def private_decrypt(
decipher_bytes = decryptor.update(encrypted_data_bytes) + decryptor.finalize()
except InvalidTagException as exc:
raise ValueError('Decryption failed, malformed encrypted value or password.') from exc
except Exception:
raise

return decipher_bytes.decode('utf-8')

Expand Down
20 changes: 0 additions & 20 deletions src/apify/_proxy_configuration.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from __future__ import annotations

import ipaddress
import json
import re
from dataclasses import dataclass, field
from re import Pattern
from typing import TYPE_CHECKING, Any
from urllib.parse import urljoin, urlparse

import impit
from yarl import URL
Expand Down Expand Up @@ -38,24 +36,6 @@
SESSION_ID_MAX_LENGTH = 50


def is_url(url: str) -> bool:
"""Check if the given string is a valid URL."""
try:
parsed_url = urlparse(urljoin(url, '/'))
has_all_parts = all([parsed_url.scheme, parsed_url.netloc, parsed_url.path])
is_domain = '.' in parsed_url.netloc
is_localhost = parsed_url.netloc == 'localhost'
try:
ipaddress.ip_address(parsed_url.netloc)
is_ip_address = True
except Exception:
is_ip_address = False

return has_all_parts and any([is_domain, is_localhost, is_ip_address])
except Exception:
return False


def _check(
value: Any,
*,
Expand Down
8 changes: 0 additions & 8 deletions src/apify/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import sys
from collections.abc import Callable
from contextlib import asynccontextmanager
from enum import Enum
from functools import wraps
from importlib import metadata
from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast
Expand Down Expand Up @@ -123,13 +122,6 @@ def wrapper(func: T) -> T:
return wrapper


def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
"""Extract the value of an enumeration member if it is an Enum, otherwise return the original value."""
if isinstance(maybe_enum_member, Enum):
return maybe_enum_member.value
return maybe_enum_member


class ReentrantLock:
"""A reentrant lock implementation for asyncio using asyncio.Lock."""

Expand Down
2 changes: 1 addition & 1 deletion src/apify/scrapy/_logging_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# Define logger names.
_PRIMARY_LOGGERS = ['apify', 'apify_client', 'scrapy']
_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted']
_SUPPLEMENTAL_LOGGERS = ['filelock', 'protego', 'twisted']
_ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS

# Mutable state shared with the Scrapy monkey-patch below. `initialize_logging` refreshes
Expand Down
9 changes: 4 additions & 5 deletions src/apify/storage_clients/_apify/_alias_resolving.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from apify_client import ApifyClientAsync

from ._utils import hash_api_base_url_and_token
from ._utils import hash_api_public_base_url_and_token

if TYPE_CHECKING:
from collections.abc import Callable
Expand Down Expand Up @@ -177,7 +177,7 @@ async def _get_alias_init_lock(cls) -> Lock:
async def _get_alias_map(cls, configuration: Configuration) -> dict[str, str]:
"""Get the aliases and storage ids mapping from the default kvs.

Mapping is loaded from kvs only once and is shared for all instances of the _AliasResolver class.
Mapping is loaded from kvs only once and is shared for all instances of the AliasResolver class.

Args:
configuration: Configuration object to use for accessing the default KVS.
Expand Down Expand Up @@ -222,12 +222,11 @@ async def store_mapping(self, storage_id: str) -> None:

if not self._configuration.is_at_home:
logging.getLogger(__name__).debug(
'_AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
)
return

default_kvs_client = await self._get_default_kvs_client(self._configuration)
await default_kvs_client.get()

try:
record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY)
Expand All @@ -246,7 +245,7 @@ def _storage_key(self) -> str:
[
self._storage_type,
self._alias,
hash_api_base_url_and_token(self._configuration),
hash_api_public_base_url_and_token(self._configuration),
]
)

Expand Down
2 changes: 2 additions & 0 deletions src/apify/storage_clients/_apify/_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) ->

async with self._charge_lock(), self._lock:
items = data if self._is_sequence_of_items(data) else [data]
if not items:
return
limit = self._compute_limit_for_push(len(items))
items = items[:limit]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ async def fetch_next_request(self) -> Request | None:
)
return None

# Use get request to ensure we have the full request object.
# `_get_or_hydrate_request` may return a request from the queue-head cache, which is populated by
# `list_and_lock_head` and only holds a partial request (no user data, no headers). Re-fetch it by id to
# guarantee the caller gets the full request object.
request = await self._get_request_by_id(next_request_id)
if request is None:
logger.debug(
Expand Down
6 changes: 4 additions & 2 deletions src/apify/storage_clients/_apify/_storage_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ._dataset_client import ApifyDatasetClient
from ._key_value_store_client import ApifyKeyValueStoreClient
from ._request_queue_client import ApifyRequestQueueClient
from ._utils import hash_api_base_url_and_token
from ._utils import hash_api_public_base_url_and_token
from apify._configuration import Configuration as ApifyConfiguration
from apify._utils import docs_group

Expand Down Expand Up @@ -126,7 +126,9 @@ def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> H
if isinstance(configuration, ApifyConfiguration):
# It is not supported to open exactly same queue with 'single' and 'shared' client at the same time.
# Whichever client variation gets used first, wins.
return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
return super().get_storage_client_cache_key(configuration), hash_api_public_base_url_and_token(
configuration
)

config_class = type(configuration)
raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}'))
2 changes: 1 addition & 1 deletion src/apify/storage_clients/_apify/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) ->
return url_safe_key[:request_id_length]


def hash_api_base_url_and_token(configuration: Configuration) -> str:
def hash_api_public_base_url_and_token(configuration: Configuration) -> str:
"""Hash configuration.api_public_base_url and configuration.token in deterministic way."""
if configuration.api_public_base_url is None or configuration.token is None:
raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.")
Expand Down
1 change: 0 additions & 1 deletion tests/unit/storage_clients/test_alias_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ async def test_store_mapping_uses_injected_configuration_is_at_home() -> None:
fake_kvs_client = AsyncMock()
fake_kvs_client.get_record = AsyncMock(return_value=None)
fake_kvs_client.set_record = AsyncMock(return_value=None)
fake_kvs_client.get = AsyncMock(return_value={'id': 'default-kvs-id'})

with patch.object(AliasResolver, '_get_default_kvs_client', return_value=fake_kvs_client):
await resolver.store_mapping(storage_id='new-id-789')
Expand Down
33 changes: 1 addition & 32 deletions tests/unit/test_proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from apify_client import ApifyClientAsync

from apify._consts import ApifyEnvVars
from apify._proxy_configuration import ProxyConfiguration, is_url
from apify._proxy_configuration import ProxyConfiguration

if TYPE_CHECKING:
from pytest_httpserver import HTTPServer
Expand Down Expand Up @@ -588,37 +588,6 @@ def request_handler(request: Request, response: Response) -> Response:
assert call_mock.call_count == 0


def test_is_url_validation() -> None:
assert is_url('http://dummy-proxy.com:8000') is True
assert is_url('https://example.com') is True
assert is_url('http://localhost') is True
assert is_url('https://12.34.56.78') is True
assert is_url('http://12.34.56.78:9012') is True
assert is_url('http://::1') is True
assert is_url('https://2f45:4da6:8f56:af8c:5dce:c1de:14d2:8661') is True

assert is_url('dummy-proxy.com:8000') is False
assert is_url('gyfwgfhkjhljkfhdsf') is False
assert is_url('http://') is False
assert is_url('http://example') is False
assert is_url('http:/example.com') is False
assert is_url('12.34.56.78') is False
assert is_url('::1') is False
assert is_url('https://4da6:8f56:af8c:5dce:c1de:14d2:8661') is False


@pytest.mark.parametrize(
'value',
[
pytest.param('', id='empty_string'),
pytest.param(None, id='none'),
],
)
def test_is_url_with_completely_unparsable_input(value: str | None) -> None:
"""Test is_url with input that causes urlparse to fail."""
assert is_url(value) is False # ty: ignore[invalid-argument-type]


def test_check_min_length_raises() -> None:
"""Test that _check raises ValueError for values shorter than min_length."""
from apify._proxy_configuration import _check
Expand Down
21 changes: 1 addition & 20 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from __future__ import annotations

from enum import Enum
from typing import TYPE_CHECKING
from unittest.mock import patch

import pytest

from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython, maybe_extract_enum_member_value
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython


def test_ipython_detection_when_active() -> None:
Expand Down Expand Up @@ -39,24 +38,6 @@ def test_get_system_info_ipython_flag(*, ipython_running: bool) -> None:
assert info['is_running_in_ipython'] is ipython_running


def test_maybe_extract_enum_member_value_with_enum() -> None:
"""Test that enum members are unwrapped to their values."""

class Color(Enum):
RED = 'red'
BLUE = 'blue'

assert maybe_extract_enum_member_value(Color.RED) == 'red'
assert maybe_extract_enum_member_value(Color.BLUE) == 'blue'


def test_maybe_extract_enum_member_value_with_non_enum() -> None:
"""Test that non-enum values are returned unchanged."""
assert maybe_extract_enum_member_value('hello') == 'hello'
assert maybe_extract_enum_member_value(42) == 42
assert maybe_extract_enum_member_value(None) is None


if TYPE_CHECKING:
# Regression guard: if `docs_group`/`docs_name` stop being identity-typed (`Callable[[T], T]`),
# the decorated classes degrade to `Unknown`, the accesses below stop erroring, and the
Expand Down
5 changes: 2 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading