Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions src/launchpad/size/treemap/android_known_libraries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Catalog of well-known Android libraries for DEX treemap grouping.

DEX classes are grouped into a package hierarchy (e.g. ``com`` > ``google`` >
``firebase`` > …). Third-party SDKs are indistinguishable from the app's own code
in that hierarchy. This catalog maps distinctive Java/Kotlin package prefixes to a
canonical library name so the treemap builder can pull recognized classes out and
group them under a single ``Libraries`` node.

Keep entries conservative: use prefixes that are distinctive enough to avoid
colliding with first-party code. In particular, prefer specific sub-packages over
broad vendor roots (e.g. ``com.emergetools.snapshots`` rather than
``com.emergetools``, which would also match an app shipped by Emerge Tools).
"""

from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True)
class KnownLibrary:
    """Immutable catalog entry tying a canonical library name to its packages.

    Attributes:
        name: Canonical library name.
        package_prefixes: Distinctive package prefixes that identify the
            library. Matching happens on package boundaries only: a prefix
            matches an FQN that is exactly the prefix or that starts with
            ``prefix + "."``.
    """

    name: str
    package_prefixes: tuple[str, ...]


# Hand-maintained catalog of popular Android libraries. It is intentionally not
# exhaustive; add entries as new SDKs need to be recognized.
KNOWN_LIBRARIES: tuple[KnownLibrary, ...] = (
    # Jetpack, the legacy support library and Kotlin
    KnownLibrary(name="AndroidX", package_prefixes=("androidx",)),
    KnownLibrary(name="Android Support", package_prefixes=("android.support",)),
    KnownLibrary(name="Kotlin", package_prefixes=("kotlin", "kotlinx")),
    # Google
    KnownLibrary(name="Firebase", package_prefixes=("com.google.firebase",)),
    KnownLibrary(name="Google Play Services", package_prefixes=("com.google.android.gms",)),
    KnownLibrary(name="Google Play Core", package_prefixes=("com.google.android.play",)),
    KnownLibrary(name="Material Components", package_prefixes=("com.google.android.material",)),
    KnownLibrary(name="Gson", package_prefixes=("com.google.gson",)),
    KnownLibrary(name="Guava", package_prefixes=("com.google.common",)),
    KnownLibrary(name="Protobuf", package_prefixes=("com.google.protobuf",)),
    KnownLibrary(name="Dagger", package_prefixes=("dagger",)),
    # Square
    KnownLibrary(name="OkHttp", package_prefixes=("okhttp3",)),
    KnownLibrary(name="Okio", package_prefixes=("okio",)),
    KnownLibrary(name="Retrofit", package_prefixes=("retrofit2",)),
    KnownLibrary(name="Moshi", package_prefixes=("com.squareup.moshi",)),
    KnownLibrary(name="Picasso", package_prefixes=("com.squareup.picasso",)),
    KnownLibrary(name="LeakCanary", package_prefixes=("leakcanary", "com.squareup.leakcanary")),
    # Image loading
    KnownLibrary(name="Coil", package_prefixes=("coil",)),
    KnownLibrary(name="Glide", package_prefixes=("com.bumptech.glide",)),
    # Reactive streams, logging and event dispatch
    KnownLibrary(name="RxJava", package_prefixes=("io.reactivex", "rx")),
    KnownLibrary(name="Timber", package_prefixes=("timber.log",)),
    KnownLibrary(name="EventBus", package_prefixes=("org.greenrobot.eventbus",)),
    # Serialization and general-purpose utilities
    KnownLibrary(name="Jackson", package_prefixes=("com.fasterxml.jackson",)),
    KnownLibrary(name="Apache Commons", package_prefixes=("org.apache.commons",)),
    # Observability
    KnownLibrary(name="Sentry", package_prefixes=("io.sentry",)),
    KnownLibrary(name="Bugsnag", package_prefixes=("com.bugsnag",)),
    KnownLibrary(name="Datadog", package_prefixes=("com.datadog",)),
    # Analytics / attribution
    KnownLibrary(name="Amplitude", package_prefixes=("com.amplitude",)),
    KnownLibrary(name="Mixpanel", package_prefixes=("com.mixpanel",)),
    KnownLibrary(name="Segment", package_prefixes=("com.segment.analytics",)),
    KnownLibrary(name="Adjust", package_prefixes=("com.adjust.sdk",)),
    KnownLibrary(name="AppsFlyer", package_prefixes=("com.appsflyer",)),
    KnownLibrary(name="Branch", package_prefixes=("io.branch",)),
    KnownLibrary(name="Braze", package_prefixes=("com.braze", "com.appboy")),
    # Other popular SDKs
    KnownLibrary(name="Facebook", package_prefixes=("com.facebook",)),
    KnownLibrary(name="Stripe", package_prefixes=("com.stripe",)),
    KnownLibrary(name="Lottie", package_prefixes=("com.airbnb.lottie",)),
    KnownLibrary(name="Realm", package_prefixes=("io.realm",)),
    KnownLibrary(name="Koin", package_prefixes=("org.koin",)),
    KnownLibrary(name="Ktor", package_prefixes=("io.ktor",)),
    KnownLibrary(name="ThreeTenABP", package_prefixes=("org.threeten.bp", "com.jakewharton.threetenabp")),
    # Only the snapshots SDK sub-package: the vendor root would also match
    # first-party apps published under com.emergetools.
    KnownLibrary(name="EmergeTools", package_prefixes=("com.emergetools.snapshots",)),
)


def _build_prefix_index() -> tuple[tuple[str, str], ...]:
    """Flatten the catalog into ``(prefix, library_name)`` pairs, longest prefix first.

    Ordering by descending prefix length lets a first-match scan pick the most
    specific prefix (e.g. ``com.google.firebase`` ahead of a hypothetical
    ``com.google``). Prefixes of equal length keep their catalog order because
    the sort is stable.
    """
    flattened = (
        (prefix, library.name)
        for library in KNOWN_LIBRARIES
        for prefix in library.package_prefixes
    )
    # Negated length sorts longest-first while preserving ties in input order.
    return tuple(sorted(flattened, key=lambda entry: -len(entry[0])))


# Flattened (prefix, library_name) lookup table, built once when the module is
# imported and scanned in order by ``resolve_known_library``.
_PREFIX_INDEX = _build_prefix_index()


def resolve_known_library(fqn: str) -> tuple[str, str] | None:
    """Map a fully-qualified class name to the known library that owns it.

    Args:
        fqn: Fully-qualified class name, e.g. ``androidx.core.App``.

    Returns:
        ``(library_name, matched_prefix)`` for the first entry of the prefix
        index that matches, or ``None`` when ``fqn`` is empty or matches no
        cataloged prefix. A prefix only matches on a package boundary, so
        ``androidx`` matches ``androidx.core.App`` but not ``androidxfoo.Bar``.
    """
    if not fqn:
        return None

    hits = (
        (library_name, prefix)
        for prefix, library_name in _PREFIX_INDEX
        if fqn == prefix or fqn.startswith(prefix + ".")
    )
    # The generator is lazy, so scanning stops at the first matching prefix.
    return next(hits, None)
152 changes: 105 additions & 47 deletions src/launchpad/size/treemap/dex_element_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
from launchpad.parsers.android.dex.types import ClassDefinition
from launchpad.size.models.common import FileInfo
from launchpad.size.models.treemap import TreemapElement, TreemapType
from launchpad.size.treemap.android_known_libraries import resolve_known_library
from launchpad.size.treemap.treemap_element_builder import TreemapElementBuilder
from launchpad.utils.logging import get_logger

logger = get_logger(__name__)

# Display name (also used as the path) of the synthetic treemap node that
# collects every recognized third-party library.
LIBRARIES_NODE_NAME = "Libraries"


class DexElementBuilder(TreemapElementBuilder):
def __init__(
Expand All @@ -25,92 +28,147 @@ def build_element(self, file_info: FileInfo, display_name: str) -> TreemapElemen
# to build the treemap. This is because there could be multiple
# DEX files in APK and we want to group them by package vs file.

root_packages = self._build_package_tree()
size = sum(package.size for package in root_packages)
children = self._build_children()
size = sum(child.size for child in children)

return TreemapElement(
name=display_name,
size=size,
type=TreemapType.DEX,
path=file_info.path,
is_dir=True,
children=root_packages,
children=children,
)

def _build_package_tree(self) -> list[TreemapElement]:
package_tree: dict[str, dict] = {}
def _build_children(self) -> list[TreemapElement]:
    """Split the DEX classes between recognized libraries and everything else.

    Every class is routed to exactly one side. Classes whose FQN resolves to a
    known library are collected per canonical library name, with the matched
    package prefix removed from their path, and rendered under one
    ``Libraries`` node. All remaining classes keep their regular package tree.

    Returns:
        The regular package elements, preceded by the ``Libraries`` node when
        at least one library class was found.
    """
    # canonical library name -> (package path below the prefix, class name, class)
    by_library: dict[str, list[tuple[list[str], str, ClassDefinition]]] = {}
    unmatched: list[ClassDefinition] = []

    for class_def in self.class_definitions:
        fqn = class_def.fqn()
        resolved = resolve_known_library(fqn)
        if resolved is None:
            unmatched.append(class_def)
            continue

        library_name, matched_prefix = resolved
        boundary = matched_prefix + "."
        relative = fqn[len(boundary) :] if fqn.startswith(boundary) else ""

        if relative:
            *package_parts, class_name = relative.split(".")
        else:
            # Nothing follows the matched prefix, so there is no package path
            # to nest under; fall back to the class's own name.
            package_parts, class_name = [], class_def.get_name()

        if library_name not in by_library:
            by_library[library_name] = []
        by_library[library_name].append((package_parts, class_name, class_def))

    elements = self._build_package_elements(unmatched)
    libraries_node = self._build_libraries_node(by_library)
    if libraries_node is None:
        return elements
    return [libraries_node, *elements]

def _build_package_elements(self, class_definitions: list[ClassDefinition]) -> list[TreemapElement]:
    """Build the package hierarchy for a set of classes keyed by their FQN.

    Args:
        class_definitions: Classes to arrange by the dotted segments of their
            ``fqn()``; the last segment is the class name, the preceding ones
            form the package path.

    Returns:
        The top-level treemap elements of the resulting package tree. Classes
        whose FQN has no package segment are logged and left out.
    """
    root = self._new_node()

    for class_def in class_definitions:
        fqn = class_def.fqn()
        parts = fqn.split(".")

        if len(parts) < 2:
            logger.warning(f"Invalid class definition with no package: {fqn}")
            continue

        self._insert_class(root, parts[:-1], parts[-1], class_def)

    return self._node_to_elements(root)

for name, node in package_tree.items():
package_path = f"{parent_path}.{name}" if parent_path else f"{name}"
def _build_libraries_node(
    self,
    library_entries: dict[str, list[tuple[list[str], str, ClassDefinition]]],
) -> TreemapElement | None:
    """Build the single ``Libraries`` node that holds every recognized library.

    Args:
        library_entries: Maps a canonical library name to its classes, each
            given as ``(package_parts, class_name, class_def)`` where
            ``package_parts`` is the package path to nest the class under.

    Returns:
        A directory element named ``LIBRARIES_NODE_NAME`` with one child per
        library, ordered by descending size, or ``None`` when
        ``library_entries`` is empty.
    """
    if not library_entries:
        return None

    library_children: list[TreemapElement] = []
    for library_name, entries in library_entries.items():
        root = self._new_node()
        for package_parts, class_name, class_def in entries:
            self._insert_class(root, package_parts, class_name, class_def)

        # Package paths inside a library are rooted at the library name.
        children = self._node_to_elements(root, parent_path=library_name)
        library_children.append(
            TreemapElement(
                name=library_name,
                size=sum(child.size for child in children),
                type=TreemapType.DEX,
                path=library_name,
                is_dir=True,
                children=children,
            )
        )

    # Largest libraries first.
    library_children.sort(key=lambda child: child.size, reverse=True)

    return TreemapElement(
        name=LIBRARIES_NODE_NAME,
        size=sum(child.size for child in library_children),
        type=TreemapType.DEX,
        path=LIBRARIES_NODE_NAME,
        is_dir=True,
        children=library_children,
    )

total_size = sum(child.size for child in children)
@staticmethod
def _new_node() -> dict:
    """Return a fresh, empty tree node with no sub-packages and no classes."""
    return dict(packages={}, classes={})

def _insert_class(
    self,
    root: dict,
    package_parts: list[str],
    class_name: str,
    class_def: ClassDefinition,
) -> None:
    """Store ``class_def`` in the tree under ``package_parts``, creating nodes as needed.

    Args:
        root: Tree node (``{"packages": ..., "classes": ...}``) to insert into.
        package_parts: Package path, outermost segment first, below ``root``.
        class_name: Preferred key for the class inside its leaf package.
        class_def: The class to store.

    Two *different* classes can target the same slot when a library owns
    several prefixes that are stripped before insertion (e.g. ``rx.Observable``
    and ``io.reactivex.Observable`` both become ``Observable`` under RxJava).
    Such a class is stored under its full FQN instead, so neither class nor its
    size is dropped. Re-inserting a class with the same FQN still replaces the
    earlier entry.
    """
    node = root
    for part in package_parts:
        packages = node["packages"]
        child = packages.get(part)
        if child is None:
            # Allocate intermediate package nodes only when they are missing.
            child = packages[part] = self._new_node()
        node = child

    classes = node["classes"]
    key = class_name
    occupant = classes.get(key)
    if occupant is not None and occupant is not class_def:
        fqn = class_def.fqn()
        if occupant.fqn() != fqn:
            # Name clash between distinct classes: key by FQN to keep both.
            key = fqn
    classes[key] = class_def

def _node_to_elements(self, node: dict, parent_path: str = "") -> list[TreemapElement]:
    """Convert the contents of a tree node into treemap elements, recursively.

    Args:
        node: Tree node with a ``"packages"`` mapping (name -> child node) and
            a ``"classes"`` mapping (key -> class definition).
        parent_path: Dotted path of ``node`` itself; empty for a root node.

    Returns:
        One directory element per sub-package, sized as the sum of its
        children, followed by one element per class stored directly in
        ``node``.
    """
    elements: list[TreemapElement] = []

    for name, child_node in node["packages"].items():
        package_path = f"{parent_path}.{name}" if parent_path else name
        children = self._node_to_elements(child_node, package_path)
        elements.append(
            TreemapElement(
                name=name,
                size=sum(child.size for child in children),
                type=TreemapType.DEX,
                path=package_path,
                is_dir=True,
                children=children,
            )
        )

    # Only the stored values matter here; the keys exist to place classes.
    elements.extend(self._create_class_element(class_def) for class_def in node["classes"].values())

    return elements

def _create_class_element(self, class_def: ClassDefinition) -> TreemapElement:
class_size = class_def.size

return TreemapElement(
name=class_def.get_name(),
size=class_size,
size=class_def.size,
type=TreemapType.DEX,
path=class_def.fqn(),
is_dir=False,
Expand Down
43 changes: 43 additions & 0 deletions tests/unit/size/treemap/test_android_known_libraries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Unit tests for the Android known-libraries catalog."""

from launchpad.size.treemap.android_known_libraries import resolve_known_library


def test_resolves_prefix_to_library() -> None:
    # Each FQN resolves to (library name, matched prefix).
    expected = {
        "androidx.core.app.NotificationCompat": ("AndroidX", "androidx"),
        "kotlin.collections.CollectionsKt": ("Kotlin", "kotlin"),
        "io.sentry.Sentry": ("Sentry", "io.sentry"),
    }
    for fqn, resolved in expected.items():
        assert resolve_known_library(fqn) == resolved


def test_most_specific_prefix_wins() -> None:
    # Both classes live under com.google.*; each must resolve to its own,
    # longer cataloged prefix.
    firebase = resolve_known_library("com.google.firebase.analytics.FirebaseAnalytics")
    play_services = resolve_known_library("com.google.android.gms.tasks.Task")

    assert firebase == ("Firebase", "com.google.firebase")
    assert play_services == ("Google Play Services", "com.google.android.gms")


def test_matches_on_package_boundaries() -> None:
    # An FQN that is identical to the prefix itself counts as a match.
    exact = resolve_known_library("androidx")
    assert exact == ("AndroidX", "androidx")

    # Sharing leading characters with a prefix is not enough: the prefix must
    # end on a package boundary.
    for lookalike in ("androidxtra.Foo", "kotlinpoet.Foo"):
        assert resolve_known_library(lookalike) is None


def test_emergetools_snapshots_is_grouped_but_app_is_not() -> None:
    sdk_class = "com.emergetools.snapshots.SnapshotTest"
    app_class = "com.emergetools.hackernews.MainActivity"

    assert resolve_known_library(sdk_class) == ("EmergeTools", "com.emergetools.snapshots")
    # Only the snapshots sub-package is cataloged, so a first-party Emerge
    # Tools app must stay out of the library group.
    assert resolve_known_library(app_class) is None


def test_unknown_and_empty_names_return_none() -> None:
    # Neither an uncataloged package nor an empty string resolves to a library.
    for fqn in ("com.example.myapp.MainActivity", ""):
        assert resolve_known_library(fqn) is None
Loading
Loading