Skip to content

Commit db8dbfe

Browse files
dmcilvaneyCopilotPawelWMS
authored
bug: fix remote build dependency variability (#12842)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>
1 parent 37046ef commit db8dbfe

4 files changed

Lines changed: 327 additions & 17 deletions

File tree

toolkit/tools/grapher/grapher.go

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/microsoft/azurelinux/toolkit/tools/internal/packagerepo/repocloner/rpmrepocloner"
1313
"github.com/microsoft/azurelinux/toolkit/tools/internal/pkggraph"
1414
"github.com/microsoft/azurelinux/toolkit/tools/internal/pkgjson"
15+
"github.com/microsoft/azurelinux/toolkit/tools/internal/sliceutils"
1516
"github.com/microsoft/azurelinux/toolkit/tools/internal/timestamp"
1617
"github.com/microsoft/azurelinux/toolkit/tools/pkg/profile"
1718

@@ -23,14 +24,13 @@ var (
2324
input = exe.InputFlag(app, "Input json listing all local SRPMs")
2425
output = exe.OutputFlag(app, "Output file to export the graph to")
2526

26-
logFlags = exe.SetupLogFlags(app)
27-
profFlags = exe.SetupProfileFlags(app)
28-
strictGoals = app.Flag("strict-goals", "Don't allow missing goal packages").Bool()
29-
strictUnresolved = app.Flag("strict-unresolved", "Don't allow missing unresolved packages").Bool()
30-
timestampFile = app.Flag("timestamp-file", "File that stores timestamps for this program.").String()
31-
usePMCtoResolveCycles = app.Flag("usePMCtoresolvecycles", "Cycles will be resolved by downloading rpm packages from PMC if locally unavailable").Bool()
32-
tlsClientCert = app.Flag("tls-cert", "TLS client certificate to use when downloading files.").String()
33-
tlsClientKey = app.Flag("tls-key", "TLS client key to use when downloading files.").String()
27+
logFlags = exe.SetupLogFlags(app)
28+
profFlags = exe.SetupProfileFlags(app)
29+
strictGoals = app.Flag("strict-goals", "Don't allow missing goal packages").Bool()
30+
strictUnresolved = app.Flag("strict-unresolved", "Don't allow missing unresolved packages").Bool()
31+
timestampFile = app.Flag("timestamp-file", "File that stores timestamps for this program.").String()
32+
tlsClientCert = app.Flag("tls-cert", "TLS client certificate to use when downloading files.").String()
33+
tlsClientKey = app.Flag("tls-key", "TLS client key to use when downloading files.").String()
3434

3535
resolveCyclesFromUpstream = app.Flag("resolve-cycles-from-upstream", "Let grapher resolve cycles by marking rpms available in repo as remote").Bool()
3636
outDir = exe.OutputDirFlag(app, "Directory to download packages into.")
@@ -43,8 +43,6 @@ var (
4343
disableDefaultRepos = app.Flag("disable-default-repos", "Disable pulling packages from PMC repos").Bool()
4444
ignoreVersionToResolveSelfDep = app.Flag("ignore-version-to-resolve-selfdep", "Ignore package version while downloading package from upstream when resolving cycle").Bool()
4545
repoSnapshotTime = app.Flag("repo-snapshot-time", "Optional: Repo time limit for tdnf virtual snapshot").String()
46-
47-
depGraph = pkggraph.NewPkgGraph()
4846
)
4947

5048
func main() {
@@ -113,7 +111,7 @@ func main() {
113111
}
114112

115113
// addUnresolvedPackage adds an unresolved node to the graph representing the
116-
// packged described in the PackgetVer structure. Returns an error if the node
114+
// package described in the PackageVer structure. Returns an error if the node
117115
// could not be created.
118116
func addUnresolvedPackage(g *pkggraph.PkgGraph, pkgVer *pkgjson.PackageVer) (newRunNode *pkggraph.PkgNode, err error) {
119117
logger.Log.Debugf("Adding unresolved %s", pkgVer)
@@ -122,7 +120,9 @@ func addUnresolvedPackage(g *pkggraph.PkgGraph, pkgVer *pkgjson.PackageVer) (new
122120
return
123121
}
124122

125-
nodes, err := g.FindBestPkgNode(pkgVer)
123+
// Double check that the package is not already in the graph. A previous check should have already found the node
124+
// and no call to addUnresolvedPackage() should have been made.
125+
nodes, err := g.FindExactPkgNodeFromPkg(pkgVer)
126126
if err != nil {
127127
return
128128
}
@@ -205,6 +205,36 @@ func addNodesForPackage(g *pkggraph.PkgGraph, pkg *pkgjson.Package) (foundDuplic
205205
return
206206
}
207207

208+
// findOrAddExactRemoteDependency ensures that a remote node is available in the graph for every unresolved dependency.
209+
// 1. Check if the exact dependency is already in the graph. If it is, reuse it.
210+
// 2. If it is not, create a new unresolved node for the dependency.
211+
// It is important that we only match on the exact dependency name and version. If we don't, we may end up with
212+
// unpredictable behavior in the scheduler. If two different remote dependencies are added to two different build
213+
// nodes of a single SRPM, then the scheduler may queue that node twice.
214+
func findOrAddExactRemoteDependency(g *pkggraph.PkgGraph, dependency *pkgjson.PackageVer) (selectedRemoteNode *pkggraph.PkgNode, err error) {
215+
existingRemoteNode, err := g.FindExactPkgNodeFromPkg(dependency)
216+
if err != nil {
217+
err = fmt.Errorf("failed to check lookup list for exact remote %+v:\n%w", dependency, err)
218+
return nil, err
219+
}
220+
221+
if existingRemoteNode == nil {
222+
// No exact match, add a new one.
223+
selectedRemoteNode, err = addUnresolvedPackage(g, dependency)
224+
if err != nil {
225+
err = fmt.Errorf("failed to add a remote node (%s):\n%w", dependency.Name, err)
226+
return nil, err
227+
}
228+
logger.Log.Debugf("Added new node: '%s' for dependency %+v", selectedRemoteNode.FriendlyName(), dependency)
229+
} else {
230+
// This exact dependency is already in the graph, so reuse it.
231+
selectedRemoteNode = existingRemoteNode.RunNode
232+
logger.Log.Debugf("Found existing exact remote node: '%s' for dependency %+v", selectedRemoteNode.FriendlyName(), dependency)
233+
}
234+
235+
return selectedRemoteNode, nil
236+
}
237+
208238
// addSingleDependency will add an edge between packageNode and the "Run" node for the
209239
// dependency described in the PackageVer structure. Returns an error if the
210240
// addition failed.
@@ -217,15 +247,17 @@ func addSingleDependency(g *pkggraph.PkgGraph, packageNode *pkggraph.PkgNode, de
217247
return err
218248
}
219249

220-
if nodes == nil {
221-
dependentNode, err = addUnresolvedPackage(g, dependency)
250+
// If we can't find the dependency in the graph, or it is a remote dependency, we need to do a bit of extra validation.
251+
if nodes == nil || nodes.RunNode.Type != pkggraph.TypeLocalRun {
252+
dependentNode, err = findOrAddExactRemoteDependency(g, dependency)
222253
if err != nil {
223-
err = fmt.Errorf("failed to add a package (%s):\n%w", dependency.Name, err)
254+
err = fmt.Errorf("failed to handle remote dependency from %+v to %+v:\n%w", packageNode.VersionedPkg, dependency, err)
224255
return err
225256
}
226257
} else {
227258
// All dependencies are assumed to be "Run" dependencies
228259
dependentNode = nodes.RunNode
260+
logger.Log.Debugf("Found existing node: '%s' for dependency %+v", dependentNode.FriendlyName(), dependency)
229261
}
230262

231263
if packageNode == dependentNode {
@@ -335,10 +367,14 @@ func populateGraph(graph *pkggraph.PkgGraph, repo *pkgjson.PackageRepo) (err err
335367
timestamp.StopEvent(nil) // add package nodes
336368
timestamp.StartEvent("add dependencies", nil)
337369

370+
// Sort the map to ensure the order is deterministic
371+
packageList := sliceutils.MapToSlice(uniquePackages)
372+
pkgjson.SortPackageList(packageList)
373+
338374
// Rescan and add all the dependencies
339375
logger.Log.Infof("Adding all dependencies from (%s)", *input)
340376
dependenciesAdded := 0
341-
for uniquePkg := range uniquePackages {
377+
for _, uniquePkg := range packageList {
342378
num, err := addPkgDependencies(graph, uniquePkg)
343379
if err != nil {
344380
err = fmt.Errorf("failed to add dependency %+v:\n%w", uniquePkg, err)
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package main
5+
6+
import (
7+
"os"
8+
"testing"
9+
10+
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
11+
"github.com/microsoft/azurelinux/toolkit/tools/internal/pkggraph"
12+
"github.com/microsoft/azurelinux/toolkit/tools/internal/pkgjson"
13+
"github.com/sirupsen/logrus"
14+
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
func TestMain(m *testing.M) {
19+
logger.InitStderrLog()
20+
logger.Log.SetLevel(logrus.FatalLevel)
21+
os.Exit(m.Run())
22+
}
23+
24+
// Make a "local" package of the given name. Each package with the same pkgName will share the same
25+
// requires, srpmpath, etc. The subPkgName is used to differentiate multiple run-nodes for each
26+
// package. The buildRequires should be common across all packages with the same pkgName.
27+
func makePackage(pkgName, subPkgName string, buildRequires []*pkgjson.PackageVer) *pkgjson.Package {
28+
// Create a package with the given name and version
29+
pkg := &pkgjson.Package{
30+
Provides: &pkgjson.PackageVer{
31+
Name: subPkgName,
32+
Version: "1.0",
33+
},
34+
SrpmPath: pkgName + ".src.rpm",
35+
RpmPath: pkgName + ".rpm",
36+
SourceDir: pkgName + "-src",
37+
SpecPath: pkgName + ".spec",
38+
Architecture: "x86_64",
39+
IsToolchain: false,
40+
RunTests: true,
41+
BuildRequires: buildRequires,
42+
}
43+
44+
return pkg
45+
}
46+
47+
// A very simple test case to validate that the test framework is working
48+
func TestValidateFramework(t *testing.T) {
49+
testRepo := &pkgjson.PackageRepo{
50+
Repo: []*pkgjson.Package{
51+
makePackage("pkg1", "pkg1-a", nil),
52+
makePackage("pkg1", "pkg1-b", nil),
53+
},
54+
}
55+
depGraph := pkggraph.NewPkgGraph()
56+
err := populateGraph(depGraph, testRepo)
57+
require.NoError(t, err)
58+
59+
// Validate that the graph is not empty
60+
require.Equal(t, 2, len(depGraph.AllRunNodes()))
61+
require.Equal(t, 2, len(depGraph.AllBuildNodes()))
62+
}
63+
64+
// Load a specially crafted repo containing several sub-packages of a single SRPM, and two malicious
65+
// packages that try to pollute the available remote dependencies. The test will check if the various build
66+
// nodes of the sub-package SRPM are all using the same set of dependencies. The other packages will add
67+
// new remote nodes to the graph since they may not be satisfied by the already existing nodes. Once
68+
// this happens, the sub-package build nodes may start using the "better" dependencies, which is not what we want.
69+
func TestScenarioMultiRemoteProvides(t *testing.T) {
70+
anyVerBr := []*pkgjson.PackageVer{
71+
{
72+
Name: "build-req",
73+
Version: "",
74+
Condition: "",
75+
SVersion: "",
76+
SCondition: "",
77+
},
78+
}
79+
lowVerBr := []*pkgjson.PackageVer{
80+
{
81+
Name: "build-req",
82+
Version: "1.0",
83+
Condition: "<",
84+
SVersion: "",
85+
SCondition: "",
86+
},
87+
}
88+
highVerBr := []*pkgjson.PackageVer{
89+
{
90+
Name: "build-req",
91+
Version: "2.0",
92+
Condition: ">",
93+
SVersion: "",
94+
SCondition: "",
95+
},
96+
}
97+
98+
// Define the test repo
99+
testRepo := pkgjson.PackageRepo{
100+
Repo: []*pkgjson.Package{
101+
makePackage("pkg1", "pkg1-a", anyVerBr),
102+
makePackage("other-pkg-1", "other-pkg-1", lowVerBr),
103+
makePackage("pkg1", "pkg1-b", anyVerBr),
104+
makePackage("other-pkg-2", "other-pkg-2", highVerBr),
105+
makePackage("pkg1", "pkg1-c", anyVerBr),
106+
},
107+
}
108+
109+
// The graph library is non-deterministic (likely due to the use of maps), so to accurately catch the
110+
// counter example we need to run the test multiple times. Experimentally, the test fails after about
111+
// 10 runs (p=0.1), so with 1000 runs the probability of false negatives is 0.9^1000 ~= 1.7E-46 (ie very
112+
// small).
113+
const numRuns = 1000
114+
for i := 0; i < numRuns; i++ {
115+
// Create the dependency graph
116+
depGraph := pkggraph.NewPkgGraph()
117+
err := populateGraph(depGraph, &testRepo)
118+
require.NoError(t, err)
119+
120+
// Check our invariants
121+
actualDepsUsed := make(map[pkgjson.PackageVer]bool)
122+
buildNodes := depGraph.AllBuildNodes()
123+
for _, buildNode := range buildNodes {
124+
125+
// Only care about the pkg1 build nodes
126+
if buildNode.SrpmPath != "pkg1.src.rpm" {
127+
continue
128+
}
129+
130+
dependencies := depGraph.From(buildNode.ID())
131+
132+
for dependencies.Next() {
133+
dep := dependencies.Node().(*pkggraph.PkgNode)
134+
// Add to the set
135+
actualDepsUsed[*dep.VersionedPkg] = true
136+
}
137+
}
138+
// Now check the map, if there is more than one entry something is wrong
139+
if len(actualDepsUsed) > 1 {
140+
t.Error("Build deps:")
141+
for dep := range actualDepsUsed {
142+
t.Logf("\t%s", dep)
143+
}
144+
assert.FailNowf(t, "Multiple dependencies used in a single build node", "Failed after %d runs", i+1)
145+
}
146+
}
147+
}

toolkit/tools/internal/pkgjson/pkgjson.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package pkgjson
66
import (
77
"fmt"
88
"regexp"
9+
"sort"
910
"strings"
1011

1112
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
@@ -73,11 +74,49 @@ type Package struct {
7374
RunTests bool `json:"RunTests"` // Should we run tests for this package.
7475
}
7576

77+
func packageLess(a, b PackageVer) bool {
78+
if a.Name == b.Name {
79+
v1 := versioncompare.New(a.Version)
80+
v2 := versioncompare.New(b.Version)
81+
return v1.Compare(v2) < 0
82+
}
83+
84+
return a.Name < b.Name
85+
}
86+
87+
// SortPackageList ensures the package list is sorted in a deterministic way. It will primarily sort by name, then
88+
// version. It will also sort the package lists (requires, buildRequires, testRequires) of each package.
89+
func SortPackageList(packages []*Package) {
90+
sort.Slice(packages, func(i, j int) bool {
91+
return packageLess(*packages[i].Provides, *packages[j].Provides)
92+
})
93+
94+
// For each package, also sort the lists of its dependencies
95+
for _, pkg := range packages {
96+
sort.Slice(pkg.Requires, func(i, j int) bool {
97+
return packageLess(*pkg.Requires[i], *pkg.Requires[j])
98+
})
99+
sort.Slice(pkg.BuildRequires, func(i, j int) bool {
100+
return packageLess(*pkg.BuildRequires[i], *pkg.BuildRequires[j])
101+
})
102+
sort.Slice(pkg.TestRequires, func(i, j int) bool {
103+
return packageLess(*pkg.TestRequires[i], *pkg.TestRequires[j])
104+
})
105+
}
106+
}
107+
76108
// ParsePackageJSON reads a package list json file
77109
func (pkg *PackageRepo) ParsePackageJSON(path string) (err error) {
78110
logger.Log.Infof("Opening %s", path)
79111

80-
return jsonutils.ReadJSONFile(path, &pkg)
112+
if err = jsonutils.ReadJSONFile(path, &pkg); err != nil {
113+
return err
114+
}
115+
116+
// Ensure deterministic ordering of the package list
117+
SortPackageList(pkg.Repo)
118+
119+
return nil
81120
}
82121

83122
// IsImplicitPackage returns true if a PackageVer represents an implicit provide.

0 commit comments

Comments
 (0)