Skip to content

Commit 7b46253

Browse files
authored
feat(render): add mock batch processor and source utilities (#80)
Add the foundation for the `component render` command:

* MockProcessor: batch rpmautospec + spectool execution in a mock chroot via an embedded Python script (render_process.py)
* mock.Runner: WithUnprivileged()/HasUnprivileged() for the --unpriv flag
* RenderedSpecsDir config field in ProjectInfo

The Python script runs inside the mock chroot, processes components in parallel via ThreadPoolExecutor, and writes its JSON results to a file.
1 parent 739de9f commit 7b46253

File tree

13 files changed

+901
-5
lines changed

13 files changed

+901
-5
lines changed

docs/user/reference/config/project.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,19 @@ The following fields are nested under the `[project]` TOML section:
1212
| Log directory | `log-dir` | string | No | Path to the directory where build logs are written (relative to this config file) |
1313
| Work directory | `work-dir` | string | No | Path to the temporary working directory for build artifacts (relative to this config file) |
1414
| Output directory | `output-dir` | string | No | Path to the directory where final build outputs (RPMs, SRPMs) are placed (relative to this config file) |
15+
| Rendered specs directory | `rendered-specs-dir` | string | No | Output directory for `component render` (relative to this config file) |
1516
| Default distro | `default-distro` | [DistroReference](distros.md#distro-references) | No | The default distro and version to use when building components |
1617

1718
> **Note:** `[default-package-config]` and `[package-groups]` are **top-level** TOML sections — they are not nested under `[project]`. They are documented in the sections below.
1819
1920
## Directory Paths
2021

21-
The `log-dir`, `work-dir`, and `output-dir` paths are resolved relative to the config file that defines them. These directories are created automatically by azldev as needed.
22+
The `log-dir`, `work-dir`, `output-dir`, and `rendered-specs-dir` paths are resolved relative to the config file that defines them. These directories are created automatically by azldev as needed.
2223

2324
- **`log-dir`** — build logs are written here (e.g., `azldev.log`)
2425
- **`work-dir`** — temporary per-component working directories are created under this path during builds (e.g., source preparation, SRPM construction)
2526
- **`output-dir`** — final build artifacts (RPMs, SRPMs) are placed here
27+
- **`rendered-specs-dir`** — rendered spec and sidecar files are written here by `azldev component render`
2628

2729
> **Note:** Do not edit files under these directories manually — they are managed by azldev and may be overwritten or cleaned at any time.
2830
Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package sources
5+
6+
import (
7+
"context"
8+
_ "embed"
9+
"encoding/json"
10+
"fmt"
11+
"log/slog"
12+
"path/filepath"
13+
"runtime"
14+
"strconv"
15+
"strings"
16+
"sync"
17+
18+
"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
19+
"github.com/microsoft/azure-linux-dev-tools/internal/rpm/mock"
20+
"github.com/microsoft/azure-linux-dev-tools/internal/rpm/spectool"
21+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
22+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
23+
)
24+
25+
//go:embed render_process.py
26+
var renderProcessScript []byte
27+
28+
// MockProcessor provides a shared mock chroot for running rpmautospec and
29+
// spectool during component rendering. The chroot is lazily initialized on
30+
// first use and supports batch processing of multiple components in a single
31+
// mock invocation.
32+
type MockProcessor struct {
33+
mu sync.Mutex
34+
runner *mock.Runner
35+
initialized bool
36+
initErr error
37+
}
38+
39+
// NewMockProcessor creates a new processor that will lazily initialize
40+
// a mock chroot using the given config path. The runner is created eagerly
41+
// but the chroot is only initialized on first use.
42+
func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string) *MockProcessor {
43+
return &MockProcessor{
44+
runner: mock.NewRunner(ctx, mockConfigPath),
45+
}
46+
}
47+
48+
// ComponentInput identifies one component to be processed inside the mock
// chroot. Name has to be identical to the component's subdirectory name under
// the staging directory.
type ComponentInput struct {
	// Name is the component name (matches the subdirectory in the staging dir).
	Name string
	// SpecFilename is the bare spec filename, e.g., "curl.spec".
	SpecFilename string
}
54+
55+
// ComponentMockResult is the outcome of mock processing for a single component.
type ComponentMockResult struct {
	// Name is the component name.
	Name string
	// SpecFiles lists the files reported by spectool (basenames/relative paths).
	SpecFiles []string
	// Error is non-nil when rpmautospec or spectool failed for this component.
	Error error
}
61+
62+
// validateInputs validates all component inputs before batch processing.
63+
// Rejects empty names, path traversal, absolute paths, non-basename spec filenames,
64+
// and duplicate component names.
65+
func validateInputs(inputs []ComponentInput) error {
66+
seen := make(map[string]bool, len(inputs))
67+
68+
for _, input := range inputs {
69+
if err := validateComponentInput(input); err != nil {
70+
return err
71+
}
72+
73+
if seen[input.Name] {
74+
return fmt.Errorf("duplicate component name %#q", input.Name)
75+
}
76+
77+
seen[input.Name] = true
78+
}
79+
80+
return nil
81+
}
82+
83+
// isSimpleName reports whether s can be used as a single path element.
// It rejects the empty string, "." and "..", anything containing a forward
// slash or backslash, anything containing ".." anywhere (so "a..b" is
// rejected too), and anything containing a NUL byte.
func isSimpleName(s string) bool {
	switch s {
	case "", ".", "..":
		return false
	}

	if strings.ContainsAny(s, "/\\") {
		return false
	}

	if strings.Contains(s, "..") {
		return false
	}

	return !strings.ContainsRune(s, 0)
}
91+
92+
// validateComponentInput rejects component inputs that could cause path traversal
93+
// or other safety issues when used to construct paths inside the mock chroot.
94+
func validateComponentInput(input ComponentInput) error {
95+
if !isSimpleName(input.Name) {
96+
return fmt.Errorf(
97+
"invalid component name %#q: must be a simple name without path separators or traversal sequences", input.Name)
98+
}
99+
100+
if !isSimpleName(input.SpecFilename) {
101+
return fmt.Errorf("invalid spec filename %#q for component %#q", input.SpecFilename, input.Name)
102+
}
103+
104+
return nil
105+
}
106+
107+
// initOnce lazily initializes the mock chroot. Caller must hold p.mu.
108+
func (p *MockProcessor) initOnce(ctx context.Context) error {
109+
if p.initialized {
110+
return p.initErr
111+
}
112+
113+
slog.Info("Initializing mock chroot for rendering")
114+
115+
p.runner.EnableNetwork()
116+
117+
if err := p.runner.InitRoot(ctx); err != nil {
118+
p.initErr = fmt.Errorf("failed to initialize mock chroot:\n%w", err)
119+
p.initialized = true
120+
121+
return p.initErr
122+
}
123+
124+
// Install rpmautospec (macro expansion), rpmdevtools (spectool), and git
125+
// (required for rpmautospec to read commit history).
126+
// python3-click is required by rpmautospec but not declared as an RPM dependency.
127+
// Ecosystem macro packages (go-srpm-macros, etc.) are already present via
128+
// @buildsys-build → azurelinux-rpm-config.
129+
if err := p.runner.InstallPackages(ctx, []string{"rpmautospec", "rpmdevtools", "git", "python3-click"}); err != nil {
130+
p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err)
131+
p.initialized = true
132+
133+
return p.initErr
134+
}
135+
136+
p.initialized = true
137+
138+
slog.Info("Mock chroot ready for rendering")
139+
140+
return nil
141+
}
142+
143+
// BatchProcess runs rpmautospec and spectool for multiple components in a single
144+
// mock chroot invocation. stagingDir is the host directory containing one
145+
// subdirectory per component (named by ComponentInput.Name). A single bind
146+
// mount exposes the entire staging tree to the chroot.
147+
//
148+
// Components are processed in parallel inside the chroot by an embedded
149+
// Python script (render_process.py) which returns a JSON file, and reports
150+
// per-component progress on stderr (mapped by mock to stdout).
151+
func (p *MockProcessor) BatchProcess(
152+
ctx context.Context, events opctx.EventListener,
153+
stagingDir string, inputs []ComponentInput, fs opctx.FS,
154+
) ([]ComponentMockResult, error) {
155+
p.mu.Lock()
156+
defer p.mu.Unlock()
157+
158+
if len(inputs) == 0 {
159+
return nil, nil
160+
}
161+
162+
if err := validateInputs(inputs); err != nil {
163+
return nil, err
164+
}
165+
166+
if err := p.initOnce(ctx); err != nil {
167+
return nil, err
168+
}
169+
170+
slog.Info("Batch processing components in mock chroot", "count", len(inputs))
171+
172+
// Write the Python script and inputs manifest to the staging directory.
173+
scriptPath := filepath.Join(stagingDir, "render_process.py")
174+
if err := fileutils.WriteFile(fs, scriptPath, renderProcessScript, fileperms.PublicExecutable); err != nil {
175+
return nil, fmt.Errorf("writing render script:\n%w", err)
176+
}
177+
178+
if err := writeInputsManifest(fs, stagingDir, inputs); err != nil {
179+
return nil, err
180+
}
181+
182+
// Clone the runner and add a single bind mount for the staging directory.
183+
// WithUnprivileged drops to the mockbuild user for chroot commands,
184+
// matching how mock builds run and avoiding root-owned files in the
185+
// bind-mounted staging directory. This is safe because mock defaults
186+
// chrootuid to os.getuid() — the mockbuild user inside the chroot has
187+
// the same UID as the host user, so bind-mounted files remain writable.
188+
runner := p.runner.Clone()
189+
runner.WithUnprivileged()
190+
191+
const chrootStagingPath = "/tmp/render"
192+
runner.AddBindMount(stagingDir, chrootStagingPath)
193+
194+
chrootScript := filepath.Join(chrootStagingPath, "render_process.py")
195+
workers := strconv.Itoa(max(1, runtime.NumCPU())) // 1x CPU; mock work is CPU-bound
196+
args := []string{"python3", chrootScript, chrootStagingPath, workers}
197+
198+
cmd, err := runner.CmdInChroot(ctx, args, false)
199+
if err != nil {
200+
return nil, fmt.Errorf("failed to create batch command in mock:\n%w", err)
201+
}
202+
203+
// Set up progress reporting from the Python script's output.
204+
// The script prints "PROGRESS <completed>/<total> <name>" to stderr, but
205+
// mock --chroot merges the inner command's stderr into stdout, so we
206+
// listen on stdout.
207+
mockProgress := events.StartEvent("Processing specs in mock chroot", "count", len(inputs))
208+
mockProgress.SetLongRunning("Processing specs in mock chroot")
209+
210+
defer mockProgress.End()
211+
212+
total := int64(len(inputs))
213+
214+
if listenerErr := cmd.SetRealTimeStdoutListener(func(_ context.Context, line string) {
215+
// Parse "PROGRESS <i>/<total> <name>" lines.
216+
if after, found := strings.CutPrefix(line, "PROGRESS "); found {
217+
if slashIdx := strings.Index(after, "/"); slashIdx > 0 {
218+
if completed, parseErr := strconv.ParseInt(after[:slashIdx], 10, 64); parseErr == nil {
219+
mockProgress.SetProgress(completed, total)
220+
}
221+
}
222+
}
223+
}); listenerErr != nil {
224+
slog.Warn("Failed to set stdout listener for progress", "error", listenerErr)
225+
}
226+
227+
if runErr := cmd.Run(ctx); runErr != nil {
228+
slog.Warn("Batch mock script exited with error", "error", runErr)
229+
230+
return nil, fmt.Errorf("batch mock processing failed:\n%w", runErr)
231+
}
232+
233+
// Read results from the file written by the Python script.
234+
// Using a file avoids bufio.Scanner token size limits that would truncate
235+
// large JSON payloads when capturing stdout (e.g., 7k components ≈ 560KB).
236+
resultsPath := filepath.Join(stagingDir, "results.json")
237+
238+
resultsData, readErr := fileutils.ReadFile(fs, resultsPath)
239+
if readErr != nil {
240+
return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsPath, readErr)
241+
}
242+
243+
return parseBatchJSON(string(resultsData), inputs)
244+
}
245+
246+
// componentInputJSON is the wire format of one entry in inputs.json. Its
// fields mirror ComponentInput (same names, types, and order) so that a
// ComponentInput can be converted to it directly.
type componentInputJSON struct {
	// Name is serialized under the "name" key.
	Name string `json:"name"`
	// SpecFilename is serialized under the "specFilename" key.
	SpecFilename string `json:"specFilename"`
}
251+
252+
// componentResultJSON is the shape of one element of the JSON array written
// by render_process.py.
type componentResultJSON struct {
	// Name is the component the result belongs to.
	Name string `json:"name"`
	// SpecFiles is the raw spectool output, parsed into filenames later.
	SpecFiles string `json:"specFiles"`
	// Error is nil when the component succeeded; otherwise it points to the
	// failure message.
	Error *string `json:"error"`
}
258+
259+
// parseBatchJSON parses the JSON array produced by render_process.py into
260+
// ComponentMockResult values. The spectool output (raw lines) is parsed into
261+
// individual filenames.
262+
func parseBatchJSON(stdout string, inputs []ComponentInput) ([]ComponentMockResult, error) {
263+
var jsonResults []componentResultJSON
264+
if err := json.Unmarshal([]byte(stdout), &jsonResults); err != nil {
265+
return nil, fmt.Errorf("parsing batch results JSON:\n%w", err)
266+
}
267+
268+
// Build a lookup map from the JSON results.
269+
resultMap := make(map[string]*componentResultJSON, len(jsonResults))
270+
for idx := range jsonResults {
271+
resultMap[jsonResults[idx].Name] = &jsonResults[idx]
272+
}
273+
274+
results := make([]ComponentMockResult, len(inputs))
275+
276+
for idx, input := range inputs {
277+
results[idx].Name = input.Name
278+
279+
compResult, ok := resultMap[input.Name]
280+
if !ok {
281+
results[idx].Error = fmt.Errorf("no result returned for %#q", input.Name)
282+
283+
continue
284+
}
285+
286+
if compResult.Error != nil {
287+
results[idx].Error = fmt.Errorf("%s", *compResult.Error)
288+
289+
continue
290+
}
291+
292+
results[idx].SpecFiles = spectool.ParseSpectoolOutput(compResult.SpecFiles)
293+
}
294+
295+
return results, nil
296+
}
297+
298+
// writeInputsManifest writes the inputs.json manifest to the staging directory
299+
// so it can be read by the Python script inside the mock chroot.
300+
func writeInputsManifest(fs opctx.FS, stagingDir string, inputs []ComponentInput) error {
301+
jsonInputs := make([]componentInputJSON, len(inputs))
302+
for idx, input := range inputs {
303+
jsonInputs[idx] = componentInputJSON(input)
304+
}
305+
306+
data, err := json.Marshal(jsonInputs)
307+
if err != nil {
308+
return fmt.Errorf("marshaling inputs:\n%w", err)
309+
}
310+
311+
inputsPath := filepath.Join(stagingDir, "inputs.json")
312+
if err := fileutils.WriteFile(fs, inputsPath, data, fileperms.PublicFile); err != nil {
313+
return fmt.Errorf("writing inputs manifest:\n%w", err)
314+
}
315+
316+
return nil
317+
}
318+
319+
// Destroy cleans up the mock chroot. Should be called when rendering is complete.
320+
// The processor must not be reused after Destroy — create a new MockProcessor if needed.
321+
// Attempts cleanup even if initialization partially failed (e.g., InitRoot succeeded
322+
// but InstallPackages failed), since a partially initialized chroot still needs scrubbing.
323+
func (p *MockProcessor) Destroy(ctx context.Context) {
324+
p.mu.Lock()
325+
defer p.mu.Unlock()
326+
327+
if p.runner != nil && p.initialized {
328+
slog.Debug("Destroying mock chroot")
329+
330+
if err := p.runner.ScrubRoot(ctx); err != nil {
331+
slog.Warn("Failed to clean up mock chroot", "error", err)
332+
}
333+
}
334+
}

0 commit comments

Comments
 (0)