|
| 1 | +// Copyright (c) Microsoft Corporation. |
| 2 | +// Licensed under the MIT License. |
| 3 | + |
| 4 | +package sources |
| 5 | + |
| 6 | +import ( |
| 7 | + "context" |
| 8 | + _ "embed" |
| 9 | + "encoding/json" |
| 10 | + "fmt" |
| 11 | + "log/slog" |
| 12 | + "path/filepath" |
| 13 | + "runtime" |
| 14 | + "strconv" |
| 15 | + "strings" |
| 16 | + "sync" |
| 17 | + |
| 18 | + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" |
| 19 | + "github.com/microsoft/azure-linux-dev-tools/internal/rpm/mock" |
| 20 | + "github.com/microsoft/azure-linux-dev-tools/internal/rpm/spectool" |
| 21 | + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" |
| 22 | + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" |
| 23 | +) |
| 24 | + |
// renderProcessScript holds the contents of render_process.py, embedded at
// build time. BatchProcess writes it into the staging directory and runs it
// with python3 inside the mock chroot.
//
//go:embed render_process.py
var renderProcessScript []byte
| 27 | + |
// MockProcessor provides a shared mock chroot for running rpmautospec and
// spectool during component rendering. The chroot is lazily initialized on
// first use and supports batch processing of multiple components in a single
// mock invocation.
//
// BatchProcess and Destroy hold mu for their full duration, so calls on one
// processor are serialized.
type MockProcessor struct {
	mu          sync.Mutex   // held by BatchProcess and Destroy while they run
	runner      *mock.Runner // base runner; BatchProcess works on a clone of it
	initialized bool         // set once initOnce has run, whether it succeeded or failed
	initErr     error        // failure recorded by initOnce and returned on later calls
}
| 38 | + |
| 39 | +// NewMockProcessor creates a new processor that will lazily initialize |
| 40 | +// a mock chroot using the given config path. The runner is created eagerly |
| 41 | +// but the chroot is only initialized on first use. |
| 42 | +func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string) *MockProcessor { |
| 43 | + return &MockProcessor{ |
| 44 | + runner: mock.NewRunner(ctx, mockConfigPath), |
| 45 | + } |
| 46 | +} |
| 47 | + |
// ComponentInput describes a single component to process in the mock chroot.
// Name must match the subdirectory name under the staging directory.
// Both fields are checked by validateComponentInput before use.
type ComponentInput struct {
	// Name is the component name; it doubles as the component's subdirectory
	// name inside the staging directory.
	Name string
	// SpecFilename is just the spec file's name, e.g., "curl.spec" — not a path.
	SpecFilename string
}
| 54 | + |
// ComponentMockResult holds the mock processing result for one component.
// Exactly one of SpecFiles or Error is populated by parseBatchJSON.
type ComponentMockResult struct {
	// Name is the component name, copied from the corresponding ComponentInput.
	Name string
	// SpecFiles lists the files reported by spectool (basenames/relative paths).
	SpecFiles []string
	// Error is non-nil if rpmautospec or spectool failed for this component,
	// or if the batch output contained no entry for it.
	Error error
}
| 61 | + |
| 62 | +// validateInputs validates all component inputs before batch processing. |
| 63 | +// Rejects empty names, path traversal, absolute paths, non-basename spec filenames, |
| 64 | +// and duplicate component names. |
| 65 | +func validateInputs(inputs []ComponentInput) error { |
| 66 | + seen := make(map[string]bool, len(inputs)) |
| 67 | + |
| 68 | + for _, input := range inputs { |
| 69 | + if err := validateComponentInput(input); err != nil { |
| 70 | + return err |
| 71 | + } |
| 72 | + |
| 73 | + if seen[input.Name] { |
| 74 | + return fmt.Errorf("duplicate component name %#q", input.Name) |
| 75 | + } |
| 76 | + |
| 77 | + seen[input.Name] = true |
| 78 | + } |
| 79 | + |
| 80 | + return nil |
| 81 | +} |
| 82 | + |
// isSimpleName reports whether s can be used as a single path component:
// it must be non-empty, must not be "." and must not contain "..", a forward
// or backward slash, or a NUL byte.
func isSimpleName(s string) bool {
	switch {
	case s == "", s == ".":
		return false
	case strings.Contains(s, ".."):
		// Also rejects the bare ".." name.
		return false
	case strings.ContainsAny(s, "/\\"):
		return false
	case strings.IndexByte(s, 0) >= 0:
		return false
	}

	return true
}
| 91 | + |
| 92 | +// validateComponentInput rejects component inputs that could cause path traversal |
| 93 | +// or other safety issues when used to construct paths inside the mock chroot. |
| 94 | +func validateComponentInput(input ComponentInput) error { |
| 95 | + if !isSimpleName(input.Name) { |
| 96 | + return fmt.Errorf( |
| 97 | + "invalid component name %#q: must be a simple name without path separators or traversal sequences", input.Name) |
| 98 | + } |
| 99 | + |
| 100 | + if !isSimpleName(input.SpecFilename) { |
| 101 | + return fmt.Errorf("invalid spec filename %#q for component %#q", input.SpecFilename, input.Name) |
| 102 | + } |
| 103 | + |
| 104 | + return nil |
| 105 | +} |
| 106 | + |
| 107 | +// initOnce lazily initializes the mock chroot. Caller must hold p.mu. |
| 108 | +func (p *MockProcessor) initOnce(ctx context.Context) error { |
| 109 | + if p.initialized { |
| 110 | + return p.initErr |
| 111 | + } |
| 112 | + |
| 113 | + slog.Info("Initializing mock chroot for rendering") |
| 114 | + |
| 115 | + p.runner.EnableNetwork() |
| 116 | + |
| 117 | + if err := p.runner.InitRoot(ctx); err != nil { |
| 118 | + p.initErr = fmt.Errorf("failed to initialize mock chroot:\n%w", err) |
| 119 | + p.initialized = true |
| 120 | + |
| 121 | + return p.initErr |
| 122 | + } |
| 123 | + |
| 124 | + // Install rpmautospec (macro expansion), rpmdevtools (spectool), and git |
| 125 | + // (required for rpmautospec to read commit history). |
| 126 | + // python3-click is required by rpmautospec but not declared as an RPM dependency. |
| 127 | + // Ecosystem macro packages (go-srpm-macros, etc.) are already present via |
| 128 | + // @buildsys-build → azurelinux-rpm-config. |
| 129 | + if err := p.runner.InstallPackages(ctx, []string{"rpmautospec", "rpmdevtools", "git", "python3-click"}); err != nil { |
| 130 | + p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err) |
| 131 | + p.initialized = true |
| 132 | + |
| 133 | + return p.initErr |
| 134 | + } |
| 135 | + |
| 136 | + p.initialized = true |
| 137 | + |
| 138 | + slog.Info("Mock chroot ready for rendering") |
| 139 | + |
| 140 | + return nil |
| 141 | +} |
| 142 | + |
| 143 | +// BatchProcess runs rpmautospec and spectool for multiple components in a single |
| 144 | +// mock chroot invocation. stagingDir is the host directory containing one |
| 145 | +// subdirectory per component (named by ComponentInput.Name). A single bind |
| 146 | +// mount exposes the entire staging tree to the chroot. |
| 147 | +// |
| 148 | +// Components are processed in parallel inside the chroot by an embedded |
| 149 | +// Python script (render_process.py) which returns a JSON file, and reports |
| 150 | +// per-component progress on stderr (mapped by mock to stdout). |
| 151 | +func (p *MockProcessor) BatchProcess( |
| 152 | + ctx context.Context, events opctx.EventListener, |
| 153 | + stagingDir string, inputs []ComponentInput, fs opctx.FS, |
| 154 | +) ([]ComponentMockResult, error) { |
| 155 | + p.mu.Lock() |
| 156 | + defer p.mu.Unlock() |
| 157 | + |
| 158 | + if len(inputs) == 0 { |
| 159 | + return nil, nil |
| 160 | + } |
| 161 | + |
| 162 | + if err := validateInputs(inputs); err != nil { |
| 163 | + return nil, err |
| 164 | + } |
| 165 | + |
| 166 | + if err := p.initOnce(ctx); err != nil { |
| 167 | + return nil, err |
| 168 | + } |
| 169 | + |
| 170 | + slog.Info("Batch processing components in mock chroot", "count", len(inputs)) |
| 171 | + |
| 172 | + // Write the Python script and inputs manifest to the staging directory. |
| 173 | + scriptPath := filepath.Join(stagingDir, "render_process.py") |
| 174 | + if err := fileutils.WriteFile(fs, scriptPath, renderProcessScript, fileperms.PublicExecutable); err != nil { |
| 175 | + return nil, fmt.Errorf("writing render script:\n%w", err) |
| 176 | + } |
| 177 | + |
| 178 | + if err := writeInputsManifest(fs, stagingDir, inputs); err != nil { |
| 179 | + return nil, err |
| 180 | + } |
| 181 | + |
| 182 | + // Clone the runner and add a single bind mount for the staging directory. |
| 183 | + // WithUnprivileged drops to the mockbuild user for chroot commands, |
| 184 | + // matching how mock builds run and avoiding root-owned files in the |
| 185 | + // bind-mounted staging directory. This is safe because mock defaults |
| 186 | + // chrootuid to os.getuid() — the mockbuild user inside the chroot has |
| 187 | + // the same UID as the host user, so bind-mounted files remain writable. |
| 188 | + runner := p.runner.Clone() |
| 189 | + runner.WithUnprivileged() |
| 190 | + |
| 191 | + const chrootStagingPath = "/tmp/render" |
| 192 | + runner.AddBindMount(stagingDir, chrootStagingPath) |
| 193 | + |
| 194 | + chrootScript := filepath.Join(chrootStagingPath, "render_process.py") |
| 195 | + workers := strconv.Itoa(max(1, runtime.NumCPU())) // 1x CPU; mock work is CPU-bound |
| 196 | + args := []string{"python3", chrootScript, chrootStagingPath, workers} |
| 197 | + |
| 198 | + cmd, err := runner.CmdInChroot(ctx, args, false) |
| 199 | + if err != nil { |
| 200 | + return nil, fmt.Errorf("failed to create batch command in mock:\n%w", err) |
| 201 | + } |
| 202 | + |
| 203 | + // Set up progress reporting from the Python script's output. |
| 204 | + // The script prints "PROGRESS <completed>/<total> <name>" to stderr, but |
| 205 | + // mock --chroot merges the inner command's stderr into stdout, so we |
| 206 | + // listen on stdout. |
| 207 | + mockProgress := events.StartEvent("Processing specs in mock chroot", "count", len(inputs)) |
| 208 | + mockProgress.SetLongRunning("Processing specs in mock chroot") |
| 209 | + |
| 210 | + defer mockProgress.End() |
| 211 | + |
| 212 | + total := int64(len(inputs)) |
| 213 | + |
| 214 | + if listenerErr := cmd.SetRealTimeStdoutListener(func(_ context.Context, line string) { |
| 215 | + // Parse "PROGRESS <i>/<total> <name>" lines. |
| 216 | + if after, found := strings.CutPrefix(line, "PROGRESS "); found { |
| 217 | + if slashIdx := strings.Index(after, "/"); slashIdx > 0 { |
| 218 | + if completed, parseErr := strconv.ParseInt(after[:slashIdx], 10, 64); parseErr == nil { |
| 219 | + mockProgress.SetProgress(completed, total) |
| 220 | + } |
| 221 | + } |
| 222 | + } |
| 223 | + }); listenerErr != nil { |
| 224 | + slog.Warn("Failed to set stdout listener for progress", "error", listenerErr) |
| 225 | + } |
| 226 | + |
| 227 | + if runErr := cmd.Run(ctx); runErr != nil { |
| 228 | + slog.Warn("Batch mock script exited with error", "error", runErr) |
| 229 | + |
| 230 | + return nil, fmt.Errorf("batch mock processing failed:\n%w", runErr) |
| 231 | + } |
| 232 | + |
| 233 | + // Read results from the file written by the Python script. |
| 234 | + // Using a file avoids bufio.Scanner token size limits that would truncate |
| 235 | + // large JSON payloads when capturing stdout (e.g., 7k components ≈ 560KB). |
| 236 | + resultsPath := filepath.Join(stagingDir, "results.json") |
| 237 | + |
| 238 | + resultsData, readErr := fileutils.ReadFile(fs, resultsPath) |
| 239 | + if readErr != nil { |
| 240 | + return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsPath, readErr) |
| 241 | + } |
| 242 | + |
| 243 | + return parseBatchJSON(string(resultsData), inputs) |
| 244 | +} |
| 245 | + |
// componentInputJSON is the JSON-serializable form written to inputs.json.
// writeInputsManifest builds it by direct conversion from ComponentInput, so
// its fields must stay identical to ComponentInput's in name, type and order.
type componentInputJSON struct {
	Name         string `json:"name"`         // component name
	SpecFilename string `json:"specFilename"` // spec file name
}
| 251 | + |
// componentResultJSON mirrors the JSON output from render_process.py.
type componentResultJSON struct {
	// Name is the component name; parseBatchJSON matches it against the inputs.
	Name string `json:"name"`
	// SpecFiles is the raw spectool output as a single string; parseBatchJSON
	// passes it to spectool.ParseSpectoolOutput.
	SpecFiles string `json:"specFiles"`
	// Error is the failure message for this component; a nil pointer is
	// treated as success.
	Error *string `json:"error"`
}
| 258 | + |
| 259 | +// parseBatchJSON parses the JSON array produced by render_process.py into |
| 260 | +// ComponentMockResult values. The spectool output (raw lines) is parsed into |
| 261 | +// individual filenames. |
| 262 | +func parseBatchJSON(stdout string, inputs []ComponentInput) ([]ComponentMockResult, error) { |
| 263 | + var jsonResults []componentResultJSON |
| 264 | + if err := json.Unmarshal([]byte(stdout), &jsonResults); err != nil { |
| 265 | + return nil, fmt.Errorf("parsing batch results JSON:\n%w", err) |
| 266 | + } |
| 267 | + |
| 268 | + // Build a lookup map from the JSON results. |
| 269 | + resultMap := make(map[string]*componentResultJSON, len(jsonResults)) |
| 270 | + for idx := range jsonResults { |
| 271 | + resultMap[jsonResults[idx].Name] = &jsonResults[idx] |
| 272 | + } |
| 273 | + |
| 274 | + results := make([]ComponentMockResult, len(inputs)) |
| 275 | + |
| 276 | + for idx, input := range inputs { |
| 277 | + results[idx].Name = input.Name |
| 278 | + |
| 279 | + compResult, ok := resultMap[input.Name] |
| 280 | + if !ok { |
| 281 | + results[idx].Error = fmt.Errorf("no result returned for %#q", input.Name) |
| 282 | + |
| 283 | + continue |
| 284 | + } |
| 285 | + |
| 286 | + if compResult.Error != nil { |
| 287 | + results[idx].Error = fmt.Errorf("%s", *compResult.Error) |
| 288 | + |
| 289 | + continue |
| 290 | + } |
| 291 | + |
| 292 | + results[idx].SpecFiles = spectool.ParseSpectoolOutput(compResult.SpecFiles) |
| 293 | + } |
| 294 | + |
| 295 | + return results, nil |
| 296 | +} |
| 297 | + |
| 298 | +// writeInputsManifest writes the inputs.json manifest to the staging directory |
| 299 | +// so it can be read by the Python script inside the mock chroot. |
| 300 | +func writeInputsManifest(fs opctx.FS, stagingDir string, inputs []ComponentInput) error { |
| 301 | + jsonInputs := make([]componentInputJSON, len(inputs)) |
| 302 | + for idx, input := range inputs { |
| 303 | + jsonInputs[idx] = componentInputJSON(input) |
| 304 | + } |
| 305 | + |
| 306 | + data, err := json.Marshal(jsonInputs) |
| 307 | + if err != nil { |
| 308 | + return fmt.Errorf("marshaling inputs:\n%w", err) |
| 309 | + } |
| 310 | + |
| 311 | + inputsPath := filepath.Join(stagingDir, "inputs.json") |
| 312 | + if err := fileutils.WriteFile(fs, inputsPath, data, fileperms.PublicFile); err != nil { |
| 313 | + return fmt.Errorf("writing inputs manifest:\n%w", err) |
| 314 | + } |
| 315 | + |
| 316 | + return nil |
| 317 | +} |
| 318 | + |
| 319 | +// Destroy cleans up the mock chroot. Should be called when rendering is complete. |
| 320 | +// The processor must not be reused after Destroy — create a new MockProcessor if needed. |
| 321 | +// Attempts cleanup even if initialization partially failed (e.g., InitRoot succeeded |
| 322 | +// but InstallPackages failed), since a partially initialized chroot still needs scrubbing. |
| 323 | +func (p *MockProcessor) Destroy(ctx context.Context) { |
| 324 | + p.mu.Lock() |
| 325 | + defer p.mu.Unlock() |
| 326 | + |
| 327 | + if p.runner != nil && p.initialized { |
| 328 | + slog.Debug("Destroying mock chroot") |
| 329 | + |
| 330 | + if err := p.runner.ScrubRoot(ctx); err != nil { |
| 331 | + slog.Warn("Failed to clean up mock chroot", "error", err) |
| 332 | + } |
| 333 | + } |
| 334 | +} |
0 commit comments