Skip to content

Commit ced4c2f

Browse files
author
Antonio Salinas
authored
fix: handle missing sources files + sources files with md5 hashes (#415)
1 parent 31816e1 commit ced4c2f

4 files changed

Lines changed: 205 additions & 64 deletions

File tree

internal/providers/sourceproviders/fedorasource/fedorasource.go

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717
"github.com/gim-home/azldev-preview/internal/global/opctx"
1818
"github.com/gim-home/azldev-preview/internal/utils/downloader"
1919
"github.com/gim-home/azldev-preview/internal/utils/fileutils"
20-
"github.com/spf13/afero"
2120
)
2221

2322
type FedoraSourceDownloader interface {
@@ -43,7 +42,16 @@ var _ FedoraSourceDownloader = (*FedoraSourceDownloaderImpl)(nil)
4342
// 1: Hash algorithm (e.g., "SHA512")
4443
// 2: Filename (e.g., "example-1.0.tar.gz")
4544
// 3: Hash value (e.g., "a1b2c3d4e5f6...")
46-
var sourcesFilePattern = regexp.MustCompile(`^([A-Z0-9]+)\s+\(([^)]+)\)\s+=\s+([a-f0-9]+)$`)
45+
var sourcesFilePattern = regexp.MustCompile(`^([A-Z0-9]+)\s+\(([^)]+)\)\s+=\s+([a-fA-F0-9]+)$`)
46+
47+
// sourcesFileLegacyPattern matches lines in the legacy sources file format.
48+
// This is the older format used by some packages, typically with MD5 hashes.
49+
// Example line: "7b74551e63f8ee6aab6fbc86676c0d37 zip30.tar.gz"
50+
// Capture groups:
51+
//
52+
// 1: Hash value (e.g., "7b74551e63f8ee6aab6fbc86676c0d37")
53+
// 2: Filename (e.g., "zip30.tar.gz")
54+
var sourcesFileLegacyPattern = regexp.MustCompile(`^([a-fA-F0-9]+)\s+(\S+)$`)
4755

4856
// Capture group indexes for sourcesFilePattern.
4957
const (
@@ -52,6 +60,12 @@ const (
5260
sourcesPatternHashValueIndex = 3
5361
)
5462

63+
// Capture group indexes for sourcesFileLegacyPattern.
64+
const (
65+
sourcesLegacyPatternHashValueIndex = 1
66+
sourcesLegacyPatternFilenameIndex = 2
67+
)
68+
5569
// sourceFileInfo contains metadata about a source file to be downloaded.
5670
type sourceFileInfo struct {
5771
fileName string
@@ -114,9 +128,24 @@ func (g *FedoraSourceDownloaderImpl) ExtractSourcesFromRepo(
114128

115129
sourcesFilePath := filepath.Join(repoDir, "sources")
116130

117-
sourceFiles, err := g.extractSourcesInfo(sourcesFilePath, packageName, lookasideBaseURI)
131+
sourcesExists, err := fileutils.Exists(g.fileSystem, sourcesFilePath)
118132
if err != nil {
119-
return fmt.Errorf("failed to extract sources info from sources file at %#q:\n%w", sourcesFilePath, err)
133+
return fmt.Errorf("failed to check if sources file exists at %#q:\n%w", sourcesFilePath, err)
134+
}
135+
136+
// If the sources file does not exist, there are no external sources to download
137+
if !sourcesExists {
138+
return nil
139+
}
140+
141+
sourcesContent, err := fileutils.ReadFile(g.fileSystem, sourcesFilePath)
142+
if err != nil {
143+
return fmt.Errorf("failed to read sources file at %#q:\n%w", sourcesFilePath, err)
144+
}
145+
146+
sourceFiles, err := parseSourcesFile(string(sourcesContent), packageName, lookasideBaseURI)
147+
if err != nil {
148+
return fmt.Errorf("failed to parse sources file at %#q:\n%w", sourcesFilePath, err)
120149
}
121150

122151
err = g.downloadAndVerifySources(ctx, sourceFiles, repoDir)
@@ -168,36 +197,48 @@ func (g *FedoraSourceDownloaderImpl) validateDownloadedFile(
168197
return nil
169198
}
170199

171-
func (g *FedoraSourceDownloaderImpl) extractSourcesInfo(
172-
sourcesFilePath string,
173-
packageName string,
174-
lookasideBaseURI string,
175-
) ([]sourceFileInfo, error) {
176-
sourcesContent, err := afero.ReadFile(g.fileSystem, sourcesFilePath)
177-
if err != nil {
178-
return nil, fmt.Errorf("failed to read sources file at %#q:\n%w", sourcesFilePath, err)
179-
}
180-
200+
// parseSourcesFile parses the content of a Fedora/RHEL sources file and returns
201+
// the list of source files to download. It supports both the modern format
202+
// (e.g., "SHA512 (file.tar.gz) = abc123...") and the legacy MD5 format
203+
// (e.g., "abc123... file.tar.gz").
204+
func parseSourcesFile(content string, packageName string, lookasideBaseURI string) ([]sourceFileInfo, error) {
181205
sourceFiles := []sourceFileInfo{}
182206

183207
// Parse and validate each line in the sources file
184-
lines := strings.Split(string(sourcesContent), "\n")
208+
lines := strings.Split(content, "\n")
185209
for lineNum, line := range lines {
186210
line = strings.TrimSpace(line)
187211

188212
if line == "" || strings.HasPrefix(line, "#") {
189213
continue
190214
}
191215

216+
var (
217+
hashType string
218+
fileName string
219+
hash string
220+
)
221+
222+
// Try modern format first
192223
matches := sourcesFilePattern.FindStringSubmatch(line)
193-
if matches == nil {
194-
return nil, fmt.Errorf("invalid format in sources file at line %d: %#q", lineNum+1, line)
224+
if matches != nil {
225+
hashType = matches[sourcesPatternHashTypeIndex]
226+
fileName = matches[sourcesPatternFilenameIndex]
227+
hash = matches[sourcesPatternHashValueIndex]
228+
} else {
229+
// Try legacy format before failing
230+
legacyMatches := sourcesFileLegacyPattern.FindStringSubmatch(line)
231+
if legacyMatches == nil {
232+
return nil, fmt.Errorf("invalid format in sources file at line %d: %#q", lineNum+1, line)
233+
}
234+
235+
hash = legacyMatches[sourcesLegacyPatternHashValueIndex]
236+
fileName = legacyMatches[sourcesLegacyPatternFilenameIndex]
237+
238+
// Legacy format historically only used MD5
239+
hashType = "MD5"
195240
}
196241

197-
hashType := matches[sourcesPatternHashTypeIndex]
198-
fileName := matches[sourcesPatternFilenameIndex]
199-
hash := matches[sourcesPatternHashValueIndex]
200-
201242
sourceURI := lookasideBaseURI
202243
sourceURI = strings.ReplaceAll(sourceURI, "$pkg", packageName)
203244
sourceURI = strings.ReplaceAll(sourceURI, "$filename", fileName)

internal/providers/sourceproviders/fedorasource/fedorasource_test.go

Lines changed: 111 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4-
package fedorasource_test
4+
//nolint:testpackage // Allow to test private functions
5+
package fedorasource
56

67
import (
78
"context"
89
"path/filepath"
910
"testing"
1011

1112
"github.com/gim-home/azldev-preview/internal/global/testctx"
12-
"github.com/gim-home/azldev-preview/internal/providers/sourceproviders/fedorasource"
1313
"github.com/gim-home/azldev-preview/internal/utils/downloader/downloader_test"
14+
"github.com/gim-home/azldev-preview/internal/utils/fileutils"
1415
"github.com/spf13/afero"
1516
"github.com/stretchr/testify/assert"
1617
"github.com/stretchr/testify/require"
@@ -22,18 +23,13 @@ const (
2223
testPackageName = "test-package"
2324
testRepoDir = "/test/repo"
2425
testEmptyRepoDir = "/test/empty-repo"
25-
testInvalidDir = "/test/invalid-formats"
2626
testFilePerms = 0o644
2727
testDirPerms = 0o755
2828

2929
// Test source file data.
3030
testSourcesContent = `SHA512 (example-1.0.tar.gz) = af5ae0eb4ad9c3f07b7d78ec9dfd03f6a00c257df6b829b21051d4ba2d106bf` +
3131
`9d2f7563c0373b45e0ce5b1ad8a3bad9c05a2769547e67f4bc53692950db0ba37
3232
SHA256 (patch-1.patch) = 67899aaa0f2f55e55e715cb65596449cb29bb0a76a764fe8f1e51bf4d0af648f
33-
`
34-
testInvalidSourcesContent = `SHA512 (example.tar.gz)
35-
this is not a valid line
36-
SHA512 example.tar.gz = abc123
3733
`
3834
testSingleSourceContent = `SHA512 (example-1.0.tar.gz) = abc123def456
3935
`
@@ -55,7 +51,7 @@ func TestNewFedoraRepoExtractorImpl(t *testing.T) {
5551
ctx := testctx.NewCtx()
5652
mockDownloader := downloader_test.NewMockDownloader(ctrl)
5753

58-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
54+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
5955

6056
require.NoError(t, err)
6157
require.NotNil(t, extractor)
@@ -67,17 +63,17 @@ func TestNewFedoraRepoExtractorImplValidation(t *testing.T) {
6763
mockDownloader := downloader_test.NewMockDownloader(ctrl)
6864

6965
t.Run("nil dryRunnable should fail", func(t *testing.T) {
70-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(nil, ctx.FS(), mockDownloader)
66+
extractor, err := NewFedoraRepoExtractorImpl(nil, ctx.FS(), mockDownloader)
7167
require.Error(t, err)
7268
require.Nil(t, extractor)
7369
})
7470
t.Run("nil filesystem should fail", func(t *testing.T) {
75-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, nil, mockDownloader)
71+
extractor, err := NewFedoraRepoExtractorImpl(ctx, nil, mockDownloader)
7672
require.Error(t, err)
7773
require.Nil(t, extractor)
7874
})
7975
t.Run("nil downloader should fail", func(t *testing.T) {
80-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), nil)
76+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), nil)
8177
require.Error(t, err)
8278
require.Nil(t, extractor)
8379
})
@@ -88,7 +84,7 @@ func TestExtractSourcesFromRepo(t *testing.T) {
8884
ctx := testctx.NewCtx()
8985
mockDownloader := downloader_test.NewMockDownloader(ctrl)
9086

91-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
87+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
9288
require.NoError(t, err)
9389

9490
require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms))
@@ -122,7 +118,7 @@ func TestExtractSourcesFromRepoValidation(t *testing.T) {
122118
ctx := testctx.NewCtx()
123119
mockDownloader := downloader_test.NewMockDownloader(ctrl)
124120

125-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
121+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
126122
require.NoError(t, err)
127123

128124
t.Run("empty repo dir", func(t *testing.T) {
@@ -137,43 +133,21 @@ func TestExtractSourcesFromRepoValidation(t *testing.T) {
137133
assert.Contains(t, err.Error(), "lookaside base URI cannot be empty")
138134
})
139135

140-
t.Run("lookaside URI missing placeholders", func(t *testing.T) {
141-
err := extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, "missing-placeholders")
142-
require.Error(t, err)
143-
assert.Contains(t, err.Error(), "lookaside base URI is missing required placeholder")
144-
})
145-
146136
t.Run("missing sources file", func(t *testing.T) {
147137
require.NoError(t, ctx.FS().MkdirAll(testEmptyRepoDir, 0o755))
148138

139+
// Missing sources file is valid - it means no external sources to download
149140
err := extractor.ExtractSourcesFromRepo(context.Background(), testEmptyRepoDir, testPackageName, testLookasideURI)
150-
require.Error(t, err)
151-
assert.Contains(t, err.Error(), "failed to read sources file")
141+
require.NoError(t, err)
152142
})
153143
}
154144

155-
func TestExtractSourcesFromRepoInvalidFormats(t *testing.T) {
156-
ctrl := gomock.NewController(t)
157-
ctx := testctx.NewCtx()
158-
mockDownloader := downloader_test.NewMockDownloader(ctrl)
159-
160-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
161-
require.NoError(t, err)
162-
163-
require.NoError(t, ctx.FS().MkdirAll(testInvalidDir, testDirPerms))
164-
setupSourcesFile(t, ctx.FS(), testInvalidDir, testInvalidSourcesContent)
165-
166-
err = extractor.ExtractSourcesFromRepo(context.Background(), testInvalidDir, testPackageName, testLookasideURI)
167-
require.Error(t, err)
168-
assert.Contains(t, err.Error(), "invalid format in sources file at line")
169-
}
170-
171145
func TestExtractSourcesFromRepoDownloadFailure(t *testing.T) {
172146
ctrl := gomock.NewController(t)
173147
ctx := testctx.NewCtx()
174148
mockDownloader := downloader_test.NewMockDownloader(ctrl)
175149

176-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
150+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
177151
require.NoError(t, err)
178152

179153
require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms))
@@ -194,7 +168,7 @@ func TestExtractSourcesFromRepoHashMismatch(t *testing.T) {
194168
ctx := testctx.NewCtx()
195169
mockDownloader := downloader_test.NewMockDownloader(ctrl)
196170

197-
extractor, err := fedorasource.NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
171+
extractor, err := NewFedoraRepoExtractorImpl(ctx, ctx.FS(), mockDownloader)
198172
require.NoError(t, err)
199173

200174
require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms))
@@ -219,3 +193,101 @@ func setupSourcesFile(t *testing.T, fs afero.Fs, repoDir string, content string)
219193
sourcesPath := filepath.Join(repoDir, testSourcesFile)
220194
require.NoError(t, afero.WriteFile(fs, sourcesPath, []byte(content), testFilePerms))
221195
}
196+
197+
func TestParseSourcesFile(t *testing.T) {
198+
t.Run("modern format parses hash type and filename", func(t *testing.T) {
199+
content := "SHA512 (file.tar.gz) = abc123\n"
200+
201+
sources, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
202+
203+
require.NoError(t, err)
204+
require.Len(t, sources, 1)
205+
assert.Equal(t, "file.tar.gz", sources[0].fileName)
206+
assert.Equal(t, fileutils.HashType("SHA512"), sources[0].hashType)
207+
assert.Equal(t, "abc123", sources[0].expectedHash)
208+
assert.Equal(t, "https://example.com/sha512/abc123/pkg/file.tar.gz", sources[0].uri)
209+
})
210+
211+
t.Run("legacy format defaults to MD5", func(t *testing.T) {
212+
content := "abc123def456 legacy.tar.gz\n"
213+
214+
sources, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
215+
216+
require.NoError(t, err)
217+
require.Len(t, sources, 1)
218+
assert.Equal(t, "legacy.tar.gz", sources[0].fileName)
219+
assert.Equal(t, fileutils.HashType("MD5"), sources[0].hashType)
220+
assert.Equal(t, "abc123def456", sources[0].expectedHash)
221+
})
222+
223+
t.Run("mixed case hex values are preserved", func(t *testing.T) {
224+
content := "SHA256 (file.tar.gz) = AbCdEf123456\n"
225+
226+
sources, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
227+
228+
require.NoError(t, err)
229+
assert.Equal(t, "AbCdEf123456", sources[0].expectedHash)
230+
})
231+
232+
t.Run("skips empty lines and comments", func(t *testing.T) {
233+
content := "\n# this is a comment\nSHA256 (file.tar.gz) = abc123\n\n"
234+
235+
sources, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
236+
237+
require.NoError(t, err)
238+
require.Len(t, sources, 1)
239+
})
240+
241+
t.Run("multiple entries", func(t *testing.T) {
242+
content := "SHA512 (first.tar.gz) = aabbcc112233\nSHA256 (second.patch) = ddeeff445566\n"
243+
244+
sources, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
245+
246+
require.NoError(t, err)
247+
require.Len(t, sources, 2)
248+
assert.Equal(t, "first.tar.gz", sources[0].fileName)
249+
assert.Equal(t, "second.patch", sources[1].fileName)
250+
})
251+
252+
t.Run("invalid format returns error with line number", func(t *testing.T) {
253+
content := "SHA512 (valid.tar.gz) = abc123\nnot a valid line\n"
254+
255+
_, err := parseSourcesFile(content, "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
256+
257+
require.Error(t, err)
258+
assert.Contains(t, err.Error(), "line 2")
259+
})
260+
261+
t.Run("empty content returns empty slice", func(t *testing.T) {
262+
sources, err := parseSourcesFile("", "pkg", "https://example.com/$hashtype/$hash/$pkg/$filename")
263+
264+
require.NoError(t, err)
265+
assert.Empty(t, sources)
266+
})
267+
}
268+
269+
func TestVerifyFedoraLookasideBaseURI(t *testing.T) {
270+
t.Run("valid URI with all placeholders", func(t *testing.T) {
271+
err := verifyFedoraLookasideBaseURI("https://example.com/$hashtype/$hash/$pkg/$filename")
272+
require.NoError(t, err)
273+
})
274+
275+
t.Run("missing placeholder returns error", func(t *testing.T) {
276+
tests := []struct {
277+
uri string
278+
missing string
279+
}{
280+
{"https://example.com/$hash/$pkg/$filename", "$hashtype"},
281+
{"https://example.com/$hashtype/$hash/$filename", "$pkg"},
282+
{"https://example.com/$hashtype/$hash/$pkg/", "$filename"},
283+
}
284+
285+
for _, tc := range tests {
286+
t.Run(tc.missing, func(t *testing.T) {
287+
err := verifyFedoraLookasideBaseURI(tc.uri)
288+
require.Error(t, err, "URI %q should fail for missing %s", tc.uri, tc.missing)
289+
assert.Contains(t, err.Error(), tc.missing)
290+
})
291+
}
292+
})
293+
}

0 commit comments

Comments
 (0)