Skip to content

Commit ea7f603

Browse files
Mario Leyvacopybara-github
Mario Leyva
authored and committed
Change the SCALIBR image's content storage to be a single blob on disk.
PiperOrigin-RevId: 758990296
1 parent 103fced commit ea7f603

File tree

6 files changed

+282
-497
lines changed

6 files changed

+282
-497
lines changed

artifact/image/layerscanning/image/image.go

Lines changed: 47 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ const (
4747
// since that is the maximum number of symlinks the os.Root API will handle. From the os.Root API,
4848
// "8 is __POSIX_SYMLOOP_MAX (the minimum allowed value for SYMLOOP_MAX), and a common limit".
4949
DefaultMaxSymlinkDepth = 6
50+
51+
// filePermission represents the permission bits for a file, which are minimal since files in the
52+
// layer scanning use case are read-only.
53+
filePermission = 0600
54+
// dirPermission represents the permission bits for a directory, which are minimal since
55+
// directories in the layer scanning use case are read-only.
56+
dirPermission = 0700
5057
)
5158

5259
var (
@@ -83,8 +90,7 @@ func DefaultConfig() *Config {
8390
// image. Checks include:
8491
//
8592
// (1) MaxFileBytes is positive.
86-
// (2) Requirer is not nil.
87-
// (3) MaxSymlinkDepth is non-negative.
93+
// (2) MaxSymlinkDepth is non-negative.
8894
func validateConfig(config *Config) error {
8995
if config.MaxFileBytes <= 0 {
9096
return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes)
@@ -101,9 +107,8 @@ type Image struct {
101107
chainLayers []*chainLayer
102108
config *Config
103109
size int64
104-
root *os.Root
105-
ExtractDir string
106110
BaseImageIndex int
111+
contentBlob *os.File
107112
}
108113

109114
// TopFS returns the filesystem of the top-most chainlayer of the image. All available files should
@@ -129,7 +134,15 @@ func (img *Image) ChainLayers() ([]scalibrImage.ChainLayer, error) {
129134

130135
// CleanUp closes and removes the temporary content blob file used to store the image files.
131136
func (img *Image) CleanUp() error {
132-
return os.RemoveAll(img.ExtractDir)
137+
if img.contentBlob == nil {
138+
return nil
139+
}
140+
141+
if err := img.contentBlob.Close(); err != nil {
142+
log.Warnf("failed to close content blob: %v", err)
143+
}
144+
145+
return os.Remove(img.contentBlob.Name())
133146
}
134147

135148
// Size returns the size of the underlying content blob of the image in bytes.
@@ -192,20 +205,10 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
192205
return nil, fmt.Errorf("failed to initialize chain layers: %w", err)
193206
}
194207

195-
imageExtractionPath, err := os.MkdirTemp("", "osv-scalibr-image-scanning-*")
196-
if err != nil {
197-
return nil, fmt.Errorf("failed to create temporary directory: %w", err)
198-
}
199-
200-
// OpenRoot assumes that the provided directory is trusted. In this case, we created the
201-
// imageExtractionPath directory, so it is indeed trusted.
202-
root, err := os.OpenRoot(imageExtractionPath)
208+
imageContentBlob, err := os.CreateTemp("", "image-blob-*")
203209
if err != nil {
204-
return nil, fmt.Errorf("failed to open root directory: %w", err)
210+
return nil, fmt.Errorf("failed to create image content file: %w", err)
205211
}
206-
// Close the root directory at the end of the function, since no more files will be unpacked
207-
// afterward.
208-
defer root.Close()
209212

210213
baseImageIndex, err := findBaseImageIndex(history)
211214
if err != nil {
@@ -215,14 +218,13 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
215218
outputImage := &Image{
216219
chainLayers: chainLayers,
217220
config: config,
218-
root: root,
219-
ExtractDir: imageExtractionPath,
220221
BaseImageIndex: baseImageIndex,
222+
contentBlob: imageContentBlob,
221223
}
222224

223225
// Add the root directory to each chain layer. If this is not done, then the virtual paths won't
224226
// be rooted, and traversal in the virtual filesystem will be broken.
225-
if err := addRootDirectoryToChainLayers(outputImage.chainLayers, imageExtractionPath); err != nil {
227+
if err := addRootDirectoryToChainLayers(outputImage.chainLayers); err != nil {
226228
return nil, handleImageError(outputImage, fmt.Errorf("failed to add root directory to chain layers: %w", err))
227229
}
228230

@@ -240,13 +242,6 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
240242
continue
241243
}
242244

243-
layerDir := layerDirectory(i)
244-
245-
// Create the chain layer directory if it doesn't exist.
246-
if err := root.Mkdir(layerDir, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) {
247-
return nil, handleImageError(outputImage, fmt.Errorf("failed to create chain layer directory: %w", err))
248-
}
249-
250245
if v1LayerIndex < 0 {
251246
return nil, handleImageError(outputImage, fmt.Errorf("mismatch between v1 layers and chain layers, on v1 layer index %d, but only %d v1 layers", v1LayerIndex, len(v1Layers)))
252247
}
@@ -265,12 +260,10 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
265260
defer layerReader.Close()
266261

267262
tarReader := tar.NewReader(layerReader)
268-
layerSize, err := fillChainLayersWithFilesFromTar(outputImage, tarReader, layerDir, chainLayersToFill)
269-
if err != nil {
263+
if err := fillChainLayersWithFilesFromTar(outputImage, tarReader, chainLayersToFill); err != nil {
270264
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
271265
}
272266

273-
outputImage.size += layerSize
274267
return nil
275268
}()
276269

@@ -286,16 +279,10 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
286279
// Helper functions
287280
// ========================================================
288281

289-
func layerDirectory(layerIndex int) string {
290-
return fmt.Sprintf("layer-%d", layerIndex)
291-
}
292-
293282
// addRootDirectoryToChainLayers adds the root ("/") directory to each chain layer.
294-
func addRootDirectoryToChainLayers(chainLayers []*chainLayer, extractDir string) error {
295-
for i, chainLayer := range chainLayers {
283+
func addRootDirectoryToChainLayers(chainLayers []*chainLayer) error {
284+
for _, chainLayer := range chainLayers {
296285
err := chainLayer.fileNodeTree.Insert("/", &virtualFile{
297-
extractDir: extractDir,
298-
layerDir: layerDirectory(i),
299286
virtualPath: "/",
300287
isWhiteout: false,
301288
mode: fs.ModeDir,
@@ -430,23 +417,20 @@ func initializeChainLayers(v1Layers []v1.Layer, history []v1.History, maxSymlink
430417
// fillChainLayersWithFilesFromTar fills the chain layers with the files found in the tar. The
431418
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
432419
// filesystem.
433-
func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, layerDir string, chainLayersToFill []*chainLayer) (int64, error) {
420+
func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, chainLayersToFill []*chainLayer) error {
434421
if len(chainLayersToFill) == 0 {
435-
return 0, errors.New("no chain layers provided, this should not happen")
422+
return errors.New("no chain layers provided, this should not happen")
436423
}
437424

438425
currentChainLayer := chainLayersToFill[0]
439426

440-
// layerSize is the cumulative size of all the extracted files in the tar.
441-
var layerSize int64
442-
443427
for {
444428
header, err := tarReader.Next()
445429
if errors.Is(err, io.EOF) {
446430
break
447431
}
448432
if err != nil {
449-
return 0, fmt.Errorf("could not read tar: %w", err)
433+
return fmt.Errorf("could not read tar: %w", err)
450434
}
451435

452436
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
@@ -501,11 +485,11 @@ func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, layerDir
501485
var newVirtualFile *virtualFile
502486
switch header.Typeflag {
503487
case tar.TypeDir:
504-
newVirtualFile, err = img.handleDir(virtualPath, layerDir, header, isWhiteout)
488+
newVirtualFile = img.handleDir(virtualPath, header, isWhiteout)
505489
case tar.TypeReg:
506-
newVirtualFile, err = img.handleFile(virtualPath, layerDir, tarReader, header, isWhiteout)
490+
newVirtualFile, err = img.handleFile(virtualPath, tarReader, header, isWhiteout)
507491
case tar.TypeSymlink, tar.TypeLink:
508-
newVirtualFile, err = img.handleSymlink(virtualPath, layerDir, header, isWhiteout)
492+
newVirtualFile, err = img.handleSymlink(virtualPath, header, isWhiteout)
509493
default:
510494
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
511495
continue
@@ -518,14 +502,12 @@ func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, layerDir
518502
log.Warnf("failed to handle tar entry with path %s: %w", virtualPath, err)
519503
continue
520504
}
521-
return 0, fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
505+
return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
522506
}
523507

524-
layerSize += header.Size
525-
526508
// If the virtual path has any directories and those directories have not been populated, then
527509
// populate them with file nodes.
528-
populateEmptyDirectoryNodes(virtualPath, layerDir, img.ExtractDir, chainLayersToFill)
510+
populateEmptyDirectoryNodes(virtualPath, chainLayersToFill)
529511

530512
// In each outer loop, a layer is added to each relevant output chainLayer slice. Because the
531513
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
@@ -536,13 +518,13 @@ func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, layerDir
536518
layer := currentChainLayer.latestLayer.(*Layer)
537519
_ = layer.fileNodeTree.Insert(virtualPath, newVirtualFile)
538520
}
539-
return layerSize, nil
521+
return nil
540522
}
541523

542524
// populateEmptyDirectoryNodes populates the chain layers with file nodes for any directory paths
543525
// that do not have an associated file node. This is done by creating a file node for each directory
544526
// in the virtual path and then filling the chain layers with that file node.
545-
func populateEmptyDirectoryNodes(virtualPath, layerDir, extractDir string, chainLayersToFill []*chainLayer) {
527+
func populateEmptyDirectoryNodes(virtualPath string, chainLayersToFill []*chainLayer) {
546528
currentChainLayer := chainLayersToFill[0]
547529

548530
runningDir := "/"
@@ -557,8 +539,6 @@ func populateEmptyDirectoryNodes(virtualPath, layerDir, extractDir string, chain
557539
}
558540

559541
node := &virtualFile{
560-
extractDir: extractDir,
561-
layerDir: layerDir,
562542
virtualPath: runningDir,
563543
isWhiteout: false,
564544
mode: fs.ModeDir,
@@ -569,7 +549,7 @@ func populateEmptyDirectoryNodes(virtualPath, layerDir, extractDir string, chain
569549

570550
// handleSymlink returns the symlink header mode. Symlinks are handled by creating a virtual file
571551
// with the symlink mode with additional metadata.
572-
func (img *Image) handleSymlink(virtualPath, layerDir string, header *tar.Header, isWhiteout bool) (*virtualFile, error) {
552+
func (img *Image) handleSymlink(virtualPath string, header *tar.Header, isWhiteout bool) (*virtualFile, error) {
573553
targetPath := filepath.ToSlash(header.Linkname)
574554
if targetPath == "" {
575555
return nil, errors.New("symlink header has no target path")
@@ -586,8 +566,6 @@ func (img *Image) handleSymlink(virtualPath, layerDir string, header *tar.Header
586566
}
587567

588568
return &virtualFile{
589-
extractDir: img.ExtractDir,
590-
layerDir: layerDir,
591569
virtualPath: virtualPath,
592570
targetPath: targetPath,
593571
isWhiteout: isWhiteout,
@@ -596,46 +574,23 @@ func (img *Image) handleSymlink(virtualPath, layerDir string, header *tar.Header
596574
}
597575

598576
// handleDir returns a virtual file representing the directory at virtualPath; nothing is created on disk.
599-
func (img *Image) handleDir(virtualPath, layerDir string, header *tar.Header, isWhiteout bool) (*virtualFile, error) {
600-
realFilePath := filepath.Join(img.ExtractDir, layerDir, filepath.FromSlash(virtualPath))
601-
if _, err := img.root.Stat(filepath.Join(layerDir, filepath.FromSlash(virtualPath))); err != nil {
602-
if err := os.MkdirAll(realFilePath, dirPermission); err != nil {
603-
return nil, fmt.Errorf("failed to create directory with realFilePath %s: %w", realFilePath, err)
604-
}
605-
}
606-
577+
func (img *Image) handleDir(virtualPath string, header *tar.Header, isWhiteout bool) *virtualFile {
607578
fileInfo := header.FileInfo()
608579

609580
return &virtualFile{
610-
extractDir: img.ExtractDir,
611-
layerDir: layerDir,
612581
virtualPath: virtualPath,
613582
isWhiteout: isWhiteout,
614583
mode: fileInfo.Mode() | fs.ModeDir,
615584
size: fileInfo.Size(),
616585
modTime: fileInfo.ModTime(),
617-
}, nil
586+
}
618587
}
619588

620589
// handleFile appends the contents of the tarReader to the image's content blob on disk.
621590
// The function returns a virtual file, which is meant to represent the file in a virtual
622591
// filesystem.
623-
func (img *Image) handleFile(virtualPath, layerDir string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*virtualFile, error) {
624-
realFilePath := filepath.Join(img.ExtractDir, layerDir, filepath.FromSlash(virtualPath))
625-
parentDirectory := filepath.Dir(realFilePath)
626-
if err := os.MkdirAll(parentDirectory, dirPermission); err != nil {
627-
return nil, fmt.Errorf("failed to create parent directory %s: %w", parentDirectory, err)
628-
}
629-
630-
// Write all files as read/writable by the current user, inaccessible by anyone else. Actual
631-
// permission bits are stored in FileNode.
632-
f, err := img.root.OpenFile(filepath.Join(layerDir, filepath.FromSlash(virtualPath)), os.O_CREATE|os.O_RDWR, filePermission)
633-
if err != nil {
634-
return nil, err
635-
}
636-
defer f.Close()
637-
638-
numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes))
592+
func (img *Image) handleFile(virtualPath string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*virtualFile, error) {
593+
numBytes, err := img.contentBlob.ReadFrom(io.LimitReader(tarReader, img.config.MaxFileBytes))
639594
if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) {
640595
return nil, ErrFileReadLimitExceeded
641596
}
@@ -644,16 +599,20 @@ func (img *Image) handleFile(virtualPath, layerDir string, tarReader *tar.Reader
644599
return nil, fmt.Errorf("unable to copy file: %w", err)
645600
}
646601

602+
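// Record the offset at which this file's bytes start in the content blob. The bytes were already
603+
// written above, so img.size (not yet updated for this file) is used as the start offset. NOTE(review): bytes written for entries rejected by the limit check above are never added to img.size, so this offset may lag the real blob position — confirm.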
604+
offset := img.size
605+
// Update the image size with the number of bytes read into the content blob.
606+
img.size += numBytes
647607
fileInfo := header.FileInfo()
648608

649609
return &virtualFile{
650-
extractDir: img.ExtractDir,
651-
layerDir: layerDir,
652610
virtualPath: virtualPath,
653611
isWhiteout: isWhiteout,
654612
mode: fileInfo.Mode(),
655-
size: fileInfo.Size(),
656613
modTime: fileInfo.ModTime(),
614+
size: numBytes,
615+
reader: io.NewSectionReader(img.contentBlob, offset, numBytes),
657616
}, nil
658617
}
659618

artifact/image/layerscanning/image/image_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,8 +1238,6 @@ func TestTopFS(t *testing.T) {
12381238
fileNodeTree: func() *Node {
12391239
root := NewNode()
12401240
_ = root.Insert("/", &virtualFile{
1241-
extractDir: "",
1242-
layerDir: "",
12431241
virtualPath: "/",
12441242
isWhiteout: false,
12451243
mode: fs.ModeDir | dirPermission,

0 commit comments

Comments
 (0)