Skip to content
23 changes: 23 additions & 0 deletions Sources/Containerization/DNSConfiguration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// limitations under the License.
//===----------------------------------------------------------------------===//

import ContainerizationError
import ContainerizationExtras

/// DNS configuration for a container. The values will be used to
/// construct /etc/resolv.conf for a given container.
public struct DNS: Sendable {
Expand Down Expand Up @@ -41,6 +44,26 @@ public struct DNS: Sendable {
self.searchDomains = searchDomains
self.options = options
}

/// Validates the DNS configuration.
///
/// Every nameserver entry must parse as either an IPv4 or an IPv6
/// address; arbitrary hostnames are rejected.
///
/// - Throws: ``ContainerizationError`` with code `.invalidArgument` if
///   any nameserver is not a valid IP address.
public func validate() throws {
    for entry in nameservers {
        // Accept the entry if either address family can parse it.
        let parsesAsIP = ((try? IPv4Address(entry)) != nil) || ((try? IPv6Address(entry)) != nil)
        guard parsesAsIP else {
            throw ContainerizationError(
                .invalidArgument,
                message: "nameserver '\(entry)' is not a valid IPv4 or IPv6 address"
            )
        }
    }
}
}

extension DNS {
Expand Down
6 changes: 2 additions & 4 deletions Sources/Containerization/Image/InitImage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ extension InitImage {
var result = try writer.create(from: rootfs)
let layerDescriptor = Descriptor(mediaType: ContainerizationOCI.MediaTypes.imageLayerGzip, digest: result.digest.digestString, size: result.size)

// TODO: compute and fill in the correct diffID for the above layer
// We currently put in the sha of the fully compressed layer, this needs to be replaced with
// the sha of the uncompressed layer.
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [result.digest.digestString])
let diffID = try ContentWriter.diffID(of: rootfs)
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [diffID.digestString])
let runtimeConfig = ContainerizationOCI.ImageConfig(labels: labels)
let imageConfig = ContainerizationOCI.Image(architecture: platform.architecture, os: platform.os, config: runtimeConfig, rootfs: rootfsConfig)
result = try writer.create(from: imageConfig)
Expand Down
1 change: 1 addition & 0 deletions Sources/Containerization/Vminitd.swift
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ extension Vminitd {

/// Configure DNS within the sandbox's environment.
public func configureDNS(config: DNS, location: String) async throws {
try config.validate()
_ = try await client.configureDns(
.with {
$0.location = location
Expand Down
177 changes: 177 additions & 0 deletions Sources/ContainerizationOCI/Content/ContentWriter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
// limitations under the License.
//===----------------------------------------------------------------------===//

import Compression
import ContainerizationError
import Crypto
import Foundation
import NIOCore
import zlib

/// Provides a context to write data into a directory.
public class ContentWriter {
Expand Down Expand Up @@ -60,6 +62,175 @@ public class ContentWriter {
return try self.write(data)
}

/// Computes the SHA256 digest of the uncompressed content of a gzip file.
///
/// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
/// uncompressed layer content. This method streams the compressed file in
/// chunks, decompresses through Apple's Compression framework, and feeds
/// each decompressed chunk into an incremental SHA256 hasher. Neither the
/// full compressed nor the full decompressed data is held in memory.
///
/// Only single-member gzip streams are supported; trailing data after the
/// first member will cause the trailer validation to fail.
///
/// - Parameter url: The URL of the gzip-compressed file.
/// - Returns: The SHA256 digest of the uncompressed content.
/// - Throws: `ContentWriterError` if the file is not valid gzip, the
///   deflate stream cannot be decompressed, or the trailer's CRC32/ISIZE
///   do not match the decompressed content.
public static func diffID(of url: URL) throws -> SHA256.Digest {
    let fileHandle = try FileHandle(forReadingFrom: url)
    defer { fileHandle.closeFile() }

    // Determine the file size up front: it bounds the header read (the
    // whole file may be smaller than the preferred read size) and locates
    // the 8-byte trailer.
    fileHandle.seekToEndOfFile()
    let fileSize = fileHandle.offsetInFile
    // Minimum valid gzip member: 10-byte fixed header + 8-byte trailer.
    guard fileSize >= 18 else {
        throw ContentWriterError.invalidGzip
    }

    // Read just enough to parse the gzip header (512 bytes covers typical
    // headers), without reading past the end of the file.
    fileHandle.seek(toFileOffset: 0)
    let headerReadSize = min(512, Int(fileSize))
    guard let headerData = Self.readExactly(fileHandle: fileHandle, count: headerReadSize) else {
        throw ContentWriterError.invalidGzip
    }
    let headerSize = try Self.gzipHeaderSize(headerData)

    // Read the gzip trailer: CRC32 and ISIZE, both little-endian per
    // RFC 1952. `readExactly` may return fewer bytes than requested near
    // EOF, and a short trailer would make the byte extraction below trap,
    // so the length is checked explicitly.
    fileHandle.seek(toFileOffset: fileSize - 8)
    guard let trailerData = Self.readExactly(fileHandle: fileHandle, count: 8),
        trailerData.count == 8
    else {
        throw ContentWriterError.gzipTrailerMismatch
    }
    // Decodes a little-endian UInt32 at `offset` within the trailer.
    func trailerUInt32(at offset: Int) -> UInt32 {
        let base = trailerData.startIndex + offset
        return UInt32(trailerData[base])
            | (UInt32(trailerData[base + 1]) << 8)
            | (UInt32(trailerData[base + 2]) << 16)
            | (UInt32(trailerData[base + 3]) << 24)
    }
    let expectedCRC = trailerUInt32(at: 0)
    let expectedSize = trailerUInt32(at: 4)

    // Seek past the gzip header to the start of the deflate stream. The
    // deflate data spans headerSize ..< fileSize - 8; the trailing 8 bytes
    // are the trailer and must not be fed to the decompressor.
    fileHandle.seek(toFileOffset: UInt64(headerSize))
    var compressedBytesRemaining = Int(fileSize) - headerSize - 8
    guard compressedBytesRemaining >= 0 else {
        throw ContentWriterError.invalidGzip
    }

    // Set up the streaming decompressor. COMPRESSION_ZLIB decodes the raw
    // deflate payload that forms the body of a gzip member.
    let chunkSize = 65_536
    let sourceBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer {
        sourceBuffer.deallocate()
        destinationBuffer.deallocate()
    }

    let stream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
    defer { stream.deallocate() }

    var status = compression_stream_init(stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
    guard status != COMPRESSION_STATUS_ERROR else {
        throw ContentWriterError.decompressionFailed
    }
    defer { compression_stream_destroy(stream) }

    // Start with an empty source; the loop below refills it from the file.
    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
    stream.pointee.src_size = 0
    stream.pointee.dst_ptr = destinationBuffer
    stream.pointee.dst_size = chunkSize

    var hasher = SHA256()
    var runningCRC: uLong = crc32(0, nil, 0)
    var totalDecompressedSize: UInt64 = 0
    var inputExhausted = false

    while status != COMPRESSION_STATUS_END {
        // Refill the source buffer when it is exhausted and more compressed
        // bytes remain.
        if stream.pointee.src_size == 0 && !inputExhausted {
            let toRead = min(chunkSize, compressedBytesRemaining)
            if toRead > 0 {
                let chunk = fileHandle.readData(ofLength: toRead)
                if chunk.isEmpty {
                    // Unexpected EOF inside the deflate stream; finalize so
                    // the decompressor reports the truncation.
                    inputExhausted = true
                } else {
                    compressedBytesRemaining -= chunk.count
                    chunk.copyBytes(to: sourceBuffer, count: chunk.count)
                    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
                    stream.pointee.src_size = chunk.count
                }
            } else {
                inputExhausted = true
            }
        }

        stream.pointee.dst_ptr = destinationBuffer
        stream.pointee.dst_size = chunkSize

        let flags: Int32 = inputExhausted ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
        status = compression_stream_process(stream, flags)

        switch status {
        case COMPRESSION_STATUS_OK, COMPRESSION_STATUS_END:
            let produced = chunkSize - stream.pointee.dst_size
            if produced > 0 {
                // Feed each decompressed chunk to the hasher and the CRC so
                // the plaintext never has to be materialized in full.
                let buf = UnsafeBufferPointer(start: destinationBuffer, count: produced)
                hasher.update(bufferPointer: UnsafeRawBufferPointer(buf))
                runningCRC = crc32(runningCRC, destinationBuffer, uInt(produced))
                totalDecompressedSize += UInt64(produced)
            }

        default:
            throw ContentWriterError.decompressionFailed
        }
    }

    // Validate against the trailer. ISIZE is the uncompressed size modulo
    // 2^32, so truncation of our running total is the correct comparison.
    let actualCRC = UInt32(truncatingIfNeeded: runningCRC)
    let actualSize = UInt32(truncatingIfNeeded: totalDecompressedSize)

    guard expectedCRC == actualCRC, expectedSize == actualSize else {
        throw ContentWriterError.gzipTrailerMismatch
    }

    return hasher.finalize()
}

/// Reads up to `count` bytes from a FileHandle at its current offset.
///
/// NOTE(review): despite the name, this does not guarantee that exactly
/// `count` bytes are returned — `readData(ofLength:)` may return fewer
/// bytes near end-of-file, and only a zero-length read maps to `nil`.
/// Callers that require a fixed-size read (e.g. the 8-byte gzip trailer)
/// must verify `count` on the returned data themselves.
///
/// - Parameters:
///   - fileHandle: The open handle to read from.
///   - count: The maximum number of bytes to read.
/// - Returns: The bytes read, or `nil` if the handle was at end-of-file.
private static func readExactly(fileHandle: FileHandle, count: Int) -> Data? {
    let bytes = fileHandle.readData(ofLength: count)
    guard !bytes.isEmpty else { return nil }
    return bytes
}

/// Parses the gzip header to determine where the raw deflate stream begins.
///
/// Validates the fixed 10-byte header (magic bytes `1f 8b` and CM == 8,
/// i.e. deflate, the only method defined by RFC 1952), then skips any
/// optional fields indicated by the FLG byte: FEXTRA, FNAME, FCOMMENT,
/// and FHCRC, in the order RFC 1952 specifies them.
///
/// - Parameter data: A prefix of the gzip file (the caller reads up to
///   512 bytes). Headers that extend past the end of `data` are rejected
///   as invalid rather than parsed further.
/// - Returns: The offset, relative to the start of `data`, at which the
///   deflate stream begins.
/// - Throws: `ContentWriterError.invalidGzip` if the header is malformed
///   or does not fit within `data`.
private static func gzipHeaderSize(_ data: Data) throws -> Int {
    guard data.count >= 10,
        data[data.startIndex] == 0x1f,
        data[data.startIndex + 1] == 0x8b,
        data[data.startIndex + 2] == 0x08 // CM must be 8 (deflate) per RFC 1952
    else {
        throw ContentWriterError.invalidGzip
    }

    // `data` may be a slice, so all indexing is relative to startIndex.
    let start = data.startIndex
    let flags = data[start + 3]
    // The fixed portion of the header is always 10 bytes.
    var offset = 10

    // FEXTRA: 2-byte little-endian length followed by that many bytes.
    if flags & 0x04 != 0 {
        guard data.count >= offset + 2 else { throw ContentWriterError.invalidGzip }
        let extraLen = Int(data[start + offset]) | (Int(data[start + offset + 1]) << 8)
        offset += 2 + extraLen
    }
    // FNAME: zero-terminated original file name.
    if flags & 0x08 != 0 {
        while offset < data.count && data[start + offset] != 0 { offset += 1 }
        offset += 1
    }
    // FCOMMENT: zero-terminated comment.
    if flags & 0x10 != 0 {
        while offset < data.count && data[start + offset] != 0 { offset += 1 }
        offset += 1
    }
    // FHCRC: 2-byte CRC16 of the header; skipped, not verified.
    if flags & 0x02 != 0 { offset += 2 }

    // The deflate stream must begin inside the bytes we were given; this
    // also catches headers truncated by any of the skips above.
    guard offset < data.count else { throw ContentWriterError.invalidGzip }
    return offset
}

/// Encodes the passed in type as a JSON blob and writes it to the base path.
/// - Parameters:
/// - content: The type to convert to JSON.
Expand All @@ -69,3 +240,9 @@ public class ContentWriter {
return try self.write(data)
}
}

/// Errors thrown while computing a gzip layer's DiffID.
enum ContentWriterError: Error {
    // The file is not a well-formed gzip stream: bad magic bytes,
    // unsupported compression method, or a malformed/oversized header.
    case invalidGzip
    // The Compression framework reported a failure while inflating the
    // deflate stream.
    case decompressionFailed
    // The CRC32 or ISIZE recorded in the gzip trailer does not match the
    // decompressed content, or the trailer could not be read.
    case gzipTrailerMismatch
}
Loading
Loading