Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 54 additions & 8 deletions internal/ldconfig/ldconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"bufio"
"flag"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
Expand Down Expand Up @@ -168,6 +169,7 @@ func (l *Ldconfig) UpdateLDCache() error {
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdFilenamePattern, err)
}

systemSearchPaths := l.getSystemSearchPaths()
// In most cases, the hook will be executing a host ldconfig that may be configured widely
// differently from what the container image expects.
// The common case is Debian-like (e.g. Debian, Ubuntu) vs non-Debian-like (e.g. RHEL, Fedora).
Expand All @@ -176,10 +178,15 @@ func (l *Ldconfig) UpdateLDCache() error {
// paths to a drop-in conf file that is likely to be last in lexicographic order. Entries in the
// top-level ld.so.conf file may be processed after this drop-in, but this hook does not modify
// the top-level file if it exists.
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdSystemDirsFilenamePattern, l.getSystemSearchPaths()...); err != nil {
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdSystemDirsFilenamePattern, systemSearchPaths...); err != nil {
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdSystemDirsFilenamePattern, err)
}

// Also output the folders to the alpine .path file as required.
if err := createAlpinePathFileIfRequired(append(filteredDirectories, systemSearchPaths...)...); err != nil {
return err
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a more descriptive error message here.

}

return SafeExec(ldconfigPath, args, nil)
}

Expand Down Expand Up @@ -246,23 +253,30 @@ func createLdsoconfdFile(ldsoconfdDir, pattern string, dirs ...string) error {
_ = configFile.Close()
}()

if err := outputListToFile(configFile, dirs...); err != nil {
return err
}

// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}

return nil
}

func outputListToFile(w io.Writer, dirs ...string) error {
added := make(map[string]bool)
for _, dir := range dirs {
if added[dir] {
continue
}
_, err := fmt.Fprintf(configFile, "%s\n", dir)
_, err := fmt.Fprintf(w, "%s\n", dir)
if err != nil {
return fmt.Errorf("failed to update config file: %w", err)
}
added[dir] = true
}

// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}

return nil
}

Expand Down Expand Up @@ -360,6 +374,38 @@ func processLdsoconfFile(ldsoconfFilename string) ([]string, []string, error) {
return directories, includedFilenames, nil
}

func createAlpinePathFileIfRequired(dirs ...string) error {
if !isAlpine() {
return nil
}

var pathFileName string
switch runtime.GOARCH {
case "amd64":
pathFileName = "/etc/ld-musl-x86_64.path"
case "arm64":
pathFileName = "/etc/ld-musl-aarch64.path"
}

pathFile, err := os.OpenFile(pathFileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
if err != nil {
return fmt.Errorf("could not open .path file: %w", err)
}
defer func() {
_ = pathFile.Close()
}()

return outputListToFile(pathFile, dirs...)
}

func isAlpine() bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially out-of-scope -- is there an easy way of detecting if musl is the standard C library used in a distro? If yes, would we be open to expanding this change to be distro-agnostic? Meaning we would create the /etc/ld-musl-$ARCH.path file for any container that uses musl?

A quick LLM query suggested the below:

ldd --version 2>&1 | grep -q musl && echo "Using musl" || echo "Using glibc"

info, err := os.Stat("/etc/alpine-release")
if err != nil {
return false
}
return !info.IsDir()
}

// isDebianLike returns true if a Debian-like distribution is detected.
// Debian-like distributions include Debian and Ubuntu, whereas non-Debian-like
// distributions include RHEL and Fedora.
Expand Down
36 changes: 30 additions & 6 deletions tests/e2e/nvidia-container-toolkit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
var hostDriverVersion string
var hostDriverMajor string
var hostOutput string

// Install the NVIDIA Container Toolkit
BeforeAll(func(ctx context.Context) {
Expand All @@ -41,20 +42,17 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
hostDriverVersion = strings.TrimSpace(parts[1])
Expect(hostDriverVersion).ToNot(BeEmpty())
hostDriverMajor = strings.SplitN(hostDriverVersion, ".", 2)[0]

hostOutput, _, err = runner.Run("nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
})

// GPUs are accessible in a container: Running nvidia-smi -L inside the
// container shows the same output inside the container as outside the
// container. This means that the following commands must all produce
// the same output
When("running nvidia-smi -L", Ordered, func() {
var hostOutput string
var err error

BeforeAll(func(ctx context.Context) {
hostOutput, _, err = runner.Run("nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())

_, _, err := runner.Run("docker pull ubuntu")
Expect(err).ToNot(HaveOccurred())
})
Expand Down Expand Up @@ -589,4 +587,30 @@ EOF`)
Expect(output).To(Equal(expectedOutput))
})
})

When("running an alpine container", Ordered, func() {
BeforeAll(func(ctx context.Context) {
_, _, err := runner.Run(`docker build -t alpinecompat \
- <<EOF
FROM alpine
ENV NVIDIA_VISIBLE_DEVICES=all
RUN apk add --no-cache gcompat
EOF`)
Expect(err).ToNot(HaveOccurred())
})

It("nvidia-smi should succeed when using the nvidia-container-runtime", func(ctx context.Context) {
output, _, err := runner.Run(`docker run --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -e NVIDIA_DRIVER_CAPABILITIES=all \
alpinecompat nvidia-smi -L`)
Expect(err).ToNot(HaveOccurred())
Expect(output).To(Equal(hostOutput))
})

It("nvidia-smi should succeed when using the nvidia-container-runtime-hook", Label("legacy"), func(ctx context.Context) {
output, _, err := runner.Run(`docker run --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -e NVIDIA_DRIVER_CAPABILITIES=all \
alpinecompat nvidia-smi -L`)
Expect(err).ToNot(HaveOccurred())
Expect(output).To(Equal(hostOutput))
})
Comment on lines +609 to +614
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question -- was this test case passing before the relevant changes in this PR? If so, what is libnvidia-container doing to handle containers that use musl instead of glibc?

})
})