diff --git a/README.md b/README.md index f159e44b..975bc28e 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,12 @@ Besides `GITEA_INSTANCE_URL` and `GITEA_RUNNER_REGISTRATION_TOKEN`, the image en For a fuller container-oriented walkthrough, see [examples/docker](examples/docker/README.md). +When `container.bind_workdir` is enabled, stale task workspace directories can be cleaned while the runner is idle: +- directories older than `runner.workdir_cleanup_age` are removed (default: `24h`; set `0` to disable) +- cleanup runs every `runner.idle_cleanup_interval` (default: `10m`; set `0` to disable) +- only purely numeric subdirectories under `container.workdir_parent` are treated as task workspaces and may be removed +- cleanup assumes `container.workdir_parent` is not shared across multiple runners + ### Example Deployments Check out the [examples](examples) directory for sample deployment types. diff --git a/internal/app/poll/poller.go b/internal/app/poll/poller.go index 37ab7b28..9e3ea61a 100644 --- a/internal/app/poll/poller.go +++ b/internal/app/poll/poller.go @@ -27,6 +27,11 @@ type TaskRunner interface { Run(ctx context.Context, task *runnerv1.Task) error } +// IdleRunner can run maintenance while the poller is idle. +type IdleRunner interface { + OnIdle(ctx context.Context) +} + type Poller struct { client client.Client runner TaskRunner @@ -95,6 +100,7 @@ func (p *Poller) Poll() { task, ok := p.fetchTask(p.pollingCtx, s) if !ok { + p.runIdleMaintenance() <-sem if !p.waitBackoff(s) { return @@ -119,6 +125,7 @@ func (p *Poller) PollOnce() { for { task, ok := p.fetchTask(p.pollingCtx, s) if !ok { + p.runIdleMaintenance() if !p.waitBackoff(s) { return } @@ -130,6 +137,12 @@ func (p *Poller) PollOnce() { } } +func (p *Poller) runIdleMaintenance() { + if idleRunner, ok := p.runner.(IdleRunner); ok { + idleRunner.OnIdle(p.jobsCtx) + } +} + func (p *Poller) Shutdown(ctx context.Context) error { p.shutdownPolling() diff --git a/internal/app/poll/poller_test.go b/internal/app/poll/poller_test.go index e9d10d12..61e43c05 100644 --- a/internal/app/poll/poller_test.go +++ b/internal/app/poll/poller_test.go @@ -125,6 +125,11 @@ type mockRunner struct { totalCompleted atomic.Int64 } +type idleAwareRunner struct { + mockRunner + idleCalls atomic.Int64 +} + func (m *mockRunner) Run(ctx context.Context, _ *runnerv1.Task) error { atomicMax(&m.maxConcurrent, m.running.Add(1)) select { @@ -136,6 +141,78 @@ func (m *mockRunner) Run(ctx context.Context, _ *runnerv1.Task) error { return nil } +func TestPollerRunIdleMaintenance(t *testing.T) { + runner := &idleAwareRunner{} + p := &Poller{runner: runner, jobsCtx: context.Background()} + + p.runIdleMaintenance() + + assert.Equal(t, int64(1), runner.idleCalls.Load()) +} + +func (m *idleAwareRunner) OnIdle(_ context.Context) { + m.idleCalls.Add(1) +} + +func TestPollerPollCallsOnIdle(t *testing.T) { + cli := mocks.NewClient(t) + cli.On("FetchTask", mock.Anything, mock.Anything).Return( + func(_ context.Context, _ *connect_go.Request[runnerv1.FetchTaskRequest]) (*connect_go.Response[runnerv1.FetchTaskResponse], error) { + return connect_go.NewResponse(&runnerv1.FetchTaskResponse{}), nil + }, + ) + + cfg, err := config.LoadDefault("") + require.NoError(t, err) + cfg.Runner.Capacity = 1 + cfg.Runner.FetchInterval = 10 * time.Millisecond + cfg.Runner.FetchIntervalMax = 10 * time.Millisecond + + runner := &idleAwareRunner{} + poller := New(cfg, cli, runner) + + var wg sync.WaitGroup + wg.Go(poller.Poll) + + require.Eventually(t, func() bool { + return runner.idleCalls.Load() > 0 + }, time.Second, 10*time.Millisecond) + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, poller.Shutdown(ctx)) + wg.Wait() +} + +func TestPollerPollOnceCallsOnIdle(t *testing.T) { + cli := mocks.NewClient(t) + cli.On("FetchTask", mock.Anything, mock.Anything).Return( + func(_ context.Context, _ *connect_go.Request[runnerv1.FetchTaskRequest]) (*connect_go.Response[runnerv1.FetchTaskResponse], error) { + return connect_go.NewResponse(&runnerv1.FetchTaskResponse{}), nil + }, + ) + + cfg, err := config.LoadDefault("") + require.NoError(t, err) + cfg.Runner.FetchInterval = 10 * time.Millisecond + cfg.Runner.FetchIntervalMax = 10 * time.Millisecond + + runner := &idleAwareRunner{} + poller := New(cfg, cli, runner) + + var wg sync.WaitGroup + wg.Go(poller.PollOnce) + + require.Eventually(t, func() bool { + return runner.idleCalls.Load() > 0 + }, time.Second, 10*time.Millisecond) + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, poller.Shutdown(ctx)) + wg.Wait() +} + // TestPoller_ConcurrencyLimitedByCapacity verifies that with capacity=3 and // 6 available tasks, at most 3 tasks run concurrently, and FetchTask is // never called concurrently (single poller). diff --git a/internal/app/run/runner.go b/internal/app/run/runner.go index c10e87c3..bb67ef9d 100644 --- a/internal/app/run/runner.go +++ b/internal/app/run/runner.go @@ -7,12 +7,14 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "maps" "net/http" "os" "path/filepath" "runtime" + "strconv" "strings" "sync" "sync/atomic" @@ -46,8 +48,10 @@ type Runner struct { envs map[string]string cacheHandler *artifactcache.Handler - runningTasks sync.Map - runningCount atomic.Int64 + runningTasks sync.Map + runningCount atomic.Int64 + lastIdleCleanupUnixNano atomic.Int64 + now func() time.Time } func NewRunner(cfg *config.Config, reg *config.Registration, cli client.Client) *Runner { @@ -90,13 +94,94 @@ func NewRunner(cfg *config.Config, reg *config.Registration, cli client.Client) envs["GITEA_ACTIONS"] = "true" envs["GITEA_ACTIONS_RUNNER_VERSION"] = ver.Version() - return &Runner{ + runner := &Runner{ name: reg.Name, cfg: cfg, client: cli, labels: ls, envs: envs, cacheHandler: cacheHandler, + now: time.Now, + } + return runner +} + +// OnIdle performs lightweight maintenance during polling idle windows. +// It runs synchronously on the poller goroutine; shouldRunIdleCleanup +// throttles invocations to runner.idle_cleanup_interval so the impact on +// poll cadence is bounded even when the workdir root is large. +func (r *Runner) OnIdle(ctx context.Context) { + if !r.shouldRunIdleCleanup() { + return + } + workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/") + workdirRoot := filepath.FromSlash("/" + workdirParent) + r.cleanupStaleTaskDirs(ctx, workdirRoot) +} + +func (r *Runner) shouldRunIdleCleanup() bool { + if !r.cfg.Container.BindWorkdir { + return false + } + if r.cfg.Runner.WorkdirCleanupAge <= 0 || r.cfg.Runner.IdleCleanupInterval <= 0 { + return false + } + if r.RunningCount() != 0 { + return false + } + now := r.now() + interval := r.cfg.Runner.IdleCleanupInterval + for { + last := r.lastIdleCleanupUnixNano.Load() + if last != 0 && now.Sub(time.Unix(0, last)) < interval { + return false + } + if r.lastIdleCleanupUnixNano.CompareAndSwap(last, now.UnixNano()) { + return true + } + } +} + +func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) { + entries, err := os.ReadDir(workdirRoot) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return + } + log.Warnf("failed to list task workspace root %s for stale cleanup: %v", workdirRoot, err) + return + } + + // A task may begin between shouldRunIdleCleanup's running-count check and + // the loop below. That is safe because new task dirs are created with the + // current mtime and therefore fall on the keep side of cutoff. + cutoff := r.now().Add(-r.cfg.Runner.WorkdirCleanupAge) + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return + } + if !entry.IsDir() { + continue + } + // Task workspaces are indexed by numeric task IDs; skip any other + // directories to avoid deleting operator-managed data under workdir_root. + if _, err := strconv.ParseUint(entry.Name(), 10, 64); err != nil { + continue + } + info, err := entry.Info() + if err != nil { + log.Warnf("failed to stat task workspace %s: %v", filepath.Join(workdirRoot, entry.Name()), err) + continue + } + if info.ModTime().After(cutoff) { + continue + } + taskDir := filepath.Join(workdirRoot, entry.Name()) + if err := os.RemoveAll(taskDir); err != nil { + log.Warnf("failed to clean stale task workspace %s: %v", taskDir, err) + continue + } + log.Infof("cleaned stale task workspace %s", taskDir) } } diff --git a/internal/app/run/runner_idle_cleanup_test.go b/internal/app/run/runner_idle_cleanup_test.go new file mode 100644 index 00000000..8f1dce47 --- /dev/null +++ b/internal/app/run/runner_idle_cleanup_test.go @@ -0,0 +1,247 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package run + +import ( + "context" + "os" + "path/filepath" + "strconv" + "testing" + "time" + + "gitea.com/gitea/runner/internal/pkg/config" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRunnerCleanupStaleTaskDirs(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + workdirRoot := filepath.Join(t.TempDir(), "workspace") + require.NoError(t, os.MkdirAll(workdirRoot, 0o700)) + + oldTask := filepath.Join(workdirRoot, "1001") + freshTask := filepath.Join(workdirRoot, "1002") + nonTask := filepath.Join(workdirRoot, "shared") + alphaNumericTask := filepath.Join(workdirRoot, "123abc") + for _, path := range []string{oldTask, freshTask, nonTask, alphaNumericTask} { + require.NoError(t, os.MkdirAll(path, 0o700)) + } + + require.NoError(t, os.Chtimes(oldTask, now.Add(-3*time.Hour), now.Add(-3*time.Hour))) + require.NoError(t, os.Chtimes(freshTask, now.Add(-30*time.Minute), now.Add(-30*time.Minute))) + require.NoError(t, os.Chtimes(nonTask, now.Add(-5*time.Hour), now.Add(-5*time.Hour))) + require.NoError(t, os.Chtimes(alphaNumericTask, now.Add(-5*time.Hour), now.Add(-5*time.Hour))) + + r := &Runner{ + cfg: &config.Config{ + Runner: config.Runner{ + WorkdirCleanupAge: 2 * time.Hour, + }, + }, + now: func() time.Time { return now }, + } + + r.cleanupStaleTaskDirs(context.Background(), workdirRoot) + + assert.NoDirExists(t, oldTask) + assert.DirExists(t, freshTask) + assert.DirExists(t, nonTask) + assert.DirExists(t, alphaNumericTask) +} + +func TestRunnerCleanupStaleTaskDirsMissingRoot(t *testing.T) { + r := &Runner{ + cfg: &config.Config{ + Runner: config.Runner{WorkdirCleanupAge: time.Hour}, + }, + now: time.Now, + } + + // Must be a silent no-op rather than a warning or panic when the root + // has not yet been created (e.g. the runner has never executed a task). + r.cleanupStaleTaskDirs(context.Background(), filepath.Join(t.TempDir(), "missing")) +} + +func TestRunnerCleanupStaleTaskDirsHonorsContext(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + workdirRoot := filepath.Join(t.TempDir(), "workspace") + require.NoError(t, os.MkdirAll(workdirRoot, 0o700)) + + for i := 1001; i <= 1003; i++ { + dir := filepath.Join(workdirRoot, strconv.Itoa(i)) + require.NoError(t, os.MkdirAll(dir, 0o700)) + require.NoError(t, os.Chtimes(dir, now.Add(-3*time.Hour), now.Add(-3*time.Hour))) + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + r := &Runner{ + cfg: &config.Config{ + Runner: config.Runner{WorkdirCleanupAge: time.Hour}, + }, + now: func() time.Time { return now }, + } + + r.cleanupStaleTaskDirs(ctx, workdirRoot) + + for i := 1001; i <= 1003; i++ { + assert.DirExists(t, filepath.Join(workdirRoot, strconv.Itoa(i))) + } +} + +func TestRunnerShouldRunIdleCleanupThrottles(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + }, + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: time.Hour, + }, + }, + now: func() time.Time { return now }, + } + + assert.True(t, r.shouldRunIdleCleanup()) + + now = now.Add(30 * time.Minute) + assert.False(t, r.shouldRunIdleCleanup()) + + now = now.Add(31 * time.Minute) + assert.True(t, r.shouldRunIdleCleanup()) +} + +func TestRunnerShouldRunIdleCleanupSkipsWhenJobRunning(t *testing.T) { + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + }, + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: time.Minute, + }, + }, + now: time.Now, + } + r.runningCount.Store(1) + + assert.False(t, r.shouldRunIdleCleanup()) +} + +func TestRunnerShouldRunIdleCleanupSkipsWhenBindWorkdirDisabled(t *testing.T) { + r := &Runner{ + cfg: &config.Config{ + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: time.Minute, + }, + }, + now: time.Now, + } + + assert.False(t, r.shouldRunIdleCleanup()) +} + +func TestRunnerShouldRunIdleCleanupSkipsWhenDisabled(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + + t.Run("cleanup age disabled", func(t *testing.T) { + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + }, + Runner: config.Runner{ + WorkdirCleanupAge: -1, + IdleCleanupInterval: time.Minute, + }, + }, + now: func() time.Time { return now }, + } + + assert.False(t, r.shouldRunIdleCleanup()) + }) + + t.Run("idle interval disabled", func(t *testing.T) { + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + }, + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: -1, + }, + }, + now: func() time.Time { return now }, + } + + assert.False(t, r.shouldRunIdleCleanup()) + }) +} + +// TestRunnerOnIdleIntegratesCleanup wires the full OnIdle entry point and +// confirms it walks workdir_parent (after the leading-slash trim that +// matches the production path construction) and removes stale numeric dirs. +func TestRunnerOnIdleIntegratesCleanup(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + root := t.TempDir() + stale := filepath.Join(root, "1234") + require.NoError(t, os.MkdirAll(stale, 0o700)) + require.NoError(t, os.Chtimes(stale, now.Add(-48*time.Hour), now.Add(-48*time.Hour))) + + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + WorkdirParent: root, // leading slash absent, OnIdle reattaches it + }, + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: time.Minute, + }, + }, + now: func() time.Time { return now }, + } + + r.OnIdle(context.Background()) + + assert.NoDirExists(t, stale) +} + +// TestRunnerOnIdleSkipsWhenAlreadyCancelled verifies a pre-cancelled ctx +// short-circuits cleanup before any directory entry is touched. +func TestRunnerOnIdleSkipsWhenAlreadyCancelled(t *testing.T) { + now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC) + root := t.TempDir() + stale := filepath.Join(root, "1234") + require.NoError(t, os.MkdirAll(stale, 0o700)) + require.NoError(t, os.Chtimes(stale, now.Add(-48*time.Hour), now.Add(-48*time.Hour))) + + r := &Runner{ + cfg: &config.Config{ + Container: config.Container{ + BindWorkdir: true, + WorkdirParent: root, + }, + Runner: config.Runner{ + WorkdirCleanupAge: 24 * time.Hour, + IdleCleanupInterval: time.Minute, + }, + }, + now: func() time.Time { return now }, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + r.OnIdle(ctx) + + assert.DirExists(t, stale) +} diff --git a/internal/pkg/config/config.example.yaml b/internal/pkg/config/config.example.yaml index d1551d5b..eded51c3 100644 --- a/internal/pkg/config/config.example.yaml +++ b/internal/pkg/config/config.example.yaml @@ -40,6 +40,12 @@ runner: # The runner uses exponential backoff when idle, increasing the interval up to this maximum. # Set to 0 or same as fetch_interval to disable backoff. fetch_interval_max: 5s + # While idle, remove stale bind-workdir task directories older than this duration. + # Setting either workdir_cleanup_age or idle_cleanup_interval to 0 (or any + # non-positive value) disables workdir cleanup entirely. + workdir_cleanup_age: 24h + # Cadence for the idle stale bind-workdir cleanup pass. + idle_cleanup_interval: 10m # The base interval for periodic log flush to the Gitea instance. # Logs may be sent earlier if the buffer reaches log_report_batch_size # or if log_report_max_latency expires after the first buffered row. @@ -107,6 +113,7 @@ container: # If the path starts with '/', the '/' will be trimmed. # For example, if the parent directory is /path/to/my/dir, workdir_parent should be path/to/my/dir # If it's empty, /workspace will be used. + # Purely numeric subdirectories under this path are reserved for task workspaces and may be removed by idle cleanup. workdir_parent: # Volumes (including bind mounts) can be mounted to containers. Glob syntax is supported, see https://github.com/gobwas/glob # You can specify multiple volumes. If the sequence is empty, no volumes can be mounted. diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go index 48aa9dab..a257a9dd 100644 --- a/internal/pkg/config/config.go +++ b/internal/pkg/config/config.go @@ -33,6 +33,8 @@ type Runner struct { FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources. FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources. FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle. + WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories older than this duration during idle cleanup. + IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale bind-workdir cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence. LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush. LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent. LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size. @@ -92,6 +94,7 @@ type Config struct { // If file is not empty, it will be used to load the configuration. func LoadDefault(file string) (*Config, error) { cfg := &Config{} + definedRunnerKeys := map[string]bool{} if file != "" { content, err := os.ReadFile(file) if err != nil { @@ -100,6 +103,10 @@ func LoadDefault(file string) (*Config, error) { if err := yaml.Unmarshal(content, cfg); err != nil { return nil, fmt.Errorf("parse config file %q: %w", file, err) } + definedRunnerKeys, err = definedRunnerConfigKeys(content) + if err != nil { + return nil, fmt.Errorf("parse config file %q for defaults metadata: %w", file, err) + } } compatibleWithOldEnvs(file != "", cfg) @@ -157,6 +164,12 @@ func LoadDefault(file string) (*Config, error) { if cfg.Runner.FetchIntervalMax <= 0 { cfg.Runner.FetchIntervalMax = 5 * time.Second } + if cfg.Runner.WorkdirCleanupAge == 0 && !definedRunnerKeys["workdir_cleanup_age"] { + cfg.Runner.WorkdirCleanupAge = 24 * time.Hour + } + if cfg.Runner.IdleCleanupInterval == 0 && !definedRunnerKeys["idle_cleanup_interval"] { + cfg.Runner.IdleCleanupInterval = 10 * time.Minute + } if cfg.Runner.LogReportInterval <= 0 { cfg.Runner.LogReportInterval = 5 * time.Second } @@ -199,3 +212,30 @@ func LoadDefault(file string) (*Config, error) { return cfg, nil } + +func definedRunnerConfigKeys(content []byte) (map[string]bool, error) { + var root yaml.Node + if err := yaml.Unmarshal(content, &root); err != nil { + return nil, err + } + + defined := map[string]bool{} + if len(root.Content) == 0 { + return defined, nil + } + + doc := root.Content[0] + for i := 0; i+1 < len(doc.Content); i += 2 { + key := doc.Content[i] + value := doc.Content[i+1] + if key.Value != "runner" || value.Kind != yaml.MappingNode { + continue + } + for j := 0; j+1 < len(value.Content); j += 2 { + defined[value.Content[j].Value] = true + } + break + } + + return defined, nil +} diff --git a/internal/pkg/config/config_test.go b/internal/pkg/config/config_test.go index da0e414b..4986ee94 100644 --- a/internal/pkg/config/config_test.go +++ b/internal/pkg/config/config_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -39,3 +40,80 @@ cache: _, err := LoadDefault(path) require.NoError(t, err) } + +func TestLoadDefault_DefaultsWorkdirCleanupAge(t *testing.T) { + cfg, err := LoadDefault("") + require.NoError(t, err) + assert.Equal(t, 24*time.Hour, cfg.Runner.WorkdirCleanupAge) + assert.Equal(t, 10*time.Minute, cfg.Runner.IdleCleanupInterval) +} + +func TestLoadDefault_UsesConfiguredWorkdirCleanupAge(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte(` +runner: + workdir_cleanup_age: 2h30m +`), 0o600)) + + cfg, err := LoadDefault(path) + require.NoError(t, err) + assert.Equal(t, 150*time.Minute, cfg.Runner.WorkdirCleanupAge) +} + +func TestLoadDefault_UsesConfiguredIdleCleanupInterval(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte(` +runner: + idle_cleanup_interval: 45m +`), 0o600)) + + cfg, err := LoadDefault(path) + require.NoError(t, err) + assert.Equal(t, 45*time.Minute, cfg.Runner.IdleCleanupInterval) +} + +func TestLoadDefault_AllowsDisablingWorkdirCleanup(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte(` +runner: + workdir_cleanup_age: 0s + idle_cleanup_interval: 0s +`), 0o600)) + + cfg, err := LoadDefault(path) + require.NoError(t, err) + assert.Equal(t, time.Duration(0), cfg.Runner.WorkdirCleanupAge) + assert.Equal(t, time.Duration(0), cfg.Runner.IdleCleanupInterval) +} + +func TestLoadDefault_AllowsNegativeWorkdirCleanupValues(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte(` +runner: + workdir_cleanup_age: -1s + idle_cleanup_interval: -1s +`), 0o600)) + + cfg, err := LoadDefault(path) + require.NoError(t, err) + assert.Equal(t, -1*time.Second, cfg.Runner.WorkdirCleanupAge) + assert.Equal(t, -1*time.Second, cfg.Runner.IdleCleanupInterval) +} + +// TestLoadDefault_MalformedYAMLReturnsParseError pins the error surfaced for +// invalid YAML to the canonical "parse config file" message rather than the +// "for defaults metadata" variant — i.e. the main yaml.Unmarshal runs first. +func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte("runner:\n capacity: [unterminated\n"), 0o600)) + + _, err := LoadDefault(path) + require.Error(t, err) + assert.Contains(t, err.Error(), "parse config file") + assert.NotContains(t, err.Error(), "defaults metadata") +}