1
0
mirror of https://gitea.com/gitea/act_runner synced 2026-05-06 01:37:57 +02:00

fix(host): correct host workspace cleanup on Windows (#883)

## Summary
- Fix host-mode cleanup to remove the job **workspace** directory after a run (instead of leaving checkouts behind).
- On Windows, track step process PIDs and terminate remaining process trees during teardown before attempting workspace deletion (prevents file-lock failures).
- Skip workspace deletion when `bind_workdir` is enabled to avoid conflicting with runner-level task directory cleanup.

## Implementation details
- `HostEnvironment` now records PIDs for started commands and best-effort terminates them on Windows during `Remove()`.
- Workspace removal uses a small retry loop on Windows to handle transient locks.
- `BindWorkdir` is propagated into `HostEnvironment` so cleanup behavior matches runner configuration.

---------

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com>
Reviewed-on: https://gitea.com/gitea/runner/pulls/883
Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
This commit is contained in:
Nicolas
2026-05-05 18:28:12 +00:00
parent b68ecf2580
commit a22119cf88
4 changed files with 168 additions and 10 deletions

View File

@@ -16,7 +16,9 @@ import (
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"gitea.com/gitea/runner/act/common"
@@ -34,9 +36,15 @@ type HostEnvironment struct {
TmpDir string
ToolCache string
Workdir string
ActPath string
CleanUp func()
StdOut io.Writer
// BindWorkdir is true when the app runner mounts the workspace on the host and
// deletes the task directory after the job; host teardown must not remove Workdir.
BindWorkdir bool
ActPath string
CleanUp func()
StdOut io.Writer
mu sync.Mutex
runningPIDs map[int]struct{}
}
func (e *HostEnvironment) Create(_, _ []string) common.Executor {
@@ -344,7 +352,25 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
if ppty != nil {
go writeKeepAlive(ppty)
}
err = cmd.Run()
// Split Start/Wait so the PID can be registered before the process can exit;
// cmd.Run() would block until exit, by which time the PID may have been reused.
if err := cmd.Start(); err != nil {
return err
}
if cmd.Process != nil {
e.mu.Lock()
if e.runningPIDs == nil {
e.runningPIDs = map[int]struct{}{}
}
e.runningPIDs[cmd.Process.Pid] = struct{}{}
e.mu.Unlock()
defer func(pid int) {
e.mu.Lock()
delete(e.runningPIDs, pid)
e.mu.Unlock()
}(cmd.Process.Pid)
}
err = cmd.Wait()
if err != nil {
return err
}
@@ -385,12 +411,83 @@ func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string)
return parseEnvFile(e, srcPath, env)
}
func removePathWithRetry(ctx context.Context, path string) error {
if path == "" {
return nil
}
attempts := 1
delay := time.Duration(0)
if runtime.GOOS == "windows" {
attempts = 5
delay = 200 * time.Millisecond
}
var lastErr error
for i := 0; i < attempts; i++ {
if i > 0 {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(delay):
}
}
lastErr = os.RemoveAll(path)
if lastErr == nil {
return nil
}
}
return lastErr
}
func (e *HostEnvironment) terminateRunningProcesses(ctx context.Context) {
if runtime.GOOS != "windows" {
return
}
e.mu.Lock()
pids := make([]int, 0, len(e.runningPIDs))
for pid := range e.runningPIDs {
pids = append(pids, pid)
}
e.mu.Unlock()
if len(pids) == 0 {
return
}
logger := common.Logger(ctx)
for _, pid := range pids {
// Best-effort: forcibly terminate process tree to release file handles
// so that workspace cleanup can succeed on Windows.
cmd := exec.CommandContext(ctx, "taskkill", "/PID", strconv.Itoa(pid), "/T", "/F")
out, err := cmd.CombinedOutput()
if err != nil {
logger.Debugf("taskkill failed for pid=%d: %v output=%s", pid, err, strings.TrimSpace(string(out)))
}
}
}
func (e *HostEnvironment) Remove() common.Executor {
return func(ctx context.Context) error {
// Ensure any lingering child processes are ended before attempting
// to remove the workspace (Windows file locks otherwise prevent cleanup).
e.terminateRunningProcesses(ctx)
// Only removes per-job misc state. Must not remove the cache/toolcache root.
if e.CleanUp != nil {
e.CleanUp()
}
return os.RemoveAll(e.Path)
logger := common.Logger(ctx)
var errs []error
if err := removePathWithRetry(ctx, e.Path); err != nil {
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
errs = append(errs, err)
}
if !e.BindWorkdir && e.Workdir != "" {
if err := removePathWithRetry(ctx, e.Workdir); err != nil {
logger.Warnf("failed to remove host workspace %s: %v", e.Workdir, err)
errs = append(errs, err)
}
}
return errors.Join(errs...)
}
}

View File

@@ -13,7 +13,11 @@ import (
"path/filepath"
"testing"
"gitea.com/gitea/runner/act/common"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Type assert HostEnvironment implements ExecutionsEnvironment
@@ -69,3 +73,51 @@ func TestGetContainerArchive(t *testing.T) {
_, err = reader.Next()
assert.ErrorIs(t, err, io.EOF)
}
func TestHostEnvironmentRemoveCleansWorkdir(t *testing.T) {
logger := logrus.New()
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
base := t.TempDir()
miscRoot := filepath.Join(base, "misc")
path := filepath.Join(miscRoot, "hostexecutor")
require.NoError(t, os.MkdirAll(path, 0o700))
workdir := filepath.Join(base, "workspace", "owner", "repo")
require.NoError(t, os.MkdirAll(workdir, 0o700))
e := &HostEnvironment{
Path: path,
Workdir: workdir,
BindWorkdir: false,
CleanUp: func() {
_ = os.RemoveAll(miscRoot)
},
StdOut: os.Stdout,
}
require.NoError(t, e.Remove()(ctx))
_, err := os.Stat(workdir)
assert.ErrorIs(t, err, os.ErrNotExist)
}
func TestHostEnvironmentRemoveSkipsWorkdirWhenBindWorkdir(t *testing.T) {
logger := logrus.New()
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
base := t.TempDir()
miscRoot := filepath.Join(base, "misc")
path := filepath.Join(miscRoot, "hostexecutor")
require.NoError(t, os.MkdirAll(path, 0o700))
workdir := filepath.Join(base, "workspace", "123", "owner", "repo")
require.NoError(t, os.MkdirAll(workdir, 0o700))
e := &HostEnvironment{
Path: path,
Workdir: workdir,
BindWorkdir: true,
CleanUp: func() {
_ = os.RemoveAll(miscRoot)
},
StdOut: os.Stdout,
}
require.NoError(t, e.Remove()(ctx))
_, err := os.Stat(workdir)
require.NoError(t, err)
}

View File

@@ -220,11 +220,12 @@ func (rc *RunContext) startHostEnvironment() common.Executor {
}
toolCache := filepath.Join(cacheDir, "tool_cache")
rc.JobContainer = &container.HostEnvironment{
Path: path,
TmpDir: runnerTmp,
ToolCache: toolCache,
Workdir: rc.Config.Workdir,
ActPath: actPath,
Path: path,
TmpDir: runnerTmp,
ToolCache: toolCache,
Workdir: rc.Config.Workdir,
BindWorkdir: rc.Config.BindWorkdir,
ActPath: actPath,
CleanUp: func() {
os.RemoveAll(miscpath)
},

View File

@@ -12,6 +12,7 @@ import (
"net/http"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"sync/atomic"
@@ -238,6 +239,13 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
workdirParent = fmt.Sprintf("%s/%d", workdirParent, task.Id)
}
workdir := filepath.FromSlash(fmt.Sprintf("/%s/%s", workdirParent, preset.Repository))
if runtime.GOOS == "windows" {
if abs, err := filepath.Abs(workdir); err == nil {
workdir = abs
}
}
// Without bind_workdir, the workspace path omits the task id; concurrent host-mode jobs
// for the same repository would share this directory and can race with per-job cleanup.
runnerConfig := &runner.Config{
// On Linux, Workdir will be like "/<parent_directory>/<owner>/<repo>"