mirror of
https://gitea.com/gitea/act_runner
synced 2026-05-06 01:37:57 +02:00
fix(host): correct host workspace cleanup on Windows (#883)
## Summary - Fix host-mode cleanup to remove the job **workspace** directory after a run (instead of leaving checkouts behind). - On Windows, track step process PIDs and terminate remaining process trees during teardown before attempting workspace deletion (prevents file-lock failures). - Skip workspace deletion when `bind_workdir` is enabled to avoid conflicting with runner-level task directory cleanup. ## Implementation details - `HostEnvironment` now records PIDs for started commands and best-effort terminates them on Windows during `Remove()`. - Workspace removal uses a small retry loop on Windows to handle transient locks. - `BindWorkdir` is propagated into `HostEnvironment` so cleanup behavior matches runner configuration. --------- Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com> Reviewed-on: https://gitea.com/gitea/runner/pulls/883 Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
This commit is contained in:
@@ -16,7 +16,9 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
@@ -34,9 +36,15 @@ type HostEnvironment struct {
|
||||
TmpDir string
|
||||
ToolCache string
|
||||
Workdir string
|
||||
ActPath string
|
||||
CleanUp func()
|
||||
StdOut io.Writer
|
||||
// BindWorkdir is true when the app runner mounts the workspace on the host and
|
||||
// deletes the task directory after the job; host teardown must not remove Workdir.
|
||||
BindWorkdir bool
|
||||
ActPath string
|
||||
CleanUp func()
|
||||
StdOut io.Writer
|
||||
|
||||
mu sync.Mutex
|
||||
runningPIDs map[int]struct{}
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) Create(_, _ []string) common.Executor {
|
||||
@@ -344,7 +352,25 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
||||
if ppty != nil {
|
||||
go writeKeepAlive(ppty)
|
||||
}
|
||||
err = cmd.Run()
|
||||
// Split Start/Wait so the PID can be registered before the process can exit;
|
||||
// cmd.Run() would block until exit, by which time the PID may have been reused.
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
if cmd.Process != nil {
|
||||
e.mu.Lock()
|
||||
if e.runningPIDs == nil {
|
||||
e.runningPIDs = map[int]struct{}{}
|
||||
}
|
||||
e.runningPIDs[cmd.Process.Pid] = struct{}{}
|
||||
e.mu.Unlock()
|
||||
defer func(pid int) {
|
||||
e.mu.Lock()
|
||||
delete(e.runningPIDs, pid)
|
||||
e.mu.Unlock()
|
||||
}(cmd.Process.Pid)
|
||||
}
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -385,12 +411,83 @@ func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string)
|
||||
return parseEnvFile(e, srcPath, env)
|
||||
}
|
||||
|
||||
func removePathWithRetry(ctx context.Context, path string) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
attempts := 1
|
||||
delay := time.Duration(0)
|
||||
if runtime.GOOS == "windows" {
|
||||
attempts = 5
|
||||
delay = 200 * time.Millisecond
|
||||
}
|
||||
var lastErr error
|
||||
for i := 0; i < attempts; i++ {
|
||||
if i > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(delay):
|
||||
}
|
||||
}
|
||||
lastErr = os.RemoveAll(path)
|
||||
if lastErr == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) terminateRunningProcesses(ctx context.Context) {
|
||||
if runtime.GOOS != "windows" {
|
||||
return
|
||||
}
|
||||
e.mu.Lock()
|
||||
pids := make([]int, 0, len(e.runningPIDs))
|
||||
for pid := range e.runningPIDs {
|
||||
pids = append(pids, pid)
|
||||
}
|
||||
e.mu.Unlock()
|
||||
|
||||
if len(pids) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
logger := common.Logger(ctx)
|
||||
for _, pid := range pids {
|
||||
// Best-effort: forcibly terminate process tree to release file handles
|
||||
// so that workspace cleanup can succeed on Windows.
|
||||
cmd := exec.CommandContext(ctx, "taskkill", "/PID", strconv.Itoa(pid), "/T", "/F")
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
logger.Debugf("taskkill failed for pid=%d: %v output=%s", pid, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) Remove() common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
// Ensure any lingering child processes are ended before attempting
|
||||
// to remove the workspace (Windows file locks otherwise prevent cleanup).
|
||||
e.terminateRunningProcesses(ctx)
|
||||
|
||||
// Only removes per-job misc state. Must not remove the cache/toolcache root.
|
||||
if e.CleanUp != nil {
|
||||
e.CleanUp()
|
||||
}
|
||||
return os.RemoveAll(e.Path)
|
||||
logger := common.Logger(ctx)
|
||||
var errs []error
|
||||
if err := removePathWithRetry(ctx, e.Path); err != nil {
|
||||
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if !e.BindWorkdir && e.Workdir != "" {
|
||||
if err := removePathWithRetry(ctx, e.Workdir); err != nil {
|
||||
logger.Warnf("failed to remove host workspace %s: %v", e.Workdir, err)
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,11 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// Type assert HostEnvironment implements ExecutionsEnvironment
|
||||
@@ -69,3 +73,51 @@ func TestGetContainerArchive(t *testing.T) {
|
||||
_, err = reader.Next()
|
||||
assert.ErrorIs(t, err, io.EOF)
|
||||
}
|
||||
|
||||
func TestHostEnvironmentRemoveCleansWorkdir(t *testing.T) {
|
||||
logger := logrus.New()
|
||||
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||
base := t.TempDir()
|
||||
miscRoot := filepath.Join(base, "misc")
|
||||
path := filepath.Join(miscRoot, "hostexecutor")
|
||||
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||
workdir := filepath.Join(base, "workspace", "owner", "repo")
|
||||
require.NoError(t, os.MkdirAll(workdir, 0o700))
|
||||
|
||||
e := &HostEnvironment{
|
||||
Path: path,
|
||||
Workdir: workdir,
|
||||
BindWorkdir: false,
|
||||
CleanUp: func() {
|
||||
_ = os.RemoveAll(miscRoot)
|
||||
},
|
||||
StdOut: os.Stdout,
|
||||
}
|
||||
require.NoError(t, e.Remove()(ctx))
|
||||
_, err := os.Stat(workdir)
|
||||
assert.ErrorIs(t, err, os.ErrNotExist)
|
||||
}
|
||||
|
||||
func TestHostEnvironmentRemoveSkipsWorkdirWhenBindWorkdir(t *testing.T) {
|
||||
logger := logrus.New()
|
||||
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||
base := t.TempDir()
|
||||
miscRoot := filepath.Join(base, "misc")
|
||||
path := filepath.Join(miscRoot, "hostexecutor")
|
||||
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||
workdir := filepath.Join(base, "workspace", "123", "owner", "repo")
|
||||
require.NoError(t, os.MkdirAll(workdir, 0o700))
|
||||
|
||||
e := &HostEnvironment{
|
||||
Path: path,
|
||||
Workdir: workdir,
|
||||
BindWorkdir: true,
|
||||
CleanUp: func() {
|
||||
_ = os.RemoveAll(miscRoot)
|
||||
},
|
||||
StdOut: os.Stdout,
|
||||
}
|
||||
require.NoError(t, e.Remove()(ctx))
|
||||
_, err := os.Stat(workdir)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
@@ -220,11 +220,12 @@ func (rc *RunContext) startHostEnvironment() common.Executor {
|
||||
}
|
||||
toolCache := filepath.Join(cacheDir, "tool_cache")
|
||||
rc.JobContainer = &container.HostEnvironment{
|
||||
Path: path,
|
||||
TmpDir: runnerTmp,
|
||||
ToolCache: toolCache,
|
||||
Workdir: rc.Config.Workdir,
|
||||
ActPath: actPath,
|
||||
Path: path,
|
||||
TmpDir: runnerTmp,
|
||||
ToolCache: toolCache,
|
||||
Workdir: rc.Config.Workdir,
|
||||
BindWorkdir: rc.Config.BindWorkdir,
|
||||
ActPath: actPath,
|
||||
CleanUp: func() {
|
||||
os.RemoveAll(miscpath)
|
||||
},
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -238,6 +239,13 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
|
||||
workdirParent = fmt.Sprintf("%s/%d", workdirParent, task.Id)
|
||||
}
|
||||
workdir := filepath.FromSlash(fmt.Sprintf("/%s/%s", workdirParent, preset.Repository))
|
||||
if runtime.GOOS == "windows" {
|
||||
if abs, err := filepath.Abs(workdir); err == nil {
|
||||
workdir = abs
|
||||
}
|
||||
}
|
||||
// Without bind_workdir, the workspace path omits the task id; concurrent host-mode jobs
|
||||
// for the same repository would share this directory and can race with per-job cleanup.
|
||||
|
||||
runnerConfig := &runner.Config{
|
||||
// On Linux, Workdir will be like "/<parent_directory>/<owner>/<repo>"
|
||||
|
||||
Reference in New Issue
Block a user