From 6e8fdcce25203394de5e98f4979d2eb76468eed8 Mon Sep 17 00:00:00 2001
From: Etienne Perot <eperot@google.com>
Date: Tue, 9 Jan 2024 13:04:11 -0800
Subject: [PATCH] Increase ollama container start timeout.

TestLLM seems flaky. Starting ollama may take longer than the previous
30-second start timeout, especially since the start step includes waiting
for the Docker image to download (which is several gigabytes), reading logs,
and loading the first model (again a few gigabytes to load into the GPU).
This raises the start timeout to 3 minutes.

This also adds extra logging of the time elapsed since container start after
each startup step, which should help determine how long each operation takes.

PiperOrigin-RevId: 597021140
---
 test/gpu/ollama/ollama.go | 6 ++++++
 test/gpu/textgen_test.go  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/test/gpu/ollama/ollama.go b/test/gpu/ollama/ollama.go
index 0a816c1a1..55f9670fc 100644
--- a/test/gpu/ollama/ollama.go
+++ b/test/gpu/ollama/ollama.go
@@ -52,11 +52,13 @@ type Ollama struct {
 // New starts a new Ollama server in the given container,
 // then waits for it to serve and returns the client.
 func New(ctx context.Context, cont *dockerutil.Container, logger testutil.Logger) (*Ollama, error) {
+	started := time.Now()
 	opts := dockerutil.GPURunOpts()
 	opts.Image = "gpu/ollama"
 	if err := cont.Spawn(ctx, opts); err != nil {
 		return nil, fmt.Errorf("could not start ollama: %v", err)
 	}
+	logger.Logf("Started ollama container in %v", time.Since(started))
 	llm := &Ollama{
 		container: cont,
 		logger:    logger,
@@ -66,6 +68,7 @@ func New(ctx context.Context, cont *dockerutil.Container, logger testutil.Logger
 	if err := llm.WaitUntilServing(ctx); err != nil {
 		return nil, fmt.Errorf("ollama did not come up for serving: %w", err)
 	}
+	logger.Logf("Ollama serving API requests after %v", time.Since(started))
 
 	// Get list of model names.
 	modelNames, err := llm.listModelNames(ctx)
@@ -76,6 +79,7 @@ func New(ctx context.Context, cont *dockerutil.Container, logger testutil.Logger
 		return nil, errors.New("no models available")
 	}
 	llm.ModelNames = modelNames
+	logger.Logf("Available ollama model names: %v (loaded %v since container start)", modelNames, time.Since(started))
 
 	// Load the first model.
 	// This is necessary to force ollama to load a model, without which
@@ -87,6 +91,7 @@ func New(ctx context.Context, cont *dockerutil.Container, logger testutil.Logger
 	if err != nil {
 		return nil, fmt.Errorf("could not load first model %q: %w", llm.ModelNames[0], err)
 	}
+	logger.Logf("Loaded first ollama model %q (%v since container start)", llm.ModelNames[0], time.Since(started))
 
 	// Now go over the logs and check if the GPU was used.
 	logs, err := llm.container.Logs(ctx)
@@ -101,6 +106,7 @@ func New(ctx context.Context, cont *dockerutil.Container, logger testutil.Logger
 	default:
 		return nil, fmt.Errorf("cannot determine whether ollama is using GPU from logs:\n%s", logs)
 	}
+	logger.Logf("Ollama successfully initialized in a total of %v", time.Since(started))
 	return llm, nil
 }
 
diff --git a/test/gpu/textgen_test.go b/test/gpu/textgen_test.go
index 9cceb2386..7e2cf35cb 100644
--- a/test/gpu/textgen_test.go
+++ b/test/gpu/textgen_test.go
@@ -70,7 +70,7 @@ func TestLLM(t *testing.T) {
 	// Run the LLM.
 	llmContainer := dockerutil.MakeContainer(ctx, t)
 	defer llmContainer.CleanUp(ctx)
-	startCtx, startCancel := context.WithTimeout(ctx, 30*time.Second)
+	startCtx, startCancel := context.WithTimeout(ctx, 3*time.Minute)
 	llm, err := ollama.New(startCtx, llmContainer, t)
 	startCancel()
 	if err != nil {