langchaingo/README.md

No setup, API keys, or additional configuration required.

### Test the project

```sh
go test -v ./...
```

This command runs all the tests in the project, using [Testcontainers Go] to spin up the different
containers needed for the tests:

1. [Docker Model Runner]: a socat container that forwards the model runner's API to the test process,
   allowing it to talk to the local LLM models provided by [Docker Desktop].
2. [Docker MCP Gateway]: Docker's MCP gateway container, which provides access to MCP servers and tools.
   It allows the test process to talk to the MCP servers provided by [Docker Desktop], in this case DuckDuckGo.

No port conflicts occur: the [Testcontainers Go] library automatically maps each container's known ports
to random free ports on the host. You can therefore run the tests as many times as you want,
even without stopping the Docker Compose application.

All containers started by [Testcontainers Go] are automatically cleaned up after the tests finish,
so you don't need to worry about cleaning them up manually.

#### String comparison tests

This simple test checks whether the answer is correct by comparing it to a reference answer.
As you can imagine, given the non-deterministic nature of the LLM, this check is not very robust.

Run this test with:

```sh
go test -v -run TestChat_stringComparison ./...
```
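Conceptually, the check boils down to a plain substring comparison against the reference. A minimal sketch (illustrative only, not the project's actual test code; the function name is made up here):

```go
package main

import (
	"fmt"
	"strings"
)

// answerMatches applies the naive check: the answer passes only if it contains
// the exact reference substring. Any rephrasing by the LLM that drops the
// substring fails the test, which is why this approach is fragile.
func answerMatches(answer, reference string) bool {
	return strings.Contains(answer, reference)
}

func main() {
	reference := "https://github.com/modelcontextprotocol/go-sdk"
	answer := "Yes, Go has an official MCP SDK: https://github.com/modelcontextprotocol/go-sdk"
	fmt.Println(answerMatches(answer, reference)) // true
}
```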

#### Cosine similarity tests

This more robust test checks whether the answer is correct by computing the cosine similarity
between the reference answer and the model's answer. The test obtains the embeddings of the
reference answer and of the model's answer, then calculates the cosine similarity between them.
If the result is greater than a threshold, which is defined by the team, the test passes.
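The similarity computation itself is the standard formula, dot(a, b) / (‖a‖ · ‖b‖). A minimal sketch of such a helper (the project defines its own version in the test code):

```go
package main

import (
	"fmt"
	"math"
)

// cosine returns the cosine similarity between two equal-length vectors:
// the dot product divided by the product of the vector norms, in [-1, 1].
func cosine(a, b []float32) float32 {
	var dot, normA, normB float64
	for i := range a {
		dot += float64(a[i]) * float64(b[i])
		normA += float64(a[i]) * float64(a[i])
		normB += float64(b[i]) * float64(b[i])
	}
	return float32(dot / (math.Sqrt(normA) * math.Sqrt(normB)))
}

func main() {
	// Identical vectors have similarity 1; orthogonal vectors have 0.
	fmt.Println(cosine([]float32{1, 0}, []float32{1, 0})) // 1
	fmt.Println(cosine([]float32{1, 0}, []float32{0, 1})) // 0
}
```

The 0.8 threshold used by the test is a team decision: higher values demand answers semantically closer to the reference.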

Run this test with:

```sh
go test -v -run TestChat_embeddings ./...
```

#### RAG tests

This more robust test checks whether the answer is correct by using the RAG technique.
It creates a Weaviate store holding the content that serves as a reference, and uses the
Vector Database's built-in mechanisms to retrieve the documents most relevant to the question.
It then includes those relevant documents in the LLM's prompt to answer the question.

Run this test with:

```sh
go test -v -run TestChat_rag ./...
```

#### Evaluator tests

This test uses the concept of [LLM-as-a-judge] to evaluate the accuracy of the answer. It creates an evaluator
backed by another LLM, possibly a different, more specialised model, to judge the accuracy of the answer.
For that, it uses a strict system message and a user message that force the LLM to return a valid JSON object
with the following fields:

+ "provided_answer": the answer to the question
+ "is_correct": true if the answer is correct, false otherwise
+ "reasoning": the reasoning behind the answer
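The test can then decode that JSON into a struct and assert on `is_correct`. A minimal sketch (the evaluator reply below is hypothetical, for illustration only):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// evalResponse mirrors the JSON object the evaluator LLM is asked to return.
type evalResponse struct {
	ProvidedAnswer string `json:"provided_answer"`
	IsCorrect      bool   `json:"is_correct"`
	Reasoning      string `json:"reasoning"`
}

// parseEvaluation decodes the evaluator's raw reply; a malformed reply is a
// failure in itself, since the system message demands valid JSON.
func parseEvaluation(raw string) (evalResponse, error) {
	var eval evalResponse
	err := json.Unmarshal([]byte(raw), &eval)
	return eval, err
}

func main() {
	raw := `{"provided_answer":"Yes, via the official Go SDK.","is_correct":true,"reasoning":"The answer cites the official SDK repository."}`
	eval, err := parseEvaluation(raw)
	if err != nil {
		panic(err)
	}
	fmt.Println(eval.IsCorrect) // true
}
```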

Run this test with:

```sh
go test -v -run TestChat_usingEvaluator ./...
```

# 🧠 Inference Options

By default, this project uses [Docker Model Runner] to handle LLM inference locally — no internet
+ [Langchaingo]
+ [DuckDuckGo]
+ [Docker Compose]
+ [Testcontainers Go]

[DuckDuckGo]: https://duckduckgo.com
[Langchaingo]: https://github.com/tmc/langchaingo
[LLM-as-a-judge]: https://eugeneyan.com/writing/llm-evaluators/
[Testcontainers Go]: https://github.com/testcontainers/testcontainers-go
[Model Context Protocol's Go SDK]: https://github.com/modelcontextprotocol/go-sdk/
[Docker Compose]: https://github.com/docker/compose
[Docker Desktop]: https://www.docker.com/products/docker-desktop/
[Docker Engine]: https://docs.docker.com/engine/
[Docker Model Runner]: https://docs.docker.com/ai/model-runner/
[Docker MCP Gateway]: https://docs.docker.com/ai/mcp-gateway/
[Docker Model Runner requirements]: https://docs.docker.com/ai/model-runner/
[Docker Offload]: https://www.docker.com/products/docker-offload/
langchaingo/chat.go
package main

import (
"context"
"fmt"
"os"

"github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/tmc/langchaingo/agents"
"github.com/tmc/langchaingo/callbacks"
"github.com/tmc/langchaingo/chains"
)

// chat is the main function that initializes the LLM, MCP tools, and runs the agent.
// It receives the question and the MCP gateway URL, returning the answer from the agent.
func chat(question string, mcpGatewayURL string, apiKey string, baseURL string, modelName string, agentOpts ...agents.Option) (string, error) {
llm, err := initializeLLM(apiKey, baseURL, modelName)
if err != nil {
return "", fmt.Errorf("initialize LLM: %w", err)
}

// Create a new client, with no features.
client := mcp.NewClient(&mcp.Implementation{Name: "mcp-client", Version: "v1.0.0"}, nil)

toolBelt, err := initializeMCPTools(client, mcpGatewayURL)
if err != nil {
return "", fmt.Errorf("initialize MCP tools: %w", err)
}

if os.Getenv("DEBUG") == "true" {
agentOpts = append(agentOpts, agents.WithCallbacksHandler(callbacks.LogHandler{}))
}

agent := agents.NewOneShotAgent(llm, toolBelt, agentOpts...)
executor := agents.NewExecutor(agent)

answer, err := chains.Run(context.Background(), executor, question)
if err != nil {
return "", fmt.Errorf("chains run: %w", err)
}

return answer, nil
}
langchaingo/chat_test.go
package main

import (
"context"
"encoding/json"
"fmt"
"testing"

"github.com/stretchr/testify/require"
"github.com/testcontainers/testcontainers-go"
dmcpg "github.com/testcontainers/testcontainers-go/modules/dockermcpgateway"
"github.com/testcontainers/testcontainers-go/modules/dockermodelrunner"
"github.com/tmc/langchaingo/agents"
"github.com/tmc/langchaingo/embeddings"
"github.com/tmc/langchaingo/schema"
"github.com/tmc/langchaingo/vectorstores"
)

const (
modelNamespace = "ai"
modelName = "gemma3-qat"
modelTag = "latest"
fqModelName = modelNamespace + "/" + modelName + ":" + modelTag
)

func TestChat_stringComparison(t *testing.T) {
ctx := context.Background()

// Docker Model Runner container, which talks to Docker Desktop's model runner
dmrCtr, err := dockermodelrunner.Run(ctx, dockermodelrunner.WithModel(fqModelName))
testcontainers.CleanupContainer(t, dmrCtr)
require.NoError(t, err)

// Docker MCP Gateway container, which talks to the MCP servers, in this case DuckDuckGo
mcpgCtr, err := dmcpg.Run(
ctx, "docker/mcp-gateway:latest",
dmcpg.WithTools("duckduckgo", []string{"search", "fetch_content"}),
)
testcontainers.CleanupContainer(t, mcpgCtr)
require.NoError(t, err)

mcpGatewayURL, err := mcpgCtr.GatewayEndpoint(ctx)
require.NoError(t, err)

question := "Does Golang support the Model Context Protocol? Please provide some references."

answer, err := chat(question, mcpGatewayURL, "no-apiKey", dmrCtr.OpenAIEndpoint(), fqModelName)
require.NoError(t, err)
require.NotEmpty(t, answer)
require.Contains(t, answer, "https://github.com/modelcontextprotocol/go-sdk")
}

func TestChat_embeddings(t *testing.T) {
embeddingModel, dmrBaseURL := buildEmbeddingsModel(t)

embedder, err := embeddings.NewEmbedder(embeddingModel)
require.NoError(t, err)

reference := `Golang does have an official Go SDK for Model Context Protocol servers and clients, which is maintained in collaboration with Google.
Its URL is https://github.com/modelcontextprotocol/go-sdk`

// calculate the embeddings for the reference answer
referenceEmbeddings, err := embedder.EmbedDocuments(context.Background(), []string{reference})
require.NoError(t, err)

ctx := context.Background()

// Docker MCP Gateway container, which talks to the MCP servers, in this case DuckDuckGo
mcpgCtr, err := dmcpg.Run(
ctx, "docker/mcp-gateway:latest",
dmcpg.WithTools("duckduckgo", []string{"search", "fetch_content"}),
)
testcontainers.CleanupContainer(t, mcpgCtr)
require.NoError(t, err)

mcpGatewayURL, err := mcpgCtr.GatewayEndpoint(ctx)
require.NoError(t, err)

question := "Does Golang support the Model Context Protocol? Please provide some references."
answer, err := chat(question, mcpGatewayURL, "no-apiKey", dmrBaseURL, fqModelName)
require.NoError(t, err)
require.NotEmpty(t, answer)

t.Logf("answer: %s", answer)

// calculate the embeddings for the answer of the model
answerEmbeddings, err := embedder.EmbedDocuments(context.Background(), []string{answer})
require.NoError(t, err)

// calculate the cosine similarity between the reference and the answer
cosineSimilarity := cosineSimilarity(t, referenceEmbeddings[0], answerEmbeddings[0])
t.Logf("cosine similarity: %f", cosineSimilarity)

// Define a threshold for the cosine similarity: this is a team decision to accept or reject the answer
// within the given threshold.
require.Greater(t, cosineSimilarity, float32(0.8))
}

func TestChat_rag(t *testing.T) {
const question = "Does Golang support the Model Context Protocol? Please provide some references."

embeddingModel, dmrBaseURL := buildEmbeddingsModel(t)

embedder, err := embeddings.NewEmbedder(embeddingModel)
require.NoError(t, err)

reference := `Golang does have an official Go SDK for Model Context Protocol servers and clients, which is maintained in collaboration with Google.
Its URL is https://github.com/modelcontextprotocol/go-sdk`

// create a new Weaviate store to store the reference answer
store, err := NewStore(t, embedder)
require.NoError(t, err)

_, err = store.AddDocuments(context.Background(), []schema.Document{
{
PageContent: reference,
},
})
require.NoError(t, err)

optionsVector := []vectorstores.Option{
vectorstores.WithScoreThreshold(0.80), // use for precision, when you only want the most relevant documents
vectorstores.WithEmbedder(embedder), // use when adding documents or doing similarity search
}

relevantDocs, err := store.SimilaritySearch(context.Background(), question, 1, optionsVector...)
require.NoError(t, err)
require.NotEmpty(t, relevantDocs)

ctx := context.Background()

// Docker MCP Gateway container, which talks to the MCP servers, in this case DuckDuckGo
mcpgCtr, err := dmcpg.Run(
ctx, "docker/mcp-gateway:latest",
dmcpg.WithTools("duckduckgo", []string{"search", "fetch_content"}),
)
testcontainers.CleanupContainer(t, mcpgCtr)
require.NoError(t, err)

mcpGatewayURL, err := mcpgCtr.GatewayEndpoint(ctx)
require.NoError(t, err)

answer, err := chat(
question,
mcpGatewayURL,
"no-apiKey",
dmrBaseURL,
fqModelName,
agents.WithPromptSuffix(fmt.Sprintf("Use the following relevant documents to answer the question: %s", relevantDocs[0].PageContent)),
)
require.NoError(t, err)
require.NotEmpty(t, answer)

t.Logf("answer: %s", answer)
}

func TestChat_usingEvaluator(t *testing.T) {
ctx := context.Background()

// Docker Model Runner container, which talks to Docker Desktop's model runner
dmrCtr, err := dockermodelrunner.Run(ctx, dockermodelrunner.WithModel(fqModelName))
testcontainers.CleanupContainer(t, dmrCtr)
require.NoError(t, err)

// Docker MCP Gateway container, which talks to the MCP servers, in this case DuckDuckGo
mcpgCtr, err := dmcpg.Run(
ctx, "docker/mcp-gateway:latest",
dmcpg.WithTools("duckduckgo", []string{"search", "fetch_content"}),
)
testcontainers.CleanupContainer(t, mcpgCtr)
require.NoError(t, err)

mcpGatewayURL, err := mcpgCtr.GatewayEndpoint(ctx)
require.NoError(t, err)

question := "Does Golang support the Model Context Protocol? Please provide some references."

answer, err := chat(question, mcpGatewayURL, "no-apiKey", dmrCtr.OpenAIEndpoint(), fqModelName)
require.NoError(t, err)
require.NotEmpty(t, answer)

t.Logf("answer: %s", answer)

// cross the answer with the evaluator
reference := `There is an official Go SDK for Model Context Protocol servers and clients, which is maintained in collaboration with Google.
Its URL is https://github.com/modelcontextprotocol/go-sdk`

evaluator := NewEvaluator(question, fqModelName, "no-apiKey", dmrCtr.OpenAIEndpoint())
evaluation, err := evaluator.Evaluate(ctx, question, answer, reference)
require.NoError(t, err)
t.Logf("evaluation: %#v", evaluation)

type evalResponse struct {
ProvidedAnswer string `json:"provided_answer"`
IsCorrect bool `json:"is_correct"`
Reasoning string `json:"reasoning"`
}

var eval evalResponse
err = json.Unmarshal([]byte(evaluation), &eval)
require.NoError(t, err)

t.Logf("evaluation: %#v", eval)
require.True(t, eval.IsCorrect)
}