Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5c23012
feat: Add feature to enable dynamic instance types via workflow labels
npalm Apr 8, 2025
6be3726
docs: auto update terraform docs
Apr 8, 2025
4c1f2fa
feat: allow to use dynamic instance type in multiple events
edersonbrilhante Jan 19, 2026
e485239
style: fix format
edersonbrilhante Jan 19, 2026
2c2ec39
fix: add dynamic labels as runner labels
edersonbrilhante Jan 19, 2026
886412f
test: add tests for dynamic labels
edersonbrilhante Jan 19, 2026
f7fc866
fix: wire enable_dynamic_ec2_config
edersonbrilhante Jan 19, 2026
5f56ef9
fix: fix runner owner logic
edersonbrilhante Jan 19, 2026
0c026d7
chore: decrease log level for some logs
edersonbrilhante Jan 19, 2026
e93e296
style: fix format issues
edersonbrilhante Jan 19, 2026
079ecee
feat: add support for all fields in FleetLaunchTemplateOverridesRequest
edersonbrilhante Jan 20, 2026
d478870
style: fix formatting issues
edersonbrilhante Jan 20, 2026
6561720
fix: convert instanceTypes to constant
edersonbrilhante Jan 20, 2026
6c94342
test: fix test cases
edersonbrilhante Jan 20, 2026
1460582
fix: fix imports
edersonbrilhante Jan 20, 2026
3c4a197
docs: update function docs for parseEc2OverrideConfig
edersonbrilhante Jan 20, 2026
c65665f
feat: allow use any dynamic label with prefix ghr-
edersonbrilhante Jan 30, 2026
f808607
style: fix formatting
edersonbrilhante Jan 30, 2026
bdb86f4
docs: auto update terraform docs
github-actions[bot] Jan 30, 2026
e02c418
fix: fix wrong variable
edersonbrilhante Jan 30, 2026
ce82eb1
fix: replace old envvar for new envvar
edersonbrilhante Jan 30, 2026
5742c07
style: fix tflint
edersonbrilhante Jan 30, 2026
e810cec
fix: fix envvar in dispatcher
edersonbrilhante Mar 9, 2026
b49cd27
test: remove wrong tests
edersonbrilhante Mar 9, 2026
9192bd0
docs: add documentation and mask as experimental
edersonbrilhante Mar 10, 2026
b8a3c42
docs: update variable description
edersonbrilhante Mar 10, 2026
e528373
docs: auto update terraform docs
github-actions[bot] Mar 10, 2026
e67a37e
docs: auto update terraform docs
github-actions[bot] Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh)
| <a name="input_disable_runner_autoupdate"></a> [disable\_runner\_autoupdate](#input\_disable\_runner\_autoupdate) | Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/) | `bool` | `false` | no |
| <a name="input_enable_ami_housekeeper"></a> [enable\_ami\_housekeeper](#input\_enable\_ami\_housekeeper) | Option to disable the lambda to clean up old AMIs. | `bool` | `false` | no |
| <a name="input_enable_cloudwatch_agent"></a> [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enables the cloudwatch agent on the ec2 runner instances. The runner uses a default config that can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
| <a name="input_enable_dynamic_labels"></a> [enable\_dynamic\_labels](#input\_enable\_dynamic\_labels) | Experimental! May be removed or changed without triggering a major release. Enable dynamic EC2 configs based on workflow job labels. When enabled, jobs can request specific configs via the 'ghr-ec2-<config type key>:<config type value>' label (e.g., 'ghr-ec2-instance-type:t3.large'). | `bool` | `false` | no |
| <a name="input_enable_ephemeral_runners"></a> [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no |
| <a name="input_enable_jit_config"></a> [enable\_jit\_config](#input\_enable\_jit\_config) | Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI. | `bool` | `null` | no |
| <a name="input_enable_job_queued_check"></a> [enable\_job\_queued\_check](#input\_enable\_job\_queued\_check) | Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior. | `bool` | `null` | no |
Expand Down
206 changes: 206 additions & 0 deletions docs/configuration.md

Large diffs are not rendered by default.

24 changes: 23 additions & 1 deletion lambdas/functions/control-plane/src/aws/runners.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import { DefaultTargetCapacityType, SpotAllocationStrategy } from '@aws-sdk/client-ec2';
import {
DefaultTargetCapacityType,
InstanceRequirementsRequest,
SpotAllocationStrategy,
_InstanceType,
Placement,
FleetBlockDeviceMappingRequest,
} from '@aws-sdk/client-ec2';

export type RunnerType = 'Org' | 'Repo';

Expand Down Expand Up @@ -29,6 +36,20 @@ export interface ListRunnerFilters {
statuses?: string[];
}

/**
 * Optional per-request EC2 overrides applied when the fleet launch-template overrides are generated.
 *
 * The whole object is spread into every generated `FleetLaunchTemplateOverridesRequest` entry, so each
 * field set here ends up on every override of the fleet request. `SubnetId`, `InstanceType` and `ImageId`
 * additionally replace the values that would otherwise come from the runner parameters.
 * NOTE(review): the field names appear to mirror the AWS SDK `FleetLaunchTemplateOverridesRequest` shape;
 * consult the AWS documentation for the exact semantics of each field.
 */
export interface Ec2OverrideConfig {
// When set (non-empty), used as the only instance type instead of the configured instance type list.
InstanceType?: _InstanceType;
// Passed through unchanged to every fleet override.
MaxPrice?: string;
// When set (non-empty), used as the only subnet instead of the configured subnet list.
SubnetId?: string;
// Passed through unchanged to every fleet override.
AvailabilityZone?: string;
// Passed through unchanged to every fleet override.
WeightedCapacity?: number;
// Passed through unchanged to every fleet override.
Priority?: number;
// Passed through unchanged to every fleet override.
Placement?: Placement;
// Passed through unchanged to every fleet override.
BlockDeviceMappings?: FleetBlockDeviceMappingRequest[];
// Passed through unchanged to every fleet override.
InstanceRequirements?: InstanceRequirementsRequest;
// When set, takes precedence over the AMI id that would otherwise be used for the override.
ImageId?: string;
// Passed through unchanged to every fleet override.
AvailabilityZoneId?: string;
}

export interface RunnerInputParameters {
environment: string;
runnerType: RunnerType;
Expand All @@ -41,6 +62,7 @@ export interface RunnerInputParameters {
maxSpotPrice?: string;
instanceAllocationStrategy: SpotAllocationStrategy;
};
ec2OverrideConfig?: Ec2OverrideConfig;
numberOfRunners: number;
amiIdSsmParameterName?: string;
tracingEnabled?: boolean;
Expand Down
211 changes: 211 additions & 0 deletions lambdas/functions/control-plane/src/aws/runners.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ describe('create runner', () => {
allocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED,
capacityType: 'spot',
type: 'Org',
scaleErrors: ['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded'],
};

const defaultExpectedFleetRequestValues: ExpectedFleetRequestValues = {
Expand Down Expand Up @@ -425,6 +426,215 @@ describe('create runner', () => {
}),
});
});

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these tests work with all good values, but because we are in the user space, what about bad values and their effects? Is it maybe worth extending the tests to not trust the user data? I am not sure how the behaviour will be if someone makes a mistake, does it take the whole batch/process out?

Copy link
Contributor

@stuartp44 stuartp44 Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, `m5.large` could be mistyped as `m5,large`. How would that change the behaviour?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stuartp44

The payload is sent to AWS as-is, and AWS validates it. Example:

      - self-hosted
      - x64
      - type:small
      - ghr-ec2-instance-type:m5,xlarge
{
 "level": "WARN",
 "message": "Create fleet failed, error not recognized as scaling error.",
 "timestamp": "2026-03-09T17:24:59.008Z",
 "service": "test-small-scale-up",
 "sampling_rate": 0,
 "xray_trace_id": "REDACTED_XRAY_TRACE_ID",
 "region": "eu-west-1",
 "environment": "test-small",
 "module": "runners",
 "aws-request-id": "REDACTED_AWS_REQUEST_ID",
 "function-name": "test-small-scale-up",
 "runner": {
   "ephemeral": true,
   "type": "Org",
   "namePrefix": "",
   "n_events": 1
 },
 "data": [
   {
     "LaunchTemplateAndOverrides": {
       "LaunchTemplateSpecification": {
         "LaunchTemplateId": "REDACTED_LAUNCH_TEMPLATE_ID",
         "Version": "22"
       },
       "Overrides": {
         "InstanceType": "m5,xlarge",
         "SubnetId": "REDACTED_SUBNET_ID_1"
       }
     },
     "Lifecycle": "on-demand",
     "ErrorCode": "InvalidFleetConfiguration",
     "ErrorMessage": "Your requested instance type (m5,xlarge) is not supported in your requested Availability Zone (eu-west-1b)."
   },
   {
     "LaunchTemplateAndOverrides": {
       "LaunchTemplateSpecification": {
         "LaunchTemplateId": "REDACTED_LAUNCH_TEMPLATE_ID",
         "Version": "22"
       },
       "Overrides": {
         "InstanceType": "m5,xlarge",
         "SubnetId": "REDACTED_SUBNET_ID_2"
       }
     },
     "Lifecycle": "on-demand",
     "ErrorCode": "InvalidFleetConfiguration",
     "ErrorMessage": "Your requested instance type (m5,xlarge) is not supported in your requested Availability Zone (eu-west-1a)."
   }
 ]
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stuartp44 can you review again?

// A SubnetId given in ec2OverrideConfig must be the subnet of every fleet override;
// the test expects one override per instance type (m5.large, c5.large).
it('overrides SubnetId when specified in ec2OverrideConfig', async () => {
  const subnetOverride = 'subnet-override';
  const baseParameters = createRunnerConfig(defaultRunnerConfig);

  await createRunner({ ...baseParameters, ec2OverrideConfig: { SubnetId: subnetOverride } });

  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
    LaunchTemplateConfigs: [
      {
        LaunchTemplateSpecification: { LaunchTemplateName: 'lt-1', Version: '$Default' },
        Overrides: [
          { InstanceType: 'm5.large', SubnetId: subnetOverride },
          { InstanceType: 'c5.large', SubnetId: subnetOverride },
        ],
      },
    ],
    SpotOptions: { AllocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED },
    TagSpecifications: expect.any(Array),
    TargetCapacitySpecification: { DefaultTargetCapacityType: 'spot', TotalTargetCapacity: 1 },
    Type: 'instant',
  });
});

// An InstanceType given in ec2OverrideConfig must be the instance type of every fleet override;
// the test expects one override per subnet (subnet-123, subnet-456).
it('overrides InstanceType when specified in ec2OverrideConfig', async () => {
  const instanceTypeOverride = 't3.xlarge';
  const baseParameters = createRunnerConfig(defaultRunnerConfig);

  await createRunner({ ...baseParameters, ec2OverrideConfig: { InstanceType: instanceTypeOverride } });

  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
    LaunchTemplateConfigs: [
      {
        LaunchTemplateSpecification: { LaunchTemplateName: 'lt-1', Version: '$Default' },
        Overrides: [
          { InstanceType: instanceTypeOverride, SubnetId: 'subnet-123' },
          { InstanceType: instanceTypeOverride, SubnetId: 'subnet-456' },
        ],
      },
    ],
    SpotOptions: { AllocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED },
    TagSpecifications: expect.any(Array),
    TargetCapacitySpecification: { DefaultTargetCapacityType: 'spot', TotalTargetCapacity: 1 },
    Type: 'instant',
  });
});

// An ImageId given in ec2OverrideConfig must appear on every fleet override;
// the test expects the full subnet x instance type combination (four overrides).
it('overrides ImageId when specified in ec2OverrideConfig', async () => {
  const imageIdOverride = 'ami-override-123';
  const baseParameters = createRunnerConfig(defaultRunnerConfig);

  await createRunner({ ...baseParameters, ec2OverrideConfig: { ImageId: imageIdOverride } });

  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
    LaunchTemplateConfigs: [
      {
        LaunchTemplateSpecification: { LaunchTemplateName: 'lt-1', Version: '$Default' },
        Overrides: [
          { InstanceType: 'm5.large', SubnetId: 'subnet-123', ImageId: imageIdOverride },
          { InstanceType: 'c5.large', SubnetId: 'subnet-123', ImageId: imageIdOverride },
          { InstanceType: 'm5.large', SubnetId: 'subnet-456', ImageId: imageIdOverride },
          { InstanceType: 'c5.large', SubnetId: 'subnet-456', ImageId: imageIdOverride },
        ],
      },
    ],
    SpotOptions: { AllocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED },
    TagSpecifications: expect.any(Array),
    TargetCapacitySpecification: { DefaultTargetCapacityType: 'spot', TotalTargetCapacity: 1 },
    Type: 'instant',
  });
});

// With SubnetId, InstanceType and ImageId all overridden, the test expects exactly one
// fleet override carrying the three overriding values.
it('overrides all three fields (SubnetId, InstanceType, ImageId) when specified in ec2OverrideConfig', async () => {
  const subnetOverride = 'subnet-custom';
  const instanceTypeOverride = 'c5.2xlarge';
  const imageIdOverride = 'ami-custom-456';
  const baseParameters = createRunnerConfig(defaultRunnerConfig);

  await createRunner({
    ...baseParameters,
    ec2OverrideConfig: {
      SubnetId: subnetOverride,
      InstanceType: instanceTypeOverride,
      ImageId: imageIdOverride,
    },
  });

  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
    LaunchTemplateConfigs: [
      {
        LaunchTemplateSpecification: { LaunchTemplateName: 'lt-1', Version: '$Default' },
        Overrides: [
          {
            InstanceType: instanceTypeOverride,
            SubnetId: subnetOverride,
            ImageId: imageIdOverride,
          },
        ],
      },
    ],
    SpotOptions: { AllocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED },
    TagSpecifications: expect.any(Array),
    TargetCapacitySpecification: { DefaultTargetCapacityType: 'spot', TotalTargetCapacity: 1 },
    Type: 'instant',
  });
});

// Fields beyond SubnetId/InstanceType/ImageId (MaxPrice, Priority, WeightedCapacity) are
// expected to be copied onto the generated fleet override as well.
it('spreads additional ec2OverrideConfig properties to Overrides', async () => {
  const subnetOverride = 'subnet-override';
  const instanceTypeOverride = 't3.medium';
  const additionalOverrides = { MaxPrice: '0.05', Priority: 1.5, WeightedCapacity: 2.0 };
  const baseParameters = createRunnerConfig(defaultRunnerConfig);

  await createRunner({
    ...baseParameters,
    ec2OverrideConfig: {
      SubnetId: subnetOverride,
      InstanceType: instanceTypeOverride,
      ...additionalOverrides,
    },
  });

  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
    LaunchTemplateConfigs: [
      {
        LaunchTemplateSpecification: { LaunchTemplateName: 'lt-1', Version: '$Default' },
        Overrides: [
          {
            InstanceType: instanceTypeOverride,
            SubnetId: subnetOverride,
            ...additionalOverrides,
          },
        ],
      },
    ],
    SpotOptions: { AllocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED },
    TagSpecifications: expect.any(Array),
    TargetCapacitySpecification: { DefaultTargetCapacityType: 'spot', TotalTargetCapacity: 1 },
    Type: 'instant',
  });
});
});

describe('create runner with errors', () => {
Expand Down Expand Up @@ -546,6 +756,7 @@ describe('create runner with errors fail over to OnDemand', () => {
capacityType: 'spot',
type: 'Repo',
onDemandFailoverOnError: ['InsufficientInstanceCapacity'],
scaleErrors: ['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded'],
};
const defaultExpectedFleetRequestValues: ExpectedFleetRequestValues = {
type: 'Repo',
Expand Down
15 changes: 12 additions & 3 deletions lambdas/functions/control-plane/src/aws/runners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,22 @@ function generateFleetOverrides(
subnetIds: string[],
instancesTypes: string[],
amiId?: string,
ec2OverrideConfig?: Runners.Ec2OverrideConfig,
): FleetLaunchTemplateOverridesRequest[] {
const result: FleetLaunchTemplateOverridesRequest[] = [];
subnetIds.forEach((s) => {
instancesTypes.forEach((i) => {

// Use override values if available, otherwise use parameter arrays
const subnetsToUse = ec2OverrideConfig?.SubnetId ? [ec2OverrideConfig.SubnetId] : subnetIds;
const instanceTypesToUse = ec2OverrideConfig?.InstanceType ? [ec2OverrideConfig.InstanceType] : instancesTypes;
const amiIdToUse = ec2OverrideConfig?.ImageId ?? amiId;

subnetsToUse.forEach((s) => {
instanceTypesToUse.forEach((i) => {
const item: FleetLaunchTemplateOverridesRequest = {
SubnetId: s,
InstanceType: i as _InstanceType,
ImageId: amiId,
ImageId: amiIdToUse,
...ec2OverrideConfig,
};
result.push(item);
});
Expand Down Expand Up @@ -265,6 +273,7 @@ async function createInstances(
runnerParameters.subnets,
runnerParameters.ec2instanceCriteria.instanceTypes,
amiIdOverride,
runnerParameters.ec2OverrideConfig,
),
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,42 @@ describe('ScaleError', () => {

describe('toBatchItemFailures', () => {
const mockMessages: ActionRequestMessageSQS[] = [
{ messageId: 'msg-1', id: 1, eventType: 'workflow_job' },
{ messageId: 'msg-2', id: 2, eventType: 'workflow_job' },
{ messageId: 'msg-3', id: 3, eventType: 'workflow_job' },
{ messageId: 'msg-4', id: 4, eventType: 'workflow_job' },
{
messageId: 'msg-1',
id: 1,
eventType: 'workflow_job',
repositoryName: 'repo',
repositoryOwner: 'owner',
installationId: 123,
repoOwnerType: 'Organization',
},
{
messageId: 'msg-2',
id: 2,
eventType: 'workflow_job',
repositoryName: 'repo',
repositoryOwner: 'owner',
installationId: 123,
repoOwnerType: 'Organization',
},
{
messageId: 'msg-3',
id: 3,
eventType: 'workflow_job',
repositoryName: 'repo',
repositoryOwner: 'owner',
installationId: 123,
repoOwnerType: 'Organization',
},
{
messageId: 'msg-4',
id: 4,
eventType: 'workflow_job',
repositoryName: 'repo',
repositoryOwner: 'owner',
installationId: 123,
repoOwnerType: 'Organization',
},
];

it.each([
Expand Down
Loading
Loading