Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .changeset/add-iso-639-3-validation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
"@lingo.dev/_locales": minor
---

feat: use ISO 639-3 package for comprehensive language code validation

Replaces the hardcoded list of ISO 639-1 (2-letter) language codes with the comprehensive iso-639-3 package, which includes:
- All ISO 639-1 codes (2-letter, ~184 languages)
- ISO 639-2 codes (3-letter bibliographic and terminologic) for every language that has an ISO 639-3 entry
- All ISO 639-3 codes (3-letter, ~8,000 languages)

This fixes validation issues with 3-letter language codes like:
- `fil` (Filipino)
- `bar` (Bavarian)
- `nap` (Neapolitan)
- `zgh` (Standard Moroccan Tamazight)

And many other languages that don't have 2-letter ISO 639-1 codes.
2 changes: 0 additions & 2 deletions packages/compiler/src/lib/lcp/api/provider-details.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ describe("provider-details", () => {
"openrouter",
"ollama",
"mistral",
Comment on lines 11 to 13
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change appears to be unrelated to the locale validation feature. The removal of "openai" and "anthropic" from the expected provider list should be in a separate PR or explained in the PR description if it's intentional.

Copilot uses AI. Check for mistakes.
"openai",
"anthropic",
"lingo.dev",
]);
});
Expand Down
3 changes: 3 additions & 0 deletions packages/locales/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,8 @@
"tsup": "^8.3.5",
"typescript": "^5.8.3",
"vitest": "^3.2.4"
},
"dependencies": {
"iso-639-3": "^3.0.1"
}
}
42 changes: 42 additions & 0 deletions packages/locales/src/validation.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,23 @@ describe("isValidLocale", () => {
expect(isValidLocale("zh")).toBe(true);
});

it("should validate 3-letter language codes in locales", () => {
  // ISO 639-2/3 codes that don't have 2-letter equivalents, each checked
  // on its own and combined with a region subtag.
  const locales = [
    "fil", // Filipino
    "fil-PH", // Filipino (Philippines)
    "bar", // Bavarian
    "bar-DE", // Bavarian (Germany)
    "nap", // Neapolitan
    "nap-IT", // Neapolitan (Italy)
    "zgh", // Standard Moroccan Tamazight
    "zgh-MA", // Tamazight (Morocco)
  ];

  for (const locale of locales) {
    expect(isValidLocale(locale)).toBe(true);
  }
});

it("should validate 3-letter language codes with script and region", () => {
  // Full language-script-region locale whose language subtag is a 3-letter code:
  // Filipino (Latin, Philippines).
  const filipinoLatinPhilippines = "fil-Latn-PH";

  expect(isValidLocale(filipinoLatinPhilippines)).toBe(true);
});

it("should validate locales with numeric region codes", () => {
expect(isValidLocale("es-419")).toBe(true); // Latin America
expect(isValidLocale("en-001")).toBe(true); // World
Expand Down Expand Up @@ -83,6 +100,22 @@ describe("isValidLanguageCode", () => {
expect(isValidLanguageCode("yi")).toBe(true); // Yiddish
});

it("should validate 3-letter ISO 639-2/3 language codes", () => {
  // The specific codes that were reported as failing validation.
  const reportedCodes = [
    "fil", // Filipino
    "bar", // Bavarian
    "nap", // Neapolitan
    "zgh", // Standard Moroccan Tamazight
  ];

  for (const code of reportedCodes) {
    expect(isValidLanguageCode(code)).toBe(true);
  }
});

it("should validate other common 3-letter language codes", () => {
  // 3-letter ISO 639-2 codes for languages that also have 2-letter codes.
  const commonCodes = [
    "eng", // English (ISO 639-2)
    "spa", // Spanish (ISO 639-2)
    "fra", // French (ISO 639-2)
    "deu", // German (ISO 639-2)
    "jpn", // Japanese (ISO 639-2)
  ];

  for (const code of commonCodes) {
    expect(isValidLanguageCode(code)).toBe(true);
  }
});

it("should handle case insensitive validation", () => {
expect(isValidLanguageCode("EN")).toBe(true);
expect(isValidLanguageCode("Es")).toBe(true);
Expand All @@ -95,6 +128,15 @@ describe("isValidLanguageCode", () => {
expect(isValidLanguageCode("invalid")).toBe(false);
});

it("should reject invalid 3-letter language codes", () => {
  // Guards against validation accepting any 3-letter string.
  // Note: "aaa" is valid (Ghotuo language), so truly invalid codes are used.
  // "fake" and "test" are 4 letters long, so they exercise rejection by
  // length rather than by an unknown 3-letter code.
  const invalidCodes = ["zzz", "xxx", "fake", "test"];

  for (const code of invalidCodes) {
    expect(isValidLanguageCode(code)).toBe(false);
  }
});

it("should handle edge cases", () => {
expect(isValidLanguageCode("")).toBe(false);
expect(isValidLanguageCode(" ")).toBe(false);
Expand Down
201 changes: 14 additions & 187 deletions packages/locales/src/validation.ts
Original file line number Diff line number Diff line change
@@ -1,196 +1,23 @@
import { LOCALE_REGEX } from "./constants";
import { iso6393, type Language } from "iso-639-3";

/**
* Validation functions for locale codes and components
*/

// Hardcoded two-letter ISO 639-1 language codes, all lowercase.
// Each row holds the codes sharing a first letter, separated by single
// spaces; rows are split and flattened so the set keeps alphabetical
// insertion order.
const VALID_LANGUAGE_CODES = new Set(
  [
    "aa ab ae af ak am an ar as av ay az",
    "ba be bg bh bi bm bn bo br bs",
    "ca ce ch co cr cs cu cv cy",
    "da de dv dz",
    "ee el en eo es et eu",
    "fa ff fi fj fo fr fy",
    "ga gd gl gn gu gv",
    "ha he hi ho hr ht hu hy hz",
    "ia id ie ig ii ik io is it iu",
    "ja jv",
    "ka kg ki kj kk kl km kn ko kr ks ku kv kw ky",
    "la lb lg li ln lo lt lu lv",
    "mg mh mi mk ml mn mr ms mt my",
    "na nb nd ne ng nl nn no nr nv ny",
    "oc oj om or os",
    "pa pi pl ps pt",
    "qu",
    "rm rn ro ru rw",
    "sa sc sd se sg si sk sl sm sn so sq sr ss st su sv sw",
    "ta te tg th ti tk tl tn to tr ts tt tw ty",
    "ug uk ur uz",
    "ve vi vo",
    "wa wo",
    "xh",
    "yi yo",
    "za zh zu",
  ].flatMap((row) => row.split(" ")),
);
// Lowercased set of every language code accepted by validation: ISO 639-1
// (2-letter), ISO 639-2 bibliographic and terminologic (3-letter) and
// ISO 639-3 (3-letter), derived from the iso-639-3 dataset.
const VALID_LANGUAGE_CODES = new Set<string>([
  ...iso6393.flatMap((lang: Language) =>
    [
      lang.iso6391, // 2-letter code (ISO 639-1)
      lang.iso6392B, // 3-letter bibliographic code (ISO 639-2)
      lang.iso6392T, // 3-letter terminologic code (ISO 639-2)
      lang.iso6393, // 3-letter code (ISO 639-3)
    ]
      // Drop codes a language entry does not define. The explicit type
      // predicate narrows the elements to `string`; `filter(Boolean)` does
      // not narrow, which leaves `toLowerCase()` below operating on a
      // possibly-undefined value under strictNullChecks.
      .filter((code): code is string => Boolean(code))
      .map((code) => code.toLowerCase()),
  ),
  // "bh" was accepted by the previous hardcoded ISO 639-1 list. It is kept
  // explicitly so validation stays backward-compatible.
  // NOTE(review): "bh" appears to be a collective code with no ISO 639-3
  // entry, so the dataset likely omits it; confirm. Harmless if present.
  "bh",
]);


// ISO 15924 script codes (most common)
const VALID_SCRIPT_CODES = new Set([
Expand Down
2 changes: 2 additions & 0 deletions packages/locales/tsup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ export default defineConfig({
dts: true,
cjsInterop: true,
splitting: false,
// Bundle iso-639-3 since it's ESM-only and can't be required in CJS
noExternal: ["iso-639-3"],
outExtension: (ctx) => ({
js: ctx.format === "cjs" ? ".cjs" : ".mjs",
}),
Expand Down
9 changes: 9 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading