Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
856 changes: 856 additions & 0 deletions invokeai/app/invocations/detailer.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions invokeai/app/invocations/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ class ConditioningField(BaseModel):
class BoundingBoxField(BoundingBox):
"""A bounding box primitive value."""

label: Optional[str] = Field(
default=None,
description="The label associated with the bounding box. This value is typically set when the bounding box was produced by a detector.",
)
score: Optional[float] = Field(
default=None,
ge=0.0,
Expand Down
48 changes: 36 additions & 12 deletions invokeai/app/invocations/grounding_dino.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from pathlib import Path
from typing import Literal

Expand All @@ -18,6 +19,36 @@
"grounding-dino-tiny": "IDEA-Research/grounding-dino-tiny",
"grounding-dino-base": "IDEA-Research/grounding-dino-base",
}
# Runs of any separator character (pipe, comma, period, semicolon) delimit labels.
GROUNDING_DINO_LABEL_SPLIT_PATTERN = re.compile(r"[|,.;]+")


def normalize_grounding_dino_label(label: str) -> str:
    """Canonicalize a single Grounding DINO label.

    Trims surrounding whitespace, drops stray leading/trailing separator
    characters (``|``, ``,``, ``.``, ``;``), trims again, and lowercases.
    """
    stripped = label.strip().strip("|,.;")
    return stripped.strip().lower()


def parse_grounding_dino_labels(prompt: str) -> list[str]:
    """Split a free-form prompt into unique, normalized detection labels.

    The prompt is split on runs of separator characters; each fragment is
    normalized, and empty or duplicate labels are discarded while the
    first-seen order of the remaining labels is preserved.
    """
    # A plain dict doubles as an ordered set (insertion order is guaranteed).
    ordered_unique: dict[str, None] = {}
    for fragment in GROUNDING_DINO_LABEL_SPLIT_PATTERN.split(prompt):
        normalized = normalize_grounding_dino_label(fragment)
        if normalized:
            ordered_unique.setdefault(normalized, None)
    return list(ordered_unique)


def detection_result_to_bounding_box(detection: DetectionResult) -> BoundingBoxField:
    """Convert a detector result into a BoundingBoxField primitive.

    Copies the box coordinates and score through unchanged; the label is
    normalized so downstream consumers see the same canonical form that
    prompt parsing produces.
    """
    box = detection.box
    return BoundingBoxField(
        x_min=box.xmin,
        y_min=box.ymin,
        x_max=box.xmax,
        y_max=box.ymax,
        score=detection.score,
        label=normalize_grounding_dino_label(detection.label),
    )


@invocation(
Expand Down Expand Up @@ -49,23 +80,16 @@ class GroundingDinoInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> BoundingBoxCollectionOutput:
# The model expects a 3-channel RGB image.
image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
labels = parse_grounding_dino_labels(self.prompt)
if len(labels) == 0:
return BoundingBoxCollectionOutput(collection=[])

detections = self._detect(
context=context, image=image_pil, labels=[self.prompt], threshold=self.detection_threshold
)
detections = self._detect(context=context, image=image_pil, labels=labels, threshold=self.detection_threshold)

# Convert detections to BoundingBoxCollectionOutput.
bounding_boxes: list[BoundingBoxField] = []
for detection in detections:
bounding_boxes.append(
BoundingBoxField(
x_min=detection.box.xmin,
x_max=detection.box.xmax,
y_min=detection.box.ymin,
y_max=detection.box.ymax,
score=detection.score,
)
)
bounding_boxes.append(detection_result_to_bounding_box(detection))
return BoundingBoxCollectionOutput(collection=bounding_boxes)

@staticmethod
Expand Down
220 changes: 220 additions & 0 deletions invokeai/frontend/web/public/locales/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@
"generation": {
"title": "Generation"
},
"faceDetailer": {
"title": "Detailer"
},
"image": {
"title": "Image"
},
Expand Down Expand Up @@ -1016,6 +1019,7 @@
"dypeScale": "$t(parameters.dypeScale)",
"dypeExponent": "$t(parameters.dypeExponent)",
"generationMode": "Generation Mode",
"detailer": "Detailer",
"geminiTemperature": "Gemini Temperature",
"geminiThinkingLevel": "Gemini Thinking Level",
"openaiQuality": "OpenAI Quality",
Expand Down Expand Up @@ -1616,6 +1620,119 @@
},
"cfgScale": "CFG Scale",
"cfgRescaleMultiplier": "CFG Rescale Multiplier",
"faceDetailer": {
"enabled": "Enable",
"quality": "Quality",
"adjusted": "Adjusted",
"detector": "Detector",
"detectorModel": "Detector Model",
"samModel": "SAM Model",
"targetPrompt": "Target",
"detectionThreshold": "Threshold",
"faceSelection": "Target Selection",
"targetSize": "Target Size",
"maxUpscale": "Max Upscale",
"maxProcessSize": "Max Process",
"cropPadding": "Crop Padding",
"maskExpand": "Mask Expand",
"maskFeather": "Mask Feather",
"denoiseMaskExpand": "Denoise Expand",
"denoiseMaskFeather": "Denoise Gradient",
"pasteMaskExpand": "Paste Expand",
"pasteMaskFeather": "Paste Feather",
"colorCorrectMode": "Color Match",
"faceId": "Target ID",
"minConfidence": "Confidence",
"padding": "Padding",
"strength": "Denoise",
"steps": "Steps",
"cfgScale": "CFG Scale",
"maskBlur": "Mask Blur",
"developerOptions": "Developer Options",
"debugOutput": "Debug Output",
"legacyDetector": "Legacy Detector",
"mediapipeLegacyWarning": "MediaPipe is a legacy face-only detector for development testing. It is not part of the supported DINO/SAM Detailer path.",
"effectiveProfile": "Profile",
"effectiveValue": "Effective: {{value}}",
"bodyProfileActive": "Body Profile Active",
"bodyProfileSummary": "Process {{process}} / Strength {{strength}} / Steps {{steps}} / CFG <= {{cfg}}",
"cropPaddingTooltip": "Smaller padding gives the target more processing pixels. Larger padding keeps more surrounding context.",
"tooltips": {
"strength": "How much the detail pass may regenerate the selected crop.",
"steps": "How many denoise steps to run for the detail pass.",
"faceSelection": "How to choose one target when the detector finds multiple matches.",
"faceId": "The zero-based target index used when Target Selection is Index.",
"detectionThreshold": "Minimum detector confidence. Lower values find more targets but can add false positives.",
"detector": "Choose the target detector used by the Detailer. DINO + SAM is the supported V1 path.",
"detectorModel": "GroundingDINO model used to find target boxes. Larger models may improve detection and run slower.",
"samModel": "SAM model used to segment the selected target. Larger models may improve masks and run slower.",
"targetSize": "Preferred processing size for small detected crops.",
"maxUpscale": "Maximum pixel scale factor allowed before denoising the crop.",
"maxProcessSize": "Maximum processing resolution for the detail pass.",
"denoiseMaskExpand": "Expands the detected mask before denoising.",
"denoiseMaskFeather": "Softens the denoise mask edge before the crop is regenerated.",
"pasteMaskExpand": "Expands the final paste region.",
"pasteMaskFeather": "Softens the final paste edge.",
"colorCorrectMode": "Optionally match the detailed crop colors to the original crop before paste.",
"cfgScale": "Prompt guidance used by the detail denoise pass.",
"minConfidence": "Minimum MediaPipe face confidence for the legacy detector.",
"padding": "Extra pixels around the MediaPipe face crop.",
"maskBlur": "Legacy MediaPipe mask blur used for the detail denoise mask."
},
"advancedGroups": {
"detection": "Detection",
"cropScale": "Crop & Scale",
"masksPaste": "Masks & Paste",
"denoise": "Denoise"
},
"targetPresets": {
"label": "Target",
"face": "Face",
"head": "Head",
"hands": "Hands",
"body": "Body",
"custom": "Custom"
},
"detectors": {
"groundedSam": "DINO + SAM",
"mediapipe": "MediaPipe (Legacy)"
},
"dinoModels": {
"grounding-dino-tiny": "GroundingDINO Tiny",
"grounding-dino-base": "GroundingDINO Base"
},
"samModels": {
"segment-anything-2-small": "SAM 2 Small",
"segment-anything-2-tiny": "SAM 2 Tiny",
"segment-anything-2-base": "SAM 2 Base",
"segment-anything-2-large": "SAM 2 Large",
"segment-anything-base": "SAM Base",
"segment-anything-large": "SAM Large",
"segment-anything-huge": "SAM Huge"
},
"qualities": {
"fast": "Fast",
"balanced": "Balanced",
"high": "High"
},
"qualityTooltips": {
"fast": "Fast applies visible speed-first starter settings, including the small SAM model.",
"balanced": "Balanced applies visible recommended starter settings, including the base SAM model.",
"high": "High applies visible quality-first starter settings, including the large SAM model."
},
"faceSelections": {
"highestScore": "Highest Score",
"largestArea": "Largest Area",
"index": "Index"
},
"colorCorrectModes": {
"off": "Off",
"luma": "Luma",
"chroma": "Chroma",
"ycbcr": "YCbCr",
"rgb": "RGB"
}
},
"clipSkip": "CLIP Skip",
"coherenceMode": "Mode",
"coherenceEdgeSize": "Edge Size",
Expand Down Expand Up @@ -2107,6 +2224,109 @@
"• Lower Weight (0-.75): Creates a smaller impact on the final result."
]
},
"detailerQuality": {
"heading": "Detailer Quality",
"paragraphs": [
"Applies visible starter settings for the detail pass.",
"Fast favors speed, Balanced is the recommended default, and High uses larger mask models and processing sizes."
]
},
"detailerDenoisingStrength": {
"heading": "Denoising Strength",
"paragraphs": [
"Controls how much the selected crop may change during the detail pass.",
"Lower values preserve the original image more closely. Higher values repaint more strongly."
]
},
"detailerSteps": {
"heading": "Steps",
"paragraphs": ["Number of denoising steps used for the detail pass."]
},
"detailerTargetSelection": {
"heading": "Target Selection",
"paragraphs": ["Controls how one target is selected when the detector finds multiple matches."]
},
"detailerTargetId": {
"heading": "Target ID",
"paragraphs": ["Zero-based target index used when Target Selection is set to Index."]
},
"detailerDetectionThreshold": {
"heading": "Threshold",
"paragraphs": [
"Minimum detector confidence for candidate targets.",
"Lower values may find more targets, but can also add false positives."
]
},
"detailerDetector": {
"heading": "Detector",
"paragraphs": ["Selects the detector path. DINO + SAM is the supported Detailer path for this version."]
},
"detailerDinoModel": {
"heading": "DINO Model",
"paragraphs": ["GroundingDINO model used to find target boxes. Larger models may improve detection and run slower."]
},
"detailerSamModel": {
"heading": "SAM Model",
"paragraphs": ["Segmentation model used to create the target mask. Larger models may improve masks and run slower."]
},
"detailerTargetSize": {
"heading": "Target Size",
"paragraphs": ["Preferred processing size for small detected crops before denoising."]
},
"detailerMaxUpscale": {
"heading": "Max Upscale",
"paragraphs": ["Maximum pixel resize factor allowed before the crop is denoised."]
},
"detailerMaxProcess": {
"heading": "Max Process",
"paragraphs": ["Maximum processing resolution for the detail pass. This is a hard safety cap."]
},
"detailerCropPadding": {
"heading": "Crop Padding",
"paragraphs": [
"Extra context added around the detected target.",
"Smaller padding gives the target more processing pixels. Larger padding preserves more surrounding context."
]
},
"detailerDenoiseMaskExpand": {
"heading": "Denoise Expand",
"paragraphs": ["Expands the detected mask before the detail denoise pass."]
},
"detailerDenoiseMaskFeather": {
"heading": "Denoise Gradient",
"paragraphs": ["Softens the denoise mask edge used by the detail pass."]
},
"detailerPasteMaskExpand": {
"heading": "Paste Expand",
"paragraphs": ["Expands the final paste region used to composite the detailed crop back into the image."]
},
"detailerPasteMaskFeather": {
"heading": "Paste Feather",
"paragraphs": ["Softens the final paste edge when compositing the detailed crop back into the image."]
},
"detailerColorMatch": {
"heading": "Color Match",
"paragraphs": ["Optionally matches the detailed crop colors to the original crop before paste."]
},
"detailerCfgScale": {
"heading": "CFG Scale",
"paragraphs": [
"Prompt guidance used by the detail denoise pass.",
"Lower values usually preserve the original crop better. High values can overcook small inpainted regions."
]
},
"detailerMediapipeConfidence": {
"heading": "Confidence",
"paragraphs": ["Minimum MediaPipe face confidence for the legacy detector."]
},
"detailerMediapipePadding": {
"heading": "Padding",
"paragraphs": ["Extra pixels around the MediaPipe face crop."]
},
"detailerMediapipeMaskBlur": {
"heading": "Mask Blur",
"paragraphs": ["Legacy MediaPipe mask blur used for the detail denoise mask."]
},
"dynamicPrompts": {
"heading": "Dynamic Prompts",
"paragraphs": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,28 @@ export type Feature =
| 'controlNetProcessor'
| 'controlNetResizeMode'
| 'controlNetWeight'
| 'detailerCfgScale'
| 'detailerColorMatch'
| 'detailerCropPadding'
| 'detailerDenoiseMaskExpand'
| 'detailerDenoiseMaskFeather'
| 'detailerDenoisingStrength'
| 'detailerDetectionThreshold'
| 'detailerDetector'
| 'detailerDinoModel'
| 'detailerMaxProcess'
| 'detailerMaxUpscale'
| 'detailerMediapipeConfidence'
| 'detailerMediapipeMaskBlur'
| 'detailerMediapipePadding'
| 'detailerPasteMaskExpand'
| 'detailerPasteMaskFeather'
| 'detailerQuality'
| 'detailerSamModel'
| 'detailerSteps'
| 'detailerTargetId'
| 'detailerTargetSelection'
| 'detailerTargetSize'
| 'dynamicPrompts'
| 'dynamicPromptsMaxPrompts'
| 'dynamicPromptsSeedBehaviour'
Expand Down
Loading
Loading