Skip to main content

Business Unit Detection Algorithms

Overview

Detection algorithms determine whether a repository or Terraform workspace represents a business unit that should be onboarded to Backstage. The system uses multiple strategies with weighted scoring.

Detection Strategies

Strategy 1: Repository Naming Convention

Pattern: bu-{business-unit}-{purpose}

// src/onboarding/detection/naming-convention.ts
/**
 * Recognises business-unit repositories by name.
 *
 * A name qualifies when it has the shape `bu-{business-unit}-{purpose}` and
 * the purpose is one of `infrastructure`, `platform` or `foundation`.
 */
export class NamingConventionDetector {
  private readonly pattern = /^bu-([a-z0-9-]+)-(infrastructure|platform|foundation)$/;

  /**
   * Checks a repository name against the naming convention.
   *
   * @param repoName - Repository name (without the owner prefix).
   * @returns A 0.9-confidence hit carrying the parsed business unit and
   *   purpose, or a zero-confidence miss when the name does not match.
   */
  detect(repoName: string): DetectionResult {
    const parsed = this.pattern.exec(repoName);

    if (parsed === null) {
      return { detected: false, confidence: 0 };
    }

    return {
      detected: true,
      confidence: 0.9,
      metadata: {
        // Group 1 is the business unit, group 2 the purpose suffix.
        businessUnit: parsed[1],
        purpose: parsed[2],
        method: 'naming-convention',
      },
    };
  }
}

Examples:

  • bu-finance-infrastructure → Business Unit: finance
  • bu-hr-platform → Business Unit: hr
  • bu-sales-foundation → Business Unit: sales
  • app-frontend → Not a BU repo
  • bu-finance → Missing purpose suffix

Confidence: 90% (highly reliable)


Strategy 2: GitHub Topics

Pattern: Repository has topic business-unit, backstage-domain, or organizational-unit (an optional bu:{name} topic supplies the business unit name)

// src/onboarding/detection/github-topics.ts
/**
 * Recognises business-unit repositories from their GitHub topics.
 *
 * One of the marker topics must be present; a separate `bu:{name}` topic,
 * when present, supplies the business unit name.
 */
export class GitHubTopicsDetector {
  private readonly businessUnitTopics = [
    'business-unit',
    'backstage-domain',
    'organizational-unit',
  ];

  /**
   * Inspects the repository's topic list.
   *
   * @param topics - All topics set on the repository.
   * @returns A 0.8-confidence hit when a marker topic is present (with
   *   `businessUnit` set to `null` if no `bu:` topic exists), otherwise a
   *   zero-confidence miss.
   */
  detect(topics: string[]): DetectionResult {
    let carriesMarker = false;
    for (const topic of topics) {
      if (this.businessUnitTopics.includes(topic)) {
        carriesMarker = true;
        break;
      }
    }

    if (!carriesMarker) {
      return { detected: false, confidence: 0 };
    }

    // The first topic of the form "bu:<name>" names the business unit.
    const prefix = 'bu:';
    const namedTopic = topics.find(topic => topic.startsWith(prefix));
    const businessUnit = namedTopic === undefined ? null : namedTopic.slice(prefix.length);

    return {
      detected: true,
      confidence: 0.8,
      metadata: {
        businessUnit,
        method: 'github-topics',
        topics,
      },
    };
  }
}

Example:

# Repository topics in GitHub
topics:
- business-unit
- bu:finance
- terraform
- gcp

Confidence: 80% (reliable but can be manually set incorrectly)


Strategy 3: Repository Description

Pattern: Description contains a [BU], [Business Unit], or [Domain] marker (matched case-insensitively)

// src/onboarding/detection/description-marker.ts
/**
 * Recognises business-unit repositories from a marker in the repository
 * description.
 *
 * Accepted markers (case-insensitive): `[BU]`, `[BU:Name]`, `[Business Unit]`
 * and `[Domain]`. Only the `[BU:Name]` form carries a business unit name.
 */
export class DescriptionMarkerDetector {
  private readonly markers = [
    /\[BU\]/i,
    // The named form must be a marker in its own right; otherwise a
    // description such as "[BU:Sales] ..." is rejected before the name can be
    // extracted.
    /\[BU:[^\]]+\]/i,
    /\[Business Unit\]/i,
    /\[Domain\]/i,
  ];

  /** Captures the name inside a `[BU:Name]` marker. */
  private readonly namedMarker = /\[BU:([^\]]+)\]/i;

  /**
   * Looks for a business-unit marker in the description.
   *
   * @param description - Repository description; empty descriptions never match.
   * @returns A 0.7-confidence hit when a marker is present, otherwise a
   *   zero-confidence miss. `metadata.businessUnit` is the trimmed, lower-cased
   *   name from `[BU:Name]`, or `null` when no non-blank name is given.
   */
  detect(description: string): DetectionResult {
    if (!description) {
      return { detected: false, confidence: 0 };
    }

    const hasMarker = this.markers.some(marker => marker.test(description));

    if (!hasMarker) {
      return { detected: false, confidence: 0 };
    }

    // A blank name such as "[BU: ]" is treated the same as no name at all.
    const rawName = this.namedMarker.exec(description)?.[1]?.trim().toLowerCase() ?? '';
    const businessUnit = rawName.length > 0 ? rawName : null;

    return {
      detected: true,
      confidence: 0.7,
      metadata: {
        businessUnit,
        method: 'description-marker',
        description,
      },
    };
  }
}

Examples:

  • [BU] Finance infrastructure for Backstage
  • [BU:Sales] CRM and sales tooling foundation
  • [Business Unit] HR systems platform
  • Application infrastructure → No marker

Confidence: 70% (manually set, prone to errors)


Strategy 4: Backstage Configuration File

Pattern: Repository contains .backstage/config.yaml with kind: Domain (kind: System is also accepted)

// src/onboarding/detection/backstage-config.ts
import yaml from 'yaml';

/**
 * Recognises business units that opt in explicitly through a
 * `.backstage/config.yaml` file whose `kind` is `Domain` or `System`.
 */
export class BackstageConfigDetector {
  /**
   * Fetches and parses `.backstage/config.yaml` from the given branch.
   *
   * Any failure inside the lookup (fetch error, unparsable YAML, missing
   * `metadata` or `spec` section) is reported as a zero-confidence miss rather
   * than propagated.
   *
   * @param repoUrl - URL of the repository to inspect.
   * @param branch - Git ref the file is read from.
   * @returns A 1.0-confidence hit carrying `metadata.name`, `spec.owner` and
   *   the parsed config, or a zero-confidence miss.
   */
  async detect(repoUrl: string, branch: string): Promise<DetectionResult> {
    try {
      const rawYaml = await this.fetchFile(repoUrl, '.backstage/config.yaml', branch);
      const config = yaml.parse(rawYaml);

      const isAcceptedKind = config.kind === 'Domain' || config.kind === 'System';
      if (!isAcceptedKind) {
        return { detected: false, confidence: 0 };
      }

      return {
        detected: true,
        confidence: 1.0, // Explicit configuration = highest confidence
        metadata: {
          businessUnit: config.metadata.name,
          owner: config.spec.owner,
          method: 'backstage-config',
          config,
        },
      };
    } catch (error) {
      // Deliberate best effort: a missing file or invalid document simply
      // means "no explicit opt-in".
      return { detected: false, confidence: 0 };
    }
  }

  /**
   * Reads one file from the repository and decodes its base64 payload as UTF-8.
   *
   * NOTE(review): `parseRepoUrl` and the `github` client are not defined in
   * this snippet; they are assumed to be provided by the surrounding module.
   *
   * @throws Error with message `Not a file` when the path resolves to a
   *   directory listing or a non-file entry.
   */
  private async fetchFile(repoUrl: string, path: string, branch: string): Promise<string> {
    const { owner, repo } = this.parseRepoUrl(repoUrl);
    const { data } = await github.repos.getContent({ owner, repo, path, ref: branch });

    if (Array.isArray(data) || data.type !== 'file') {
      throw new Error('Not a file');
    }

    return Buffer.from(data.content, 'base64').toString('utf-8');
  }
}

Example Configuration:

# .backstage/config.yaml
apiVersion: backstage.io/v1alpha1
kind: Domain
metadata:
  name: finance
  annotations:
    backstage.io/auto-onboard: "true"
spec:
  owner: group:finance-leadership

Confidence: 100% (explicit opt-in)


Strategy 5: Terraform Cloud Workspace Tags

Pattern: Workspace has tag business-unit, backstage:domain, or organizational-unit (an optional bu:{name} tag supplies the business unit name)

// src/onboarding/detection/tfc-tags.ts
/**
 * Recognises business-unit workspaces from Terraform Cloud workspace tags.
 *
 * One of the marker tags must be present; a separate `bu:{name}` tag, when
 * present, supplies the business unit name.
 */
export class TerraformCloudTagsDetector {
  private readonly businessUnitTags = [
    'business-unit',
    'backstage:domain',
    'organizational-unit',
  ];

  /**
   * Inspects the tags of a workspace.
   *
   * @param workspace - Workspace whose `tags` (objects with a `name`) are
   *   examined; a missing tag list is treated as empty.
   * @returns A 0.85-confidence hit listing every tag name, or a
   *   zero-confidence miss when no marker tag is set.
   */
  detect(workspace: TerraformWorkspace): DetectionResult {
    const tags = workspace.tags || [];

    const isMarked = tags.some(tag => this.businessUnitTags.includes(tag.name));
    if (!isMarked) {
      return { detected: false, confidence: 0 };
    }

    // The first tag of the form "bu:<name>" names the business unit.
    const prefix = 'bu:';
    const namedTag = tags.find(tag => tag.name.startsWith(prefix));
    const businessUnit = namedTag ? namedTag.name.slice(prefix.length) : null;

    return {
      detected: true,
      confidence: 0.85,
      metadata: {
        businessUnit,
        workspaceName: workspace.name,
        method: 'tfc-tags',
        tags: tags.map(tag => tag.name),
      },
    };
  }
}

Example:

# Terraform Cloud workspace configuration
resource "tfe_workspace" "finance" {
name = "bu-finance-infrastructure"
organization = "acme-corp"

tag_names = [
"business-unit",
"bu:finance",
"gcp",
]
}

Confidence: 85% (structured metadata)


Strategy 6: Terraform State Analysis

Pattern: State contains specific GCP resources (folder + projects)

// src/onboarding/detection/terraform-state.ts
/**
 * Recognises business-unit workspaces from the resource types found in a
 * Terraform state.
 *
 * Scoring: a `google_folder` adds 0.4, a `google_project` adds 0.3 and any
 * organization-level IAM or policy resource adds 0.2. The state counts as a
 * business unit only when the total exceeds 0.5.
 */
export class TerraformStateDetector {
  /**
   * Scores a Terraform state by the organizational resources it manages.
   *
   * @param state - Parsed state; a missing `resources` list is treated as empty.
   * @returns A zero-confidence miss when the state has neither a folder nor a
   *   project. Otherwise the accumulated confidence, with `detected` true only
   *   above 0.5. `metadata.businessUnit` is the lower-cased `display_name` of
   *   the first folder instance, or `undefined` when it is unavailable.
   */
  detect(state: TerraformState): DetectionResult {
    const resources = state.resources || [];

    // Check for organizational resources
    const hasFolder = resources.some(r => r.type === 'google_folder');
    const hasProjects = resources.some(r => r.type === 'google_project');
    // Any google_organization_iam_* resource counts as organization-level IAM,
    // not only the *_binding variant.
    const hasOrgResources = resources.some(r =>
      (typeof r.type === 'string' && r.type.startsWith('google_organization_iam_')) ||
      r.type === 'google_organization_policy'
    );

    if (!hasFolder && !hasProjects) {
      return { detected: false, confidence: 0 };
    }

    // Extract folder name as potential business unit. A folder resource may
    // carry no instances, and the attribute may be absent or non-string, so
    // every hop is guarded instead of assuming the full shape.
    const folderResource = resources.find(r => r.type === 'google_folder');
    const displayName = folderResource?.instances?.[0]?.attributes?.display_name;
    const businessUnit = typeof displayName === 'string' ? displayName.toLowerCase() : undefined;

    // Calculate confidence based on resource types
    let confidence = 0;
    if (hasFolder) confidence += 0.4;
    if (hasProjects) confidence += 0.3;
    if (hasOrgResources) confidence += 0.2;

    return {
      detected: confidence > 0.5,
      confidence,
      metadata: {
        businessUnit,
        method: 'terraform-state',
        resourceTypes: [...new Set(resources.map(r => r.type))],
      },
    };
  }
}

Detected Resources:

  • google_folder → Organizational folder
  • google_project → GCP projects
  • google_organization_iam_* → Organization-level IAM
  • google_organization_policy → Organization policies

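Confidence: 30-90% depending on resource types (folder +40%, projects +30%, organization-level resources +20%); the state only counts as detected when the total exceeds 50%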


Strategy 7: Repository File Structure

Pattern: Repository contains specific directory structure

// src/onboarding/detection/file-structure.ts
/**
 * Heuristic detector based on repository layout: Terraform files plus at
 * least two of the well-known top-level directories.
 */
export class FileStructureDetector {
  private readonly buStructureMarkers = [
    'terraform/',
    'modules/',
    'environments/',
    'projects/',
  ];

  /**
   * Scores the repository tree of the given branch.
   *
   * Confidence is 0.2 per matched marker directory, capped at 0.6. Any error
   * while fetching or scanning the tree yields a zero-confidence miss.
   *
   * @param repoUrl - URL of the repository to inspect.
   * @param branch - Git ref whose tree is listed.
   * @returns A hit when the tree has a `.tf`/`.tfvars` file and at least two
   *   marker directories, otherwise a zero-confidence miss.
   */
  async detect(repoUrl: string, branch: string): Promise<DetectionResult> {
    try {
      const tree = await this.fetchRepoTree(repoUrl, branch);

      // Infrastructure-as-code signal: any Terraform source or variables file.
      const hasIaC = tree.some(entry =>
        entry.path.endsWith('.tf') || entry.path.endsWith('.tfvars')
      );

      // Marker directories that at least one path lives under.
      const structureMatches = this.buStructureMarkers.filter(dir =>
        tree.some(entry => entry.path.startsWith(dir))
      );

      if (hasIaC && structureMatches.length >= 2) {
        const confidence = Math.min(0.6, structureMatches.length * 0.2);

        return {
          detected: confidence > 0.3,
          confidence,
          metadata: {
            method: 'file-structure',
            hasIaC,
            structureMatches,
          },
        };
      }

      return { detected: false, confidence: 0 };
    } catch (error) {
      return { detected: false, confidence: 0 };
    }
  }

  /**
   * Lists the repository tree for the given ref via the GitHub git-tree API,
   * requesting a recursive listing.
   *
   * NOTE(review): `parseRepoUrl` and the `github` client are not defined in
   * this snippet; they are assumed to be provided by the surrounding module.
   */
  private async fetchRepoTree(repoUrl: string, branch: string): Promise<GitTreeItem[]> {
    const { owner, repo } = this.parseRepoUrl(repoUrl);
    const { data } = await github.git.getTree({
      owner,
      repo,
      tree_sha: branch,
      recursive: 'true',
    });

    return data.tree;
  }
}

Expected Structure:

bu-finance-infrastructure/
├── terraform/
│   ├── main.tf
│   ├── variables.tf
│   └── outputs.tf
├── modules/
│   ├── folder/
│   └── project/
├── environments/
│   ├── dev.tfvars
│   └── prod.tfvars
└── projects/
    ├── finance-app-1/
    └── finance-app-2/

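Confidence: 40-60% (heuristic-based; requires Terraform files plus at least two of the marker directories, scoring 20% per directory up to 60%)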


Composite Detection Algorithm

Weighted Scoring

// src/onboarding/detection/composite-detector.ts
/**
 * Runs every detection strategy and folds the individual results into one
 * verdict.
 *
 * The composite confidence is the arithmetic mean of all detector
 * confidences; detectors that fail or do not fire contribute 0.
 *
 * NOTE(review): because the mean is taken over every registered detector, a
 * single strategy firing at 1.0 among seven yields about 0.14, and the
 * per-detector values listed below can never average to 0.9. Confirm that
 * this normalisation matches the intended thresholds before relying on them.
 */
export class CompositeDetector {
  // The trailing numbers are each strategy's approximate confidence when it
  // fires. They are informational only: no separate weight is applied on top
  // of the confidence a detector reports.
  private detectors: Detector[] = [
    new BackstageConfigDetector(), // Nominal confidence: 1.0
    new NamingConventionDetector(), // Nominal confidence: 0.9
    new TerraformCloudTagsDetector(), // Nominal confidence: 0.85
    new GitHubTopicsDetector(), // Nominal confidence: 0.8
    new TerraformStateDetector(), // Nominal confidence: 0.7
    new DescriptionMarkerDetector(), // Nominal confidence: 0.7
    new FileStructureDetector(), // Nominal confidence: 0.5
  ];

  /**
   * Runs all detectors concurrently and aggregates their results.
   *
   * A detector that throws is logged with `console.warn` and counted as a
   * zero-confidence miss, so one failing strategy never aborts the run.
   *
   * @param input - Detection input handed unchanged to every detector.
   * @returns The mean confidence, the 0.5 threshold it was compared against,
   *   the aggregated metadata and the raw per-detector results.
   */
  async detect(input: DetectionInput): Promise<CompositeResult> {
    // Run all detectors in parallel
    const results = await Promise.all(
      this.detectors.map(async detector => {
        try {
          return await detector.detect(input);
        } catch (error) {
          console.warn(`Detector ${detector.constructor.name} failed:`, error);
          return { detected: false, confidence: 0 };
        }
      })
    );

    // Mean confidence across all detectors. With no detectors registered the
    // score is 0 rather than the NaN that 0 / 0 would produce.
    const totalConfidence = results.reduce((sum, r) => sum + r.confidence, 0);
    const maxPossibleConfidence = results.length; // All detectors at 100%
    const normalizedScore =
      maxPossibleConfidence > 0 ? totalConfidence / maxPossibleConfidence : 0;

    // Aggregate metadata
    const metadata = this.aggregateMetadata(results);

    // Determine if business unit detected
    const threshold = 0.5; // Require 50% overall confidence
    const detected = normalizedScore >= threshold;

    return {
      detected,
      confidence: normalizedScore,
      threshold,
      metadata,
      detectorResults: results,
    };
  }

  /**
   * Derives the consensus business unit and the list of strategies that fired.
   *
   * @returns `businessUnit` is the most frequently reported name (`null` when
   *   none was reported); `consensus` is the share of reported names that
   *   agree with it, and 0 when no name was reported at all.
   */
  private aggregateMetadata(results: DetectionResult[]): BusinessUnitMetadata {
    const fired = results.filter(r => r.detected);

    // Collect all detected business unit names. Metadata is optional on a
    // result, so it is read defensively and empty names are dropped.
    const businessUnits = fired
      .map(r => r.metadata?.businessUnit)
      .filter(name => Boolean(name));

    // Use most common business unit name (consensus)
    const businessUnit = this.mostCommon(businessUnits);

    // Collect all detection methods, skipping results that carry none.
    const methods = fired
      .map(r => r.metadata?.method)
      .filter(method => method !== undefined && method !== null);

    return {
      businessUnit,
      detectionMethods: methods,
      // No reported name means there is nothing to agree on, so consensus is
      // 0; a single name trivially agrees with itself.
      consensus: businessUnits.length === 0 ? 0 : this.calculateConsensus(businessUnits),
    };
  }

  /**
   * Returns the most frequent element, or `null` for an empty array. Ties are
   * resolved in favour of the element that appears first.
   */
  private mostCommon<T>(arr: T[]): T | null {
    if (arr.length === 0) return null;

    const counts = new Map<T, number>();
    for (const item of arr) {
      counts.set(item, (counts.get(item) ?? 0) + 1);
    }

    let winner = arr[0];
    let winnerCount = 0;
    for (const [item, count] of counts) {
      if (count > winnerCount) {
        winnerCount = count;
        winner = item;
      }
    }

    return winner;
  }

  /** Fraction of the reported names that equal the most common one. */
  private calculateConsensus(businessUnits: string[]): number {
    const mostCommon = this.mostCommon(businessUnits);
    const total = businessUnits.length;
    const matches = businessUnits.filter(bu => bu === mostCommon).length;
    return matches / total;
  }
}

Detection Threshold

/**
 * Confidence cut-offs (0..1) used to turn a composite detection score into an
 * onboarding decision. The values are ordered
 * REJECT < DEFAULT < AUTO_ONBOARD < SILENT.
 *
 * DetectionDecisionMaker maps a score onto these bands as follows:
 * below REJECT -> 'reject'; from REJECT up to DEFAULT -> 'defer';
 * from DEFAULT up to AUTO_ONBOARD -> 'onboard_with_review';
 * from AUTO_ONBOARD up to SILENT -> 'onboard' with a notification;
 * SILENT and above -> 'onboard' without a notification.
 *
 * NOTE(review): CompositeDetector hard-codes its own 0.5 threshold instead of
 * reading DEFAULT; keep the two in sync if either changes.
 */
export const DETECTION_THRESHOLDS = {
// Minimum score for onboarding at all; scores in [DEFAULT, AUTO_ONBOARD)
// are onboarded only after manual review
DEFAULT: 0.5,

// Minimum score for onboarding without review (a notification is still sent)
AUTO_ONBOARD: 0.7,

// Minimum score for silent onboarding (no review, no notification)
SILENT: 0.9,

// Scores strictly below this are rejected outright as "not a business unit"
REJECT: 0.3,
};

Decision Logic

/**
 * Translates a composite detection score into the action the onboarding
 * pipeline should take.
 */
export class DetectionDecisionMaker {
  /**
   * Picks an action from the confidence bands in `DETECTION_THRESHOLDS`.
   *
   * Bands are checked in this order: below REJECT, at or above SILENT, at or
   * above AUTO_ONBOARD, at or above DEFAULT. Anything left over (scores from
   * REJECT up to DEFAULT) is deferred.
   *
   * @param result - Composite detection result; only `confidence` is read.
   * @returns The decision, always echoing the input confidence.
   */
  makeDecision(result: CompositeResult): DetectionDecision {
    const { confidence } = result;
    const { REJECT, SILENT, AUTO_ONBOARD, DEFAULT } = DETECTION_THRESHOLDS;

    if (confidence < REJECT) {
      return {
        action: 'reject',
        reason: 'Confidence below rejection threshold',
        confidence,
      };
    } else if (confidence >= SILENT) {
      // Highest band: onboard without approval and without notifying anyone.
      return {
        action: 'onboard',
        requiresApproval: false,
        sendNotification: false,
        confidence,
      };
    } else if (confidence >= AUTO_ONBOARD) {
      // Onboard automatically, but let people know.
      return {
        action: 'onboard',
        requiresApproval: false,
        sendNotification: true,
        confidence,
      };
    } else if (confidence >= DEFAULT) {
      return {
        action: 'onboard_with_review',
        requiresApproval: true,
        sendNotification: true,
        confidence,
        reviewReason: 'Medium confidence detection',
      };
    }

    return {
      action: 'defer',
      reason: 'Ambiguous detection, awaiting additional signals',
      confidence,
    };
  }
}

Project Detection (Under Business Units)

Project Workspace Detection

// src/onboarding/detection/project-detector.ts
/**
 * Recognises project workspaces that belong to a known business unit.
 *
 * Expected workspace name: `bu-{business-unit}-{project}-{environment}`, with
 * the environment being `dev`, `staging` or `prod`.
 */
export class ProjectDetector {
  /** Characters allowed in a business unit name. */
  private readonly businessUnitPattern = /^[a-z0-9-]+$/;

  /** Splits what follows the `bu-{business-unit}-` prefix into project and environment. */
  private readonly projectPattern = /^([a-z0-9-]+)-(dev|staging|prod)$/;

  /**
   * Checks whether a workspace is a project of the given business unit.
   *
   * Both the business unit and the project may contain hyphens, so the name
   * cannot be split by pattern alone. The known `businessUnit` is therefore
   * used as the anchor: the name must start with `bu-{businessUnit}-`, and
   * the remainder is parsed as `{project}-{environment}`.
   *
   * NOTE(review): when one business unit name is a hyphen-prefix of another
   * (e.g. `data` and `data-science`), workspaces of the longer one also match
   * the shorter one. The naming scheme itself cannot disambiguate that case.
   *
   * @param workspaceName - Terraform Cloud workspace name.
   * @param businessUnit - Parent business unit the workspace must belong to.
   * @returns `{ detected: false }` when the name does not follow the scheme or
   *   belongs to a different business unit; otherwise the parsed metadata.
   */
  detectProject(workspaceName: string, businessUnit: string): ProjectDetectionResult {
    if (!this.businessUnitPattern.test(businessUnit)) {
      return { detected: false };
    }

    // Verify business unit matches parent
    const prefix = `bu-${businessUnit}-`;
    if (!workspaceName.startsWith(prefix)) {
      return { detected: false };
    }

    const match = this.projectPattern.exec(workspaceName.slice(prefix.length));
    if (!match) {
      return { detected: false };
    }

    const [, projectName, environment] = match;

    return {
      detected: true,
      metadata: {
        businessUnit,
        projectName,
        environment,
        componentName: `${businessUnit}-${projectName}-${environment}`,
        systemName: `${businessUnit}-infrastructure`,
      },
    };
  }
}

Example:

  • BU Workspace: bu-finance-infrastructure
  • Project Workspaces:
    • bu-finance-erp-prod → Project: erp, Environment: prod
    • bu-finance-analytics-dev → Project: analytics, Environment: dev

Project Discovery Flow

/**
 * Finds the Terraform Cloud workspaces that are projects of a business unit.
 *
 * NOTE(review): `tfcClient` and `projectDetector` are not declared in this
 * snippet; they are assumed to be provided on the instance elsewhere.
 */
export class ProjectDiscoveryService {
  /**
   * Lists every workspace in the organization and keeps those that the
   * project detector attributes to the given business unit.
   *
   * Each workspace is classified exactly once; the detection result is reused
   * to build the returned entry instead of being recomputed.
   *
   * @param businessUnit - Business unit whose projects are wanted.
   * @param tfcOrg - Terraform Cloud organization to list workspaces from.
   * @returns One entry per matching workspace, in listing order, combining the
   *   workspace id and name with the detector's metadata.
   */
  async discoverProjects(businessUnit: string, tfcOrg: string): Promise<Project[]> {
    // Find all workspaces for this business unit
    const allWorkspaces = await this.tfcClient.listWorkspaces(tfcOrg);

    // Convert matching workspaces to Backstage Component entities
    const projects = [];
    for (const ws of allWorkspaces) {
      const detection = this.projectDetector.detectProject(ws.name, businessUnit);
      if (!detection.detected) {
        continue;
      }

      projects.push({
        workspaceId: ws.id,
        workspaceName: ws.name,
        ...detection.metadata,
      });
    }

    return projects;
  }
}

Example Detection Scenarios

Scenario 1: Perfect Detection (94% confidence)

Input:

  • Repository: bu-finance-infrastructure
  • Topics: [business-unit, bu:finance]
  • File: .backstage/config.yaml exists with kind: Domain
  • TFC Workspace: bu-finance-infrastructure with tag business-unit
  • State: Contains google_folder and google_project

Detection Results:

{
"detected": true,
"confidence": 0.94,
"metadata": {
"businessUnit": "finance",
"detectionMethods": [
"backstage-config",
"naming-convention",
"github-topics",
"tfc-tags",
"terraform-state"
],
"consensus": 1.0
},
"decision": {
"action": "onboard",
"requiresApproval": false,
"sendNotification": false
}
}

Scenario 2: Good Detection (75% confidence)

Input:

  • Repository: bu-hr-platform
  • Topics: [business-unit]
  • No .backstage/config.yaml
  • TFC Workspace: bu-hr-platform
  • State: Contains google_project only

Detection Results:

{
"detected": true,
"confidence": 0.75,
"metadata": {
"businessUnit": "hr",
"detectionMethods": [
"naming-convention",
"github-topics",
"terraform-state"
],
"consensus": 1.0
},
"decision": {
"action": "onboard",
"requiresApproval": false,
"sendNotification": true
}
}

Scenario 3: Ambiguous Detection (55% confidence)

Input:

  • Repository: infrastructure-finance
  • Topics: []
  • Description: [BU] Finance systems
  • TFC Workspace: finance-infra
  • State: Contains google_compute_instance only

Detection Results:

{
"detected": true,
"confidence": 0.55,
"metadata": {
"businessUnit": "finance",
"detectionMethods": [
"description-marker"
],
"consensus": 1.0
},
"decision": {
"action": "onboard_with_review",
"requiresApproval": true,
"sendNotification": true,
"reviewReason": "Medium confidence detection"
}
}

Scenario 4: Rejected Detection (20% confidence)

Input:

  • Repository: my-app
  • Topics: [application, frontend]
  • No .backstage/config.yaml
  • TFC Workspace: my-app-prod
  • State: Contains google_cloud_run_service only

Detection Results:

{
"detected": false,
"confidence": 0.2,
"metadata": {
"businessUnit": null,
"detectionMethods": [],
"consensus": 0
},
"decision": {
"action": "reject",
"reason": "Confidence below rejection threshold"
}
}

Best Practices

1. Explicit Configuration Wins

Always prioritize .backstage/config.yaml over heuristics

2. Multiple Signals

Require at least 2 detection methods to agree for auto-onboarding

3. Manual Override

Provide UI for manual detection override by admins

4. Feedback Loop

Track detection accuracy and improve algorithms based on false positives/negatives

5. Logging

Log all detection attempts with full context for debugging


Next Steps

See 05-validation-quality-gates.md for validation logic after detection.