Business Unit Detection Algorithms
Overview
Detection algorithms determine whether a repository or Terraform workspace represents a business unit that should be onboarded to Backstage. The system uses multiple strategies with weighted scoring.
Detection Strategies
Strategy 1: Repository Naming Convention
Pattern: bu-{business-unit}-{purpose}
// src/onboarding/detection/naming-convention.ts
/**
 * Recognises business-unit repositories by the `bu-{business-unit}-{purpose}`
 * naming convention.
 */
export class NamingConventionDetector {
  /**
   * Group 1 is the business-unit slug; group 2 is one of the accepted
   * purpose suffixes (`infrastructure`, `platform`, `foundation`).
   */
  private readonly pattern = /^bu-([a-z0-9-]+)-(infrastructure|platform|foundation)$/;

  /**
   * Tests a repository name against the naming convention.
   *
   * @param repoName Repository name to inspect.
   * @returns A hit with confidence 0.9 carrying the captured slug and purpose,
   *          or a miss with confidence 0 when the name does not match.
   */
  detect(repoName: string): DetectionResult {
    const captured = repoName.match(this.pattern);

    if (captured === null) {
      return { detected: false, confidence: 0 };
    }

    // Index 0 is the whole match; the two capture groups follow.
    const businessUnit = captured[1];
    const purpose = captured[2];

    return {
      detected: true,
      confidence: 0.9,
      metadata: { businessUnit, purpose, method: 'naming-convention' },
    };
  }
}
Examples:
- ✅ bu-finance-infrastructure → Business Unit: finance
- ✅ bu-hr-platform → Business Unit: hr
- ✅ bu-sales-foundation → Business Unit: sales
- ❌ app-frontend → Not a BU repo
- ❌ bu-finance → Missing purpose suffix
Confidence: 90% (highly reliable)
Strategy 2: GitHub Topics
Pattern: Repository has topic business-unit, backstage-domain, or organizational-unit
// src/onboarding/detection/github-topics.ts
/**
 * Recognises business-unit repositories from their GitHub topics.
 */
export class GitHubTopicsDetector {
  /** Topics that flag a repository as a business unit. */
  private readonly businessUnitTopics = [
    'business-unit',
    'backstage-domain',
    'organizational-unit',
  ];

  /**
   * Looks for a marker topic and, if one is present, for a `bu:`-prefixed
   * topic that names the business unit.
   *
   * NOTE(review): GitHub's topic naming rules may not permit `:` in a topic,
   * in which case the `bu:` lookup can never succeed — confirm.
   *
   * @param topics Topics attached to the repository.
   * @returns A hit with confidence 0.8 (business unit is `null` when no
   *          `bu:` topic exists), or a miss with confidence 0.
   */
  detect(topics: string[]): DetectionResult {
    const markers = this.businessUnitTopics;
    const flagged = topics.some(candidate => markers.includes(candidate));

    if (flagged === false) {
      return { detected: false, confidence: 0 };
    }

    // A separate "bu:<name>" topic, when present, carries the unit name.
    const prefix = 'bu:';
    const named = topics.find(candidate => candidate.startsWith(prefix));
    let businessUnit: string | null = null;
    if (named) {
      businessUnit = named.slice(prefix.length);
    }

    return {
      detected: true,
      confidence: 0.8,
      metadata: { businessUnit, method: 'github-topics', topics },
    };
  }
}
Example:
# Repository topics in GitHub
topics:
- business-unit
- bu:finance
- terraform
- gcp
Confidence: 80% (reliable but can be manually set incorrectly)
Strategy 3: Repository Description
Pattern: Description contains a [BU], [Business Unit], or [Domain] marker; the business unit can be named inline as [BU:name]
// src/onboarding/detection/description-marker.ts
/**
 * Recognises business-unit repositories from a bracketed marker in the
 * repository description.
 */
export class DescriptionMarkerDetector {
  /**
   * Markers that flag a description, matched case-insensitively. The first
   * entry accepts both the bare `[BU]` form and the named `[BU:name]` form;
   * without the optional `:name` part a description such as
   * `[BU:Sales] ...` would be rejected before name extraction could run.
   */
  private readonly markers = [
    /\[BU(?::[^\]]+)?\]/i,
    /\[Business Unit\]/i,
    /\[Domain\]/i,
  ];

  /** Captures the name inside a `[BU:name]` marker. */
  private readonly namedMarker = /\[BU:([^\]]+)\]/i;

  /**
   * Inspects a repository description for a business-unit marker.
   *
   * @param description Repository description; an empty value is a miss.
   * @returns A hit with confidence 0.7 whose `businessUnit` is the trimmed,
   *          lower-cased name from `[BU:name]` (or `null` when no usable
   *          name is given), or a miss with confidence 0.
   */
  detect(description: string): DetectionResult {
    if (!description) {
      return { detected: false, confidence: 0 };
    }

    const hasMarker = this.markers.some(marker => marker.test(description));
    if (!hasMarker) {
      return { detected: false, confidence: 0 };
    }

    // Only the [BU:name] form carries a name; a blank name counts as absent.
    const named = description.match(this.namedMarker);
    const businessUnit = named ? named[1].trim().toLowerCase() || null : null;

    return {
      detected: true,
      confidence: 0.7,
      metadata: {
        businessUnit,
        method: 'description-marker',
        description,
      },
    };
  }
}
Examples:
- ✅ [BU] Finance infrastructure for Backstage
- ✅ [BU:Sales] CRM and sales tooling foundation
- ✅ [Business Unit] HR systems platform
- ❌ Application infrastructure → No marker
Confidence: 70% (manually set, prone to errors)
Strategy 4: Backstage Configuration File
Pattern: Repository contains .backstage/config.yaml with kind: Domain (the detector also accepts kind: System)
// src/onboarding/detection/backstage-config.ts
import yaml from 'yaml';
/**
 * Recognises business units from an explicit `.backstage/config.yaml`
 * descriptor committed to the repository.
 *
 * NOTE(review): `github` and `this.parseRepoUrl` are used below but are not
 * declared in this snippet — confirm where they come from.
 */
export class BackstageConfigDetector {
  /**
   * Fetches and inspects `.backstage/config.yaml` on the given branch.
   *
   * @param repoUrl Repository URL, resolved to owner/repo by `parseRepoUrl`.
   * @param branch  Git ref the file is read from.
   * @returns A hit with confidence 1.0 when the descriptor has kind `Domain`
   *          or `System` and a non-empty string `metadata.name`; otherwise a
   *          miss with confidence 0. Any failure while fetching, parsing or
   *          reading the descriptor is also reported as a miss.
   */
  async detect(repoUrl: string, branch: string): Promise<DetectionResult> {
    try {
      const content = await this.fetchFile(repoUrl, '.backstage/config.yaml', branch);
      const config = yaml.parse(content);

      if (config.kind !== 'Domain' && config.kind !== 'System') {
        return { detected: false, confidence: 0 };
      }

      const businessUnit = config.metadata.name;
      const owner = config.spec.owner;

      // A descriptor without a usable name cannot identify a business unit,
      // so it must not be reported at the highest confidence.
      if (typeof businessUnit !== 'string' || businessUnit.trim() === '') {
        return { detected: false, confidence: 0 };
      }

      return {
        detected: true,
        confidence: 1.0, // Explicit configuration = highest confidence
        metadata: {
          businessUnit,
          owner,
          method: 'backstage-config',
          config,
        },
      };
    } catch {
      // Deliberate best effort: missing file, invalid YAML, or a descriptor
      // lacking `metadata`/`spec` all count as "not detected".
      return { detected: false, confidence: 0 };
    }
  }

  /**
   * Reads one file from the repository through the GitHub contents API.
   *
   * @param repoUrl Repository URL, resolved to owner/repo by `parseRepoUrl`.
   * @param path    Path of the file inside the repository.
   * @param branch  Git ref to read from.
   * @returns The file content decoded from base64 as UTF-8 text.
   * @throws Error('Not a file') when the path resolves to a directory
   *         listing or to a non-file entry.
   */
  private async fetchFile(repoUrl: string, path: string, branch: string): Promise<string> {
    const { owner, repo } = this.parseRepoUrl(repoUrl);
    const response = await github.repos.getContent({
      owner,
      repo,
      path,
      ref: branch,
    });

    // The API returns an array for directories; only single files are usable.
    if (Array.isArray(response.data) || response.data.type !== 'file') {
      throw new Error('Not a file');
    }

    return Buffer.from(response.data.content, 'base64').toString('utf-8');
  }
}
Example Configuration:
# .backstage/config.yaml
apiVersion: backstage.io/v1alpha1
kind: Domain
metadata:
name: finance
annotations:
backstage.io/auto-onboard: "true"
spec:
owner: group:finance-leadership
Confidence: 100% (explicit opt-in)
Strategy 5: Terraform Cloud Workspace Tags
Pattern: Workspace has tag business-unit, backstage:domain, or organizational-unit
// src/onboarding/detection/tfc-tags.ts
/**
 * Recognises business-unit workspaces from Terraform Cloud workspace tags.
 */
export class TerraformCloudTagsDetector {
  /** Tag names that flag a workspace as a business unit. */
  private readonly businessUnitTags = [
    'business-unit',
    'backstage:domain',
    'organizational-unit',
  ];

  /**
   * Looks for a marker tag and, if one is present, for a `bu:`-prefixed tag
   * that names the business unit.
   *
   * @param workspace Workspace whose `tags` (objects with a `name`) are
   *                  inspected; a missing tag list is treated as empty.
   * @returns A hit with confidence 0.85 (business unit is `null` when no
   *          `bu:` tag exists), or a miss with confidence 0.
   */
  detect(workspace: TerraformWorkspace): DetectionResult {
    const tags = workspace.tags || [];
    const markers = this.businessUnitTags;

    const flagged = tags.some(tag => markers.includes(tag.name));
    if (flagged === false) {
      return { detected: false, confidence: 0 };
    }

    // A separate "bu:<name>" tag, when present, carries the unit name.
    const prefix = 'bu:';
    const named = tags.find(tag => tag.name.startsWith(prefix));
    let businessUnit: string | null = null;
    if (named) {
      businessUnit = named.name.slice(prefix.length);
    }

    const tagNames = tags.map(tag => tag.name);

    return {
      detected: true,
      confidence: 0.85,
      metadata: {
        businessUnit,
        workspaceName: workspace.name,
        method: 'tfc-tags',
        tags: tagNames,
      },
    };
  }
}
Example:
# Terraform Cloud workspace configuration
resource "tfe_workspace" "finance" {
name = "bu-finance-infrastructure"
organization = "acme-corp"
tag_names = [
"business-unit",
"bu:finance",
"gcp",
]
}
Confidence: 85% (structured metadata)
Strategy 6: Terraform State Analysis
Pattern: State contains specific GCP resources (folder + projects)
// src/onboarding/detection/terraform-state.ts
/**
 * Recognises business units from the resource types present in a Terraform
 * state: an organisational folder, projects, and organisation-level
 * IAM/policy resources.
 */
export class TerraformStateDetector {
  /**
   * Scores a Terraform state by the resource types it contains.
   *
   * Scoring: folder 0.4, project 0.3, organisation-level resource 0.2. The
   * score is accumulated in whole tenths so the reported confidence is exact
   * (0.9, not 0.8999999999999999).
   *
   * @param state Terraform state; a missing `resources` list is treated as empty.
   * @returns A zero-confidence miss when neither a folder nor a project is
   *          present. Otherwise the accumulated confidence, with `detected`
   *          true only when it exceeds 0.5. `businessUnit` is the
   *          lower-cased `display_name` of the first folder resource's first
   *          instance, or `undefined` when that is unavailable.
   */
  detect(state: TerraformState): DetectionResult {
    const resources = state.resources || [];
    const resourceTypes = [...new Set(resources.map(r => r.type))];

    const hasFolder = resourceTypes.includes('google_folder');
    const hasProjects = resourceTypes.includes('google_project');
    // Any organisation-level IAM resource counts (binding, member, policy, ...),
    // not just `google_organization_iam_binding`.
    const hasOrgResources = resourceTypes.some(
      type =>
        typeof type === 'string' &&
        (type.startsWith('google_organization_iam_') || type === 'google_organization_policy'),
    );

    if (!hasFolder && !hasProjects) {
      return { detected: false, confidence: 0 };
    }

    // The folder's display name is the best available hint for the unit name.
    // Every step is optional: a state entry may carry no instances at all.
    const folderResource = resources.find(r => r.type === 'google_folder');
    const displayName = folderResource?.instances?.[0]?.attributes?.display_name;
    const businessUnit = typeof displayName === 'string' ? displayName.toLowerCase() : undefined;

    let tenths = 0;
    if (hasFolder) tenths += 4;
    if (hasProjects) tenths += 3;
    if (hasOrgResources) tenths += 2;

    return {
      detected: tenths > 5,
      confidence: tenths / 10,
      metadata: {
        businessUnit,
        method: 'terraform-state',
        resourceTypes,
      },
    };
  }
}
Detected Resources:
- google_folder → Organizational folder
- google_project → GCP projects
- google_organization_iam_* → Organization-level IAM
- google_organization_policy → Organization policies
Confidence: 30-90% depending on resource types (reported as detected only above 50%)
Strategy 7: Repository File Structure
Pattern: Repository contains specific directory structure
// src/onboarding/detection/file-structure.ts
/**
 * Recognises business-unit repositories heuristically from their file
 * layout: Terraform files plus well-known top-level directories.
 */
export class FileStructureDetector {
  /** Top-level directory prefixes that suggest a business-unit layout. */
  private readonly buStructureMarkers = [
    'terraform/',
    'modules/',
    'environments/',
    'projects/',
  ];

  /**
   * Fetches the repository tree and scores its layout.
   *
   * @param repoUrl Repository URL, resolved to owner/repo by `parseRepoUrl`.
   * @param branch  Git ref whose tree is inspected.
   * @returns A miss with confidence 0 unless the tree contains a `.tf` or
   *          `.tfvars` file and at least two marker directories. Otherwise
   *          confidence is 0.2 per matched marker, capped at 0.6. Any error
   *          while fetching or scanning the tree is reported as a miss.
   */
  async detect(repoUrl: string, branch: string): Promise<DetectionResult> {
    const miss = (): DetectionResult => ({ detected: false, confidence: 0 });

    try {
      const entries = await this.fetchRepoTree(repoUrl, branch);
      const paths = entries.map(entry => entry.path);

      // Infrastructure-as-code evidence: any Terraform source or variables file.
      const hasIaC = paths.some(p => p.endsWith('.tf') || p.endsWith('.tfvars'));

      // Marker directories that at least one path lives under.
      const structureMatches = this.buStructureMarkers.filter(prefix =>
        paths.some(p => p.startsWith(prefix)),
      );

      const enoughEvidence = hasIaC && structureMatches.length >= 2;
      if (!enoughEvidence) {
        return miss();
      }

      const confidence = Math.min(0.6, structureMatches.length * 0.2);

      return {
        detected: confidence > 0.3,
        confidence,
        metadata: { method: 'file-structure', hasIaC, structureMatches },
      };
    } catch (error) {
      return miss();
    }
  }

  /**
   * Retrieves the recursive git tree for a branch through the GitHub API.
   *
   * NOTE(review): `github` and `this.parseRepoUrl` are not declared in this
   * snippet — confirm where they come from.
   *
   * @param repoUrl Repository URL, resolved to owner/repo by `parseRepoUrl`.
   * @param branch  Ref passed to the API as `tree_sha`.
   * @returns The `tree` array from the API response.
   */
  private async fetchRepoTree(repoUrl: string, branch: string): Promise<GitTreeItem[]> {
    const { owner, repo } = this.parseRepoUrl(repoUrl);
    const { data } = await github.git.getTree({
      owner,
      repo,
      tree_sha: branch,
      recursive: 'true',
    });
    return data.tree;
  }
}
Expected Structure:
bu-finance-infrastructure/
├── terraform/
│ ├── main.tf
│ ├── variables.tf
│ └── outputs.tf
├── modules/
│ ├── folder/
│ └── project/
├── environments/
│ ├── dev.tfvars
│ └── prod.tfvars
└── projects/
├── finance-app-1/
└── finance-app-2/
Confidence: 40-60% (heuristic-based; at least two structure markers are required)
Composite Detection Algorithm
Weighted Scoring
// src/onboarding/detection/composite-detector.ts
/**
 * Runs every configured detector and combines their results into a single
 * score and a consensus business-unit name.
 *
 * The combined score is the plain arithmetic mean of the detectors'
 * confidences; no additional per-detector weight is applied.
 *
 * NOTE(review): with the seven default detectors the per-detector maxima in
 * this document (1.0, 0.9, 0.85, 0.8, 0.9, 0.7, 0.6) average to about 0.82,
 * so a 0.9 threshold looks unreachable — confirm the intended formula.
 *
 * NOTE(review): every detector is called with the same `input`, while the
 * detectors in this document take different arguments (repo name, topics,
 * workspace, state, repoUrl + branch) — confirm an adapter exists.
 */
export class CompositeDetector {
  private readonly detectors: Detector[];

  /**
   * @param detectors Detectors to run. Defaults to the seven built-in
   *                  detectors; the trailing comments give the confidence
   *                  each one nominally reports on a hit.
   */
  constructor(
    detectors: Detector[] = [
      new BackstageConfigDetector(), // 1.0
      new NamingConventionDetector(), // 0.9
      new TerraformCloudTagsDetector(), // 0.85
      new GitHubTopicsDetector(), // 0.8
      new TerraformStateDetector(), // 0.7 with folder + projects
      new DescriptionMarkerDetector(), // 0.7
      new FileStructureDetector(), // 0.4-0.6
    ],
  ) {
    this.detectors = detectors;
  }

  /**
   * Runs all detectors in parallel and aggregates their results.
   *
   * A detector that throws is logged with `console.warn` and counted as a
   * zero-confidence miss, so one failure cannot abort the whole run.
   *
   * @param input Detection input handed to every detector.
   * @returns The mean confidence, whether it reaches the 0.5 threshold, the
   *          aggregated metadata and the raw per-detector results. With no
   *          detectors configured the confidence is 0 rather than NaN.
   */
  async detect(input: DetectionInput): Promise<CompositeResult> {
    const results = await Promise.all(
      this.detectors.map(async detector => {
        try {
          return await detector.detect(input);
        } catch (error) {
          console.warn(`Detector ${detector.constructor.name} failed:`, error);
          return { detected: false, confidence: 0 };
        }
      }),
    );

    // Mean confidence: each detector can contribute at most 1.0.
    const totalConfidence = results.reduce((sum, r) => sum + r.confidence, 0);
    const normalizedScore = results.length > 0 ? totalConfidence / results.length : 0;

    const metadata = this.aggregateMetadata(results);

    const threshold = 0.5; // Require 50% overall confidence
    const detected = normalizedScore >= threshold;

    return {
      detected,
      confidence: normalizedScore,
      threshold,
      metadata,
      detectorResults: results,
    };
  }

  /**
   * Merges per-detector metadata into one summary.
   *
   * @param results Raw detector results.
   * @returns The most frequently reported business-unit name (or `null`),
   *          the methods of all detectors that reported a hit, and the share
   *          of reported names that agree with the chosen one. Consensus is
   *          0 when no detector reported a name.
   */
  private aggregateMetadata(results: DetectionResult[]): BusinessUnitMetadata {
    const hits = results.filter(r => r.detected);

    // Names reported by hits; null/empty names carry no vote.
    const businessUnits = hits.flatMap(r => {
      const name = r.metadata?.businessUnit;
      return name ? [name] : [];
    });

    const businessUnit = this.mostCommon(businessUnits);

    // A hit without a `method` contributes nothing instead of `undefined`.
    const methods = hits.flatMap(r => {
      const method = r.metadata?.method;
      return method != null ? [method] : [];
    });

    return {
      businessUnit,
      detectionMethods: methods,
      consensus: businessUnits.length > 0 ? this.calculateConsensus(businessUnits) : 0,
    };
  }

  /**
   * Returns the most frequent element, or `null` for an empty array. Ties
   * are resolved in favour of the element that appears first.
   */
  private mostCommon<T>(arr: T[]): T | null {
    if (arr.length === 0) return null;

    const counts = new Map<T, number>();
    for (const item of arr) {
      counts.set(item, (counts.get(item) ?? 0) + 1);
    }

    let maxCount = 0;
    let winner = arr[0];
    // Map iteration follows insertion order, so a strict `>` keeps the
    // earliest element on ties.
    for (const [item, count] of counts) {
      if (count > maxCount) {
        maxCount = count;
        winner = item;
      }
    }
    return winner;
  }

  /**
   * Share of names equal to the most common one.
   *
   * @param businessUnits Non-empty list of reported names.
   */
  private calculateConsensus(businessUnits: string[]): number {
    const winner = this.mostCommon(businessUnits);
    const matches = businessUnits.filter(bu => bu === winner).length;
    return matches / businessUnits.length;
  }
}
Detection Threshold
/**
 * Confidence cut-offs used to turn a composite detection score into an
 * onboarding decision. Declared `as const` so the values are read-only and
 * keep their literal types.
 */
export const DETECTION_THRESHOLDS = {
  /** Require 50% overall confidence. */
  DEFAULT: 0.5,
  /** Require 70% confidence for auto-onboarding without review. */
  AUTO_ONBOARD: 0.7,
  /** Require 90% confidence for silent onboarding (no notifications). */
  SILENT: 0.9,
  /** Below 30% confidence = definitely not a BU. */
  REJECT: 0.3,
} as const;
Decision Logic
/**
 * Maps a composite detection score onto an onboarding decision using
 * `DETECTION_THRESHOLDS`.
 */
export class DetectionDecisionMaker {
  /**
   * Chooses an action for a composite result. The checks run in this order
   * and the first one that applies wins:
   *
   * 1. below `REJECT`          -> `reject`
   * 2. at or above `SILENT`    -> `onboard`, no approval, no notification
   * 3. at or above `AUTO_ONBOARD` -> `onboard`, no approval, with notification
   * 4. at or above `DEFAULT`   -> `onboard_with_review`
   * 5. anything else           -> `defer`
   *
   * @param result Composite detection result; only `confidence` is read.
   * @returns The decision, always echoing the input confidence.
   */
  makeDecision(result: CompositeResult): DetectionDecision {
    const score = result.confidence;
    const limits = DETECTION_THRESHOLDS;

    // Both automatic tiers differ only in whether a notification is sent.
    const autoOnboard = (sendNotification: boolean): DetectionDecision => ({
      action: 'onboard',
      requiresApproval: false,
      sendNotification,
      confidence: score,
    });

    if (score < limits.REJECT) {
      return {
        action: 'reject',
        reason: 'Confidence below rejection threshold',
        confidence: score,
      };
    }

    if (score >= limits.SILENT) {
      return autoOnboard(false);
    }

    if (score >= limits.AUTO_ONBOARD) {
      return autoOnboard(true);
    }

    if (score >= limits.DEFAULT) {
      return {
        action: 'onboard_with_review',
        requiresApproval: true,
        sendNotification: true,
        confidence: score,
        reviewReason: 'Medium confidence detection',
      };
    }

    // Between REJECT and DEFAULT: not enough evidence either way.
    return {
      action: 'defer',
      reason: 'Ambiguous detection, awaiting additional signals',
      confidence: score,
    };
  }
}
Project Detection (Under Business Units)
Project Workspace Detection
// src/onboarding/detection/project-detector.ts
/**
 * Recognises project workspaces that belong to a known business unit, named
 * `bu-{business-unit}-{project}-{environment}`.
 */
export class ProjectDetector {
  /** Characters allowed in a business-unit slug. */
  private readonly slugPattern = /^[a-z0-9-]+$/;

  /**
   * Applied to the part of the workspace name that follows
   * `bu-{business-unit}-`: group 1 is the project name, group 2 the
   * environment.
   */
  private readonly projectSuffixPattern = /^([a-z0-9-]+)-(dev|staging|prod)$/;

  /**
   * Checks whether a workspace is a project of the given business unit.
   *
   * The workspace name is anchored on the known business-unit slug rather
   * than parsed with a single greedy pattern. Both the slug and the project
   * name may contain hyphens, so a greedy parse assigns every segment but
   * the last to the business unit and never matches hyphenated project
   * names such as `bu-finance-erp-billing-prod`.
   *
   * When one business-unit slug is a hyphen-delimited prefix of another
   * (`finance` and `finance-ops`), a workspace of the longer one also
   * matches the shorter one; the name alone cannot tell them apart.
   *
   * @param workspaceName Terraform workspace name.
   * @param businessUnit  Slug of the parent business unit.
   * @returns `{ detected: false }` when the name does not belong to the
   *          business unit; otherwise the parsed project, environment and
   *          derived component/system names.
   */
  detectProject(workspaceName: string, businessUnit: string): ProjectDetectionResult {
    if (typeof businessUnit !== 'string' || !this.slugPattern.test(businessUnit)) {
      return { detected: false };
    }

    const prefix = `bu-${businessUnit}-`;
    if (!workspaceName.startsWith(prefix)) {
      return { detected: false };
    }

    const match = this.projectSuffixPattern.exec(workspaceName.slice(prefix.length));
    if (match === null) {
      return { detected: false };
    }

    const [, projectName, environment] = match;

    return {
      detected: true,
      metadata: {
        businessUnit,
        projectName,
        environment,
        componentName: `${businessUnit}-${projectName}-${environment}`,
        systemName: `${businessUnit}-infrastructure`,
      },
    };
  }
}
Example:
- BU Workspace: bu-finance-infrastructure
- Project Workspaces:
  - bu-finance-erp-prod → Project: erp, Environment: prod
  - bu-finance-analytics-dev → Project: analytics, Environment: dev
Project Discovery Flow
/**
 * Discovers the project workspaces that belong to a business unit.
 *
 * NOTE(review): `this.tfcClient` and `this.projectDetector` are used below
 * but are not declared in this snippet — confirm how they are injected.
 */
export class ProjectDiscoveryService {
  /**
   * Lists every workspace in the Terraform Cloud organisation and keeps the
   * ones that `projectDetector` recognises as projects of `businessUnit`.
   *
   * Each workspace is run through the detector exactly once; the detection
   * result is reused to build the returned record.
   *
   * @param businessUnit Slug of the business unit whose projects are wanted.
   * @param tfcOrg       Terraform Cloud organisation to list workspaces from.
   * @returns One record per matching workspace: its id and name plus the
   *          detector's metadata.
   */
  async discoverProjects(businessUnit: string, tfcOrg: string): Promise<Project[]> {
    const allWorkspaces = await this.tfcClient.listWorkspaces(tfcOrg);

    return allWorkspaces.flatMap(ws => {
      const detection = this.projectDetector.detectProject(ws.name, businessUnit);
      if (!detection.detected) {
        return [];
      }
      return [
        {
          workspaceId: ws.id,
          workspaceName: ws.name,
          ...detection.metadata,
        },
      ];
    });
  }
}
Example Detection Scenarios
Scenario 1: Perfect Detection (94% confidence)
Input:
- Repository: bu-finance-infrastructure
- Topics: [business-unit, bu:finance]
- File: .backstage/config.yaml exists with kind: Domain
- TFC Workspace: bu-finance-infrastructure with tag business-unit
- State: Contains google_folder and google_project
Detection Results:
{
"detected": true,
"confidence": 0.94,
"metadata": {
"businessUnit": "finance",
"detectionMethods": [
"backstage-config",
"naming-convention",
"github-topics",
"tfc-tags",
"terraform-state"
],
"consensus": 1.0
},
"decision": {
"action": "onboard",
"requiresApproval": false,
"sendNotification": false
}
}
Scenario 2: Good Detection (75% confidence)
Input:
- Repository: bu-hr-platform
- Topics: [business-unit]
- No .backstage/config.yaml
- TFC Workspace: bu-hr-platform
- State: Contains google_project only
Detection Results:
{
"detected": true,
"confidence": 0.75,
"metadata": {
"businessUnit": "hr",
"detectionMethods": [
"naming-convention",
"github-topics",
"terraform-state"
],
"consensus": 1.0
},
"decision": {
"action": "onboard",
"requiresApproval": false,
"sendNotification": true
}
}
Scenario 3: Ambiguous Detection (55% confidence)
Input:
- Repository: infrastructure-finance
- Topics: []
- Description: [BU] Finance systems
- TFC Workspace: finance-infra
- State: Contains google_compute_instance only
Detection Results:
{
"detected": true,
"confidence": 0.55,
"metadata": {
"businessUnit": "finance",
"detectionMethods": [
"description-marker"
],
"consensus": 1.0
},
"decision": {
"action": "onboard_with_review",
"requiresApproval": true,
"sendNotification": true,
"reviewReason": "Medium confidence detection"
}
}
Scenario 4: Rejected Detection (20% confidence)
Input:
- Repository: my-app
- Topics: [application, frontend]
- No .backstage/config.yaml
- TFC Workspace: my-app-prod
- State: Contains google_cloud_run_service only
Detection Results:
{
"detected": false,
"confidence": 0.2,
"metadata": {
"businessUnit": null,
"detectionMethods": [],
"consensus": 0
},
"decision": {
"action": "reject",
"reason": "Confidence below rejection threshold"
}
}
Best Practices
1. Explicit Configuration Wins
Always prioritize .backstage/config.yaml over heuristics
2. Multiple Signals
Require at least 2 detection methods to agree for auto-onboarding
3. Manual Override
Provide UI for manual detection override by admins
4. Feedback Loop
Track detection accuracy and improve algorithms based on false positives/negatives
5. Logging
Log all detection attempts with full context for debugging
Next Steps
See 05-validation-quality-gates.md for validation logic after detection.