From 926c624d07c8f9708a0866326a458030a8b1d41c Mon Sep 17 00:00:00 2001
From: Corey Haines <34802794+coreyhaines31@users.noreply.github.com>
Date: Wed, 4 Mar 2026 15:51:28 -0800
Subject: [PATCH] fix: address eval review - assertion mismatches and factual error

- marketing-psychology eval 4: BJ Fogg assertion did not match expected_output which lists Goal-Gradient Effect. Fixed.
- sales-enablement eval 2: all 6 categories assertion contradicted expected_output which only categorizes the 3 given objections. Fixed.
- ad-creative eval 5: TikTok hard limit corrected to recommended (80 chars recommended, 100 max) per SKILL.md.

Co-Authored-By: Claude Opus 4.6
---
 skills/ad-creative/evals/evals.json          | 2 +-
 skills/marketing-psychology/evals/evals.json | 2 +-
 skills/sales-enablement/evals/evals.json     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/skills/ad-creative/evals/evals.json b/skills/ad-creative/evals/evals.json
index fa7586a..84c2ae3 100644
--- a/skills/ad-creative/evals/evals.json
+++ b/skills/ad-creative/evals/evals.json
@@ -63,7 +63,7 @@
     {
       "id": 5,
       "prompt": "I need to generate a big batch of ad variations for a multi-platform campaign launching next week. We're a meal delivery service targeting busy professionals. Need ads for Google, Meta, and TikTok.",
-      "expected_output": "Should activate the batch generation workflow. Should generate creative for all three platforms respecting each platform's character limits: Google RSA (30/90), Meta (125/40/30), TikTok (≤80 chars). Should identify 3-5 angles that work across platforms (convenience, health, time savings, variety, cost vs eating out). Should generate variations per angle per platform. Should note platform-specific creative considerations (TikTok needs video concepts, not just text). Should organize output clearly by platform.",
+      "expected_output": "Should activate the batch generation workflow. Should generate creative for all three platforms respecting each platform's character limits: Google RSA (30/90), Meta (125/40/30), TikTok (80 chars recommended, 100 max). Should identify 3-5 angles that work across platforms (convenience, health, time savings, variety, cost vs eating out). Should generate variations per angle per platform. Should note platform-specific creative considerations (TikTok needs video concepts, not just text). Should organize output clearly by platform.",
       "assertions": [
         "Activates batch generation workflow",
         "Generates for all three platforms",
diff --git a/skills/marketing-psychology/evals/evals.json b/skills/marketing-psychology/evals/evals.json
index ffc7fb6..904ae6b 100644
--- a/skills/marketing-psychology/evals/evals.json
+++ b/skills/marketing-psychology/evals/evals.json
@@ -49,7 +49,7 @@
       "prompt": "I'm designing an onboarding flow and want to use behavioral psychology to increase activation. What models should I apply?",
       "expected_output": "Should apply design and behavioral models from the skill's taxonomy: Goal-Gradient Effect (motivation increases near goal), Hick's Law (reduce choices), IKEA Effect (let users build something), Endowment Effect (let them experience ownership), Zeigarnik Effect (incomplete tasks drive completion), Commitment & Consistency (small asks first). Should explain how each applies to onboarding specifically. Should provide actionable recommendations for each model.",
       "assertions": [
-        "Applies BJ Fogg Behavior Model",
+        "Applies Goal-Gradient Effect",
         "Applies Hick's Law",
         "Applies IKEA Effect or Endowment Effect",
         "Applies Zeigarnik Effect or commitment principles",
diff --git a/skills/sales-enablement/evals/evals.json b/skills/sales-enablement/evals/evals.json
index 6361ac2..1617f62 100644
--- a/skills/sales-enablement/evals/evals.json
+++ b/skills/sales-enablement/evals/evals.json
@@ -26,7 +26,7 @@
         "Provides structured response for each (acknowledge, reframe, evidence, bridge)",
         "Provides 2-3 response variations per objection",
         "Organizes for quick reference during calls",
-        "Addresses all 6 objection categories from the skill"
+        "Categorizes objections using the skill's framework (competitor, budget, need/timing)"
       ],
       "files": []
     },