mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-01-24 08:13:08 +08:00
feat: package as Claude Code plugin with marketplace distribution
- Add .claude-plugin/plugin.json manifest for direct installation - Add .claude-plugin/marketplace.json for marketplace distribution - Reorganize skills to proper skill-name/SKILL.md format - Update hooks.json with relative paths for portability - Add new skills: continuous-learning, strategic-compact, eval-harness, verification-loop - Add new commands: checkpoint, eval, orchestrate, verify - Update README with plugin installation instructions Install via: /plugin marketplace add affaan-m/everything-claude-code /plugin install everything-claude-code@everything-claude-code
This commit is contained in:
46
.claude-plugin/marketplace.json
Normal file
46
.claude-plugin/marketplace.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"name": "everything-claude-code",
|
||||
"owner": {
|
||||
"name": "Affaan Mustafa",
|
||||
"email": "affaan@example.com"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Battle-tested Claude Code configurations from an Anthropic hackathon winner",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
"plugins": [
|
||||
{
|
||||
"name": "everything-claude-code",
|
||||
"source": ".",
|
||||
"description": "Complete collection of agents, skills, hooks, commands, and rules evolved over 10+ months of intensive daily use",
|
||||
"version": "1.0.0",
|
||||
"author": {
|
||||
"name": "Affaan Mustafa"
|
||||
},
|
||||
"homepage": "https://github.com/affaan-m/everything-claude-code",
|
||||
"repository": "https://github.com/affaan-m/everything-claude-code",
|
||||
"license": "MIT",
|
||||
"keywords": [
|
||||
"agents",
|
||||
"skills",
|
||||
"hooks",
|
||||
"commands",
|
||||
"tdd",
|
||||
"code-review",
|
||||
"security",
|
||||
"best-practices"
|
||||
],
|
||||
"category": "workflow",
|
||||
"tags": [
|
||||
"agents",
|
||||
"skills",
|
||||
"hooks",
|
||||
"commands",
|
||||
"tdd",
|
||||
"code-review",
|
||||
"security",
|
||||
"best-practices"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
33
.claude-plugin/plugin.json
Normal file
33
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"name": "everything-claude-code",
|
||||
"version": "1.0.0",
|
||||
"description": "Complete collection of battle-tested Claude Code configs from an Anthropic hackathon winner - agents, skills, hooks, commands, and rules evolved over 10+ months of intensive daily use",
|
||||
"author": {
|
||||
"name": "Affaan Mustafa",
|
||||
"url": "https://x.com/affaanmustafa"
|
||||
},
|
||||
"homepage": "https://github.com/affaan-m/everything-claude-code",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/affaan-m/everything-claude-code.git"
|
||||
},
|
||||
"license": "MIT",
|
||||
"keywords": [
|
||||
"claude-code",
|
||||
"agents",
|
||||
"skills",
|
||||
"hooks",
|
||||
"commands",
|
||||
"rules",
|
||||
"tdd",
|
||||
"code-review",
|
||||
"security",
|
||||
"workflow",
|
||||
"automation",
|
||||
"best-practices"
|
||||
],
|
||||
"commands": "./commands",
|
||||
"agents": "./agents",
|
||||
"skills": "./skills",
|
||||
"hooks": "./hooks/hooks.json"
|
||||
}
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -24,3 +24,6 @@ node_modules/
|
||||
# Personal configs (if any)
|
||||
personal/
|
||||
private/
|
||||
|
||||
# Session templates (not committed)
|
||||
examples/sessions/*.tmp
|
||||
|
||||
73
README.md
73
README.md
@@ -42,8 +42,14 @@ The advanced techniques - token optimization, memory persistence across sessions
|
||||
|
||||
## What's Inside
|
||||
|
||||
This repo is a **Claude Code plugin** - install it directly or copy components manually.
|
||||
|
||||
```
|
||||
everything-claude-code/
|
||||
|-- .claude-plugin/ # Plugin and marketplace manifests
|
||||
| |-- plugin.json # Plugin metadata and component paths
|
||||
| |-- marketplace.json # Marketplace catalog for /plugin marketplace add
|
||||
|
|
||||
|-- agents/ # Specialized subagents for delegation
|
||||
| |-- planner.md # Feature implementation planning
|
||||
| |-- architect.md # System design decisions
|
||||
@@ -56,13 +62,15 @@ everything-claude-code/
|
||||
| |-- doc-updater.md # Documentation sync
|
||||
|
|
||||
|-- skills/ # Workflow definitions and domain knowledge
|
||||
| |-- coding-standards.md # Language best practices
|
||||
| |-- backend-patterns.md # API, database, caching patterns
|
||||
| |-- frontend-patterns.md # React, Next.js patterns
|
||||
| |-- coding-standards/ # Language best practices
|
||||
| |-- backend-patterns/ # API, database, caching patterns
|
||||
| |-- frontend-patterns/ # React, Next.js patterns
|
||||
| |-- continuous-learning/ # Auto-extract patterns from sessions (Longform Guide)
|
||||
| |-- strategic-compact/ # Manual compaction suggestions (Longform Guide)
|
||||
| |-- tdd-workflow/ # TDD methodology
|
||||
| |-- security-review/ # Security checklist
|
||||
| |-- eval-harness/ # Verification loop evaluation (Longform Guide)
|
||||
| |-- verification-loop/ # Continuous verification (Longform Guide)
|
||||
|
|
||||
|-- commands/ # Slash commands for quick execution
|
||||
| |-- tdd.md # /tdd - Test-driven development
|
||||
@@ -72,8 +80,10 @@ everything-claude-code/
|
||||
| |-- build-fix.md # /build-fix - Fix build errors
|
||||
| |-- refactor-clean.md # /refactor-clean - Dead code removal
|
||||
| |-- learn.md # /learn - Extract patterns mid-session (Longform Guide)
|
||||
| |-- checkpoint.md # /checkpoint - Save verification state (Longform Guide)
|
||||
| |-- verify.md # /verify - Run verification loop (Longform Guide)
|
||||
|
|
||||
|-- rules/ # Always-follow guidelines
|
||||
|-- rules/ # Always-follow guidelines (copy to ~/.claude/rules/)
|
||||
| |-- security.md # Mandatory security checks
|
||||
| |-- coding-style.md # Immutability, file organization
|
||||
| |-- testing.md # TDD, 80% coverage requirement
|
||||
@@ -84,9 +94,6 @@ everything-claude-code/
|
||||
|-- hooks/ # Trigger-based automations
|
||||
| |-- hooks.json # All hooks config (PreToolUse, PostToolUse, Stop, etc.)
|
||||
| |-- memory-persistence/ # Session lifecycle hooks (Longform Guide)
|
||||
| | |-- pre-compact.sh # Save state before compaction
|
||||
| | |-- session-start.sh # Load previous context
|
||||
| | |-- session-end.sh # Persist learnings on end
|
||||
| |-- strategic-compact/ # Compaction suggestions (Longform Guide)
|
||||
|
|
||||
|-- contexts/ # Dynamic system prompt injection contexts (Longform Guide)
|
||||
@@ -97,20 +104,54 @@ everything-claude-code/
|
||||
|-- examples/ # Example configurations and sessions
|
||||
| |-- CLAUDE.md # Example project-level config
|
||||
| |-- user-CLAUDE.md # Example user-level config
|
||||
| |-- sessions/ # Example session log files (Longform Guide)
|
||||
|
|
||||
|-- mcp-configs/ # MCP server configurations
|
||||
| |-- mcp-servers.json # GitHub, Supabase, Vercel, Railway, etc.
|
||||
|
|
||||
|-- plugins/ # Plugin ecosystem documentation
|
||||
|-- README.md # Plugins, marketplaces, skills guide
|
||||
|-- marketplace.json # Self-hosted marketplace config (for /plugin marketplace add)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
## Installation
|
||||
|
||||
### 1. Copy what you need
|
||||
### Option 1: Install as Plugin (Recommended)
|
||||
|
||||
The easiest way to use this repo - install as a Claude Code plugin:
|
||||
|
||||
```bash
|
||||
# Add this repo as a marketplace
|
||||
/plugin marketplace add affaan-m/everything-claude-code
|
||||
|
||||
# Install the plugin
|
||||
/plugin install everything-claude-code@everything-claude-code
|
||||
```
|
||||
|
||||
Or add directly to your `~/.claude/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"extraKnownMarketplaces": {
|
||||
"everything-claude-code": {
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "affaan-m/everything-claude-code"
|
||||
}
|
||||
}
|
||||
},
|
||||
"enabledPlugins": {
|
||||
"everything-claude-code@everything-claude-code": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This gives you instant access to all commands, agents, skills, and hooks.
|
||||
|
||||
---
|
||||
|
||||
### Option 2: Manual Installation
|
||||
|
||||
If you prefer manual control over what's installed:
|
||||
|
||||
```bash
|
||||
# Clone the repo
|
||||
@@ -129,17 +170,19 @@ cp everything-claude-code/commands/*.md ~/.claude/commands/
|
||||
cp -r everything-claude-code/skills/* ~/.claude/skills/
|
||||
```
|
||||
|
||||
### 2. Add hooks to settings.json
|
||||
#### Add hooks to settings.json
|
||||
|
||||
Copy the hooks from `hooks/hooks.json` to your `~/.claude/settings.json`.
|
||||
|
||||
### 3. Configure MCPs
|
||||
#### Configure MCPs
|
||||
|
||||
Copy desired MCP servers from `mcp-configs/mcp-servers.json` to your `~/.claude.json`.
|
||||
|
||||
**Important:** Replace `YOUR_*_HERE` placeholders with your actual API keys.
|
||||
|
||||
### 4. Read the guides
|
||||
---
|
||||
|
||||
### Read the Guides
|
||||
|
||||
Seriously, read the guides. These configs make 10x more sense with context.
|
||||
|
||||
|
||||
74
commands/checkpoint.md
Normal file
74
commands/checkpoint.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# Checkpoint Command
|
||||
|
||||
Create or verify a checkpoint in your workflow.
|
||||
|
||||
## Usage
|
||||
|
||||
`/checkpoint [create|verify|list] [name]`
|
||||
|
||||
## Create Checkpoint
|
||||
|
||||
When creating a checkpoint:
|
||||
|
||||
1. Run `/verify quick` to ensure current state is clean
|
||||
2. Create a git stash or commit with checkpoint name
|
||||
3. Log checkpoint to `.claude/checkpoints.log`:
|
||||
|
||||
```bash
|
||||
echo "$(date +%Y-%m-%d-%H:%M) | $CHECKPOINT_NAME | $(git rev-parse --short HEAD)" >> .claude/checkpoints.log
|
||||
```
|
||||
|
||||
4. Report checkpoint created
|
||||
|
||||
## Verify Checkpoint
|
||||
|
||||
When verifying against a checkpoint:
|
||||
|
||||
1. Read checkpoint from log
|
||||
2. Compare current state to checkpoint:
|
||||
- Files added since checkpoint
|
||||
- Files modified since checkpoint
|
||||
- Test pass rate now vs then
|
||||
- Coverage now vs then
|
||||
|
||||
3. Report:
|
||||
```
|
||||
CHECKPOINT COMPARISON: $NAME
|
||||
============================
|
||||
Files changed: X
|
||||
Tests: +Y passed / -Z failed
|
||||
Coverage: +X% / -Y%
|
||||
Build: [PASS/FAIL]
|
||||
```
|
||||
|
||||
## List Checkpoints
|
||||
|
||||
Show all checkpoints with:
|
||||
- Name
|
||||
- Timestamp
|
||||
- Git SHA
|
||||
- Status (current, behind, ahead)
|
||||
|
||||
## Workflow
|
||||
|
||||
Typical checkpoint flow:
|
||||
|
||||
```
|
||||
[Start] --> /checkpoint create "feature-start"
|
||||
|
|
||||
[Implement] --> /checkpoint create "core-done"
|
||||
|
|
||||
[Test] --> /checkpoint verify "core-done"
|
||||
|
|
||||
[Refactor] --> /checkpoint create "refactor-done"
|
||||
|
|
||||
[PR] --> /checkpoint verify "feature-start"
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
$ARGUMENTS:
|
||||
- `create <name>` - Create named checkpoint
|
||||
- `verify <name>` - Verify against named checkpoint
|
||||
- `list` - Show all checkpoints
|
||||
- `clear` - Remove old checkpoints (keeps last 5)
|
||||
120
commands/eval.md
Normal file
120
commands/eval.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# Eval Command
|
||||
|
||||
Manage eval-driven development workflow.
|
||||
|
||||
## Usage
|
||||
|
||||
`/eval [define|check|report|list] [feature-name]`
|
||||
|
||||
## Define Evals
|
||||
|
||||
`/eval define feature-name`
|
||||
|
||||
Create a new eval definition:
|
||||
|
||||
1. Create `.claude/evals/feature-name.md` with template:
|
||||
|
||||
```markdown
|
||||
## EVAL: feature-name
|
||||
Created: $(date)
|
||||
|
||||
### Capability Evals
|
||||
- [ ] [Description of capability 1]
|
||||
- [ ] [Description of capability 2]
|
||||
|
||||
### Regression Evals
|
||||
- [ ] [Existing behavior 1 still works]
|
||||
- [ ] [Existing behavior 2 still works]
|
||||
|
||||
### Success Criteria
|
||||
- pass@3 > 90% for capability evals
|
||||
- pass^3 = 100% for regression evals
|
||||
```
|
||||
|
||||
2. Prompt user to fill in specific criteria
|
||||
|
||||
## Check Evals
|
||||
|
||||
`/eval check feature-name`
|
||||
|
||||
Run evals for a feature:
|
||||
|
||||
1. Read eval definition from `.claude/evals/feature-name.md`
|
||||
2. For each capability eval:
|
||||
- Attempt to verify criterion
|
||||
- Record PASS/FAIL
|
||||
- Log attempt in `.claude/evals/feature-name.log`
|
||||
3. For each regression eval:
|
||||
- Run relevant tests
|
||||
- Compare against baseline
|
||||
- Record PASS/FAIL
|
||||
4. Report current status:
|
||||
|
||||
```
|
||||
EVAL CHECK: feature-name
|
||||
========================
|
||||
Capability: X/Y passing
|
||||
Regression: X/Y passing
|
||||
Status: IN PROGRESS / READY
|
||||
```
|
||||
|
||||
## Report Evals
|
||||
|
||||
`/eval report feature-name`
|
||||
|
||||
Generate comprehensive eval report:
|
||||
|
||||
```
|
||||
EVAL REPORT: feature-name
|
||||
=========================
|
||||
Generated: $(date)
|
||||
|
||||
CAPABILITY EVALS
|
||||
----------------
|
||||
[eval-1]: PASS (pass@1)
|
||||
[eval-2]: PASS (pass@2) - required retry
|
||||
[eval-3]: FAIL - see notes
|
||||
|
||||
REGRESSION EVALS
|
||||
----------------
|
||||
[test-1]: PASS
|
||||
[test-2]: PASS
|
||||
[test-3]: PASS
|
||||
|
||||
METRICS
|
||||
-------
|
||||
Capability pass@1: 67%
|
||||
Capability pass@3: 100%
|
||||
Regression pass^3: 100%
|
||||
|
||||
NOTES
|
||||
-----
|
||||
[Any issues, edge cases, or observations]
|
||||
|
||||
RECOMMENDATION
|
||||
--------------
|
||||
[SHIP / NEEDS WORK / BLOCKED]
|
||||
```
|
||||
|
||||
## List Evals
|
||||
|
||||
`/eval list`
|
||||
|
||||
Show all eval definitions:
|
||||
|
||||
```
|
||||
EVAL DEFINITIONS
|
||||
================
|
||||
feature-auth [3/5 passing] IN PROGRESS
|
||||
feature-search [5/5 passing] READY
|
||||
feature-export [0/4 passing] NOT STARTED
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
$ARGUMENTS:
|
||||
- `define <name>` - Create new eval definition
|
||||
- `check <name>` - Run and check evals
|
||||
- `report <name>` - Generate full report
|
||||
- `list` - Show all evals
|
||||
- `clean` - Remove old eval logs (keeps last 10 runs)
|
||||
172
commands/orchestrate.md
Normal file
172
commands/orchestrate.md
Normal file
@@ -0,0 +1,172 @@
|
||||
# Orchestrate Command
|
||||
|
||||
Sequential agent workflow for complex tasks.
|
||||
|
||||
## Usage
|
||||
|
||||
`/orchestrate [workflow-type] [task-description]`
|
||||
|
||||
## Workflow Types
|
||||
|
||||
### feature
|
||||
Full feature implementation workflow:
|
||||
```
|
||||
planner -> tdd-guide -> code-reviewer -> security-reviewer
|
||||
```
|
||||
|
||||
### bugfix
|
||||
Bug investigation and fix workflow:
|
||||
```
|
||||
explorer -> tdd-guide -> code-reviewer
|
||||
```
|
||||
|
||||
### refactor
|
||||
Safe refactoring workflow:
|
||||
```
|
||||
architect -> code-reviewer -> tdd-guide
|
||||
```
|
||||
|
||||
### security
|
||||
Security-focused review:
|
||||
```
|
||||
security-reviewer -> code-reviewer -> architect
|
||||
```
|
||||
|
||||
## Execution Pattern
|
||||
|
||||
For each agent in the workflow:
|
||||
|
||||
1. **Invoke agent** with context from previous agent
|
||||
2. **Collect output** as structured handoff document
|
||||
3. **Pass to next agent** in chain
|
||||
4. **Aggregate results** into final report
|
||||
|
||||
## Handoff Document Format
|
||||
|
||||
Between agents, create handoff document:
|
||||
|
||||
```markdown
|
||||
## HANDOFF: [previous-agent] -> [next-agent]
|
||||
|
||||
### Context
|
||||
[Summary of what was done]
|
||||
|
||||
### Findings
|
||||
[Key discoveries or decisions]
|
||||
|
||||
### Files Modified
|
||||
[List of files touched]
|
||||
|
||||
### Open Questions
|
||||
[Unresolved items for next agent]
|
||||
|
||||
### Recommendations
|
||||
[Suggested next steps]
|
||||
```
|
||||
|
||||
## Example: Feature Workflow
|
||||
|
||||
```
|
||||
/orchestrate feature "Add user authentication"
|
||||
```
|
||||
|
||||
Executes:
|
||||
|
||||
1. **Planner Agent**
|
||||
- Analyzes requirements
|
||||
- Creates implementation plan
|
||||
- Identifies dependencies
|
||||
- Output: `HANDOFF: planner -> tdd-guide`
|
||||
|
||||
2. **TDD Guide Agent**
|
||||
- Reads planner handoff
|
||||
- Writes tests first
|
||||
- Implements to pass tests
|
||||
- Output: `HANDOFF: tdd-guide -> code-reviewer`
|
||||
|
||||
3. **Code Reviewer Agent**
|
||||
- Reviews implementation
|
||||
- Checks for issues
|
||||
- Suggests improvements
|
||||
- Output: `HANDOFF: code-reviewer -> security-reviewer`
|
||||
|
||||
4. **Security Reviewer Agent**
|
||||
- Security audit
|
||||
- Vulnerability check
|
||||
- Final approval
|
||||
- Output: Final Report
|
||||
|
||||
## Final Report Format
|
||||
|
||||
```
|
||||
ORCHESTRATION REPORT
|
||||
====================
|
||||
Workflow: feature
|
||||
Task: Add user authentication
|
||||
Agents: planner -> tdd-guide -> code-reviewer -> security-reviewer
|
||||
|
||||
SUMMARY
|
||||
-------
|
||||
[One paragraph summary]
|
||||
|
||||
AGENT OUTPUTS
|
||||
-------------
|
||||
Planner: [summary]
|
||||
TDD Guide: [summary]
|
||||
Code Reviewer: [summary]
|
||||
Security Reviewer: [summary]
|
||||
|
||||
FILES CHANGED
|
||||
-------------
|
||||
[List all files modified]
|
||||
|
||||
TEST RESULTS
|
||||
------------
|
||||
[Test pass/fail summary]
|
||||
|
||||
SECURITY STATUS
|
||||
---------------
|
||||
[Security findings]
|
||||
|
||||
RECOMMENDATION
|
||||
--------------
|
||||
[SHIP / NEEDS WORK / BLOCKED]
|
||||
```
|
||||
|
||||
## Parallel Execution
|
||||
|
||||
For independent checks, run agents in parallel:
|
||||
|
||||
```markdown
|
||||
### Parallel Phase
|
||||
Run simultaneously:
|
||||
- code-reviewer (quality)
|
||||
- security-reviewer (security)
|
||||
- architect (design)
|
||||
|
||||
### Merge Results
|
||||
Combine outputs into single report
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
$ARGUMENTS:
|
||||
- `feature <description>` - Full feature workflow
|
||||
- `bugfix <description>` - Bug fix workflow
|
||||
- `refactor <description>` - Refactoring workflow
|
||||
- `security <description>` - Security review workflow
|
||||
- `custom <agents> <description>` - Custom agent sequence
|
||||
|
||||
## Custom Workflow Example
|
||||
|
||||
```
|
||||
/orchestrate custom "architect,tdd-guide,code-reviewer" "Redesign caching layer"
|
||||
```
|
||||
|
||||
## Tips
|
||||
|
||||
1. **Start with planner** for complex features
|
||||
2. **Always include code-reviewer** before merge
|
||||
3. **Use security-reviewer** for auth/payment/PII
|
||||
4. **Keep handoffs concise** - focus on what next agent needs
|
||||
5. **Run verification** between agents if needed
|
||||
59
commands/verify.md
Normal file
59
commands/verify.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Verification Command
|
||||
|
||||
Run comprehensive verification on current codebase state.
|
||||
|
||||
## Instructions
|
||||
|
||||
Execute verification in this exact order:
|
||||
|
||||
1. **Build Check**
|
||||
- Run the build command for this project
|
||||
- If it fails, report errors and STOP
|
||||
|
||||
2. **Type Check**
|
||||
- Run TypeScript/type checker
|
||||
- Report all errors with file:line
|
||||
|
||||
3. **Lint Check**
|
||||
- Run linter
|
||||
- Report warnings and errors
|
||||
|
||||
4. **Test Suite**
|
||||
- Run all tests
|
||||
- Report pass/fail count
|
||||
- Report coverage percentage
|
||||
|
||||
5. **Console.log Audit**
|
||||
- Search for console.log in source files
|
||||
- Report locations
|
||||
|
||||
6. **Git Status**
|
||||
- Show uncommitted changes
|
||||
- Show files modified since last commit
|
||||
|
||||
## Output
|
||||
|
||||
Produce a concise verification report:
|
||||
|
||||
```
|
||||
VERIFICATION: [PASS/FAIL]
|
||||
|
||||
Build: [OK/FAIL]
|
||||
Types: [OK/X errors]
|
||||
Lint: [OK/X issues]
|
||||
Tests: [X/Y passed, Z% coverage]
|
||||
Secrets: [OK/X found]
|
||||
Logs: [OK/X console.logs]
|
||||
|
||||
Ready for PR: [YES/NO]
|
||||
```
|
||||
|
||||
If any critical issues, list them with fix suggestions.
|
||||
|
||||
## Arguments
|
||||
|
||||
$ARGUMENTS can be:
|
||||
- `quick` - Only build + types
|
||||
- `full` - All checks (default)
|
||||
- `pre-commit` - Checks relevant for commits
|
||||
- `pre-pr` - Full checks plus security scan
|
||||
@@ -47,7 +47,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "~/.claude/hooks/strategic-compact/suggest-compact.sh"
|
||||
"command": "./hooks/strategic-compact/suggest-compact.sh"
|
||||
}
|
||||
],
|
||||
"description": "Suggest manual compaction at logical intervals"
|
||||
@@ -59,7 +59,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "~/.claude/hooks/memory-persistence/pre-compact.sh"
|
||||
"command": "./hooks/memory-persistence/pre-compact.sh"
|
||||
}
|
||||
],
|
||||
"description": "Save state before context compaction"
|
||||
@@ -71,7 +71,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "~/.claude/hooks/memory-persistence/session-start.sh"
|
||||
"command": "./hooks/memory-persistence/session-start.sh"
|
||||
}
|
||||
],
|
||||
"description": "Load previous context on new session"
|
||||
@@ -135,7 +135,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "~/.claude/hooks/memory-persistence/session-end.sh"
|
||||
"command": "./hooks/memory-persistence/session-end.sh"
|
||||
}
|
||||
],
|
||||
"description": "Persist session state on end"
|
||||
@@ -145,7 +145,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "~/.claude/skills/continuous-learning/evaluate-session.sh"
|
||||
"command": "./skills/continuous-learning/evaluate-session.sh"
|
||||
}
|
||||
],
|
||||
"description": "Evaluate session for extractable patterns"
|
||||
|
||||
80
skills/continuous-learning/SKILL.md
Normal file
80
skills/continuous-learning/SKILL.md
Normal file
@@ -0,0 +1,80 @@
|
||||
---
|
||||
name: continuous-learning
|
||||
description: Automatically extract reusable patterns from Claude Code sessions and save them as learned skills for future use.
|
||||
---
|
||||
|
||||
# Continuous Learning Skill
|
||||
|
||||
Automatically evaluates Claude Code sessions on end to extract reusable patterns that can be saved as learned skills.
|
||||
|
||||
## How It Works
|
||||
|
||||
This skill runs as a **Stop hook** at the end of each session:
|
||||
|
||||
1. **Session Evaluation**: Checks if session has enough messages (default: 10+)
|
||||
2. **Pattern Detection**: Identifies extractable patterns from the session
|
||||
3. **Skill Extraction**: Saves useful patterns to `~/.claude/skills/learned/`
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `config.json` to customize:
|
||||
|
||||
```json
|
||||
{
|
||||
"min_session_length": 10,
|
||||
"extraction_threshold": "medium",
|
||||
"auto_approve": false,
|
||||
"learned_skills_path": "~/.claude/skills/learned/",
|
||||
"patterns_to_detect": [
|
||||
"error_resolution",
|
||||
"user_corrections",
|
||||
"workarounds",
|
||||
"debugging_techniques",
|
||||
"project_specific"
|
||||
],
|
||||
"ignore_patterns": [
|
||||
"simple_typos",
|
||||
"one_time_fixes",
|
||||
"external_api_issues"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Pattern Types
|
||||
|
||||
| Pattern | Description |
|
||||
|---------|-------------|
|
||||
| `error_resolution` | How specific errors were resolved |
|
||||
| `user_corrections` | Patterns from user corrections |
|
||||
| `workarounds` | Solutions to framework/library quirks |
|
||||
| `debugging_techniques` | Effective debugging approaches |
|
||||
| `project_specific` | Project-specific conventions |
|
||||
|
||||
## Hook Setup
|
||||
|
||||
Add to your `~/.claude/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"hooks": {
|
||||
"Stop": [{
|
||||
"matcher": "*",
|
||||
"hooks": [{
|
||||
"type": "command",
|
||||
"command": "~/.claude/skills/continuous-learning/evaluate-session.sh"
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Why Stop Hook?
|
||||
|
||||
- **Lightweight**: Runs once at session end
|
||||
- **Non-blocking**: Doesn't add latency to every message
|
||||
- **Complete context**: Has access to full session transcript
|
||||
|
||||
## Related
|
||||
|
||||
- [The Longform Guide](https://x.com/affaanmustafa/status/2014040193557471352) - Section on continuous learning
|
||||
- `/learn` command - Manual pattern extraction mid-session
|
||||
221
skills/eval-harness/SKILL.md
Normal file
221
skills/eval-harness/SKILL.md
Normal file
@@ -0,0 +1,221 @@
|
||||
# Eval Harness Skill
|
||||
|
||||
A formal evaluation framework for Claude Code sessions, implementing eval-driven development (EDD) principles.
|
||||
|
||||
## Philosophy
|
||||
|
||||
Eval-Driven Development treats evals as the "unit tests of AI development":
|
||||
- Define expected behavior BEFORE implementation
|
||||
- Run evals continuously during development
|
||||
- Track regressions with each change
|
||||
- Use pass@k metrics for reliability measurement
|
||||
|
||||
## Eval Types
|
||||
|
||||
### Capability Evals
|
||||
Test if Claude can do something it couldn't before:
|
||||
```markdown
|
||||
[CAPABILITY EVAL: feature-name]
|
||||
Task: Description of what Claude should accomplish
|
||||
Success Criteria:
|
||||
- [ ] Criterion 1
|
||||
- [ ] Criterion 2
|
||||
- [ ] Criterion 3
|
||||
Expected Output: Description of expected result
|
||||
```
|
||||
|
||||
### Regression Evals
|
||||
Ensure changes don't break existing functionality:
|
||||
```markdown
|
||||
[REGRESSION EVAL: feature-name]
|
||||
Baseline: SHA or checkpoint name
|
||||
Tests:
|
||||
- existing-test-1: PASS/FAIL
|
||||
- existing-test-2: PASS/FAIL
|
||||
- existing-test-3: PASS/FAIL
|
||||
Result: X/Y passed (previously Y/Y)
|
||||
```
|
||||
|
||||
## Grader Types
|
||||
|
||||
### 1. Code-Based Grader
|
||||
Deterministic checks using code:
|
||||
```bash
|
||||
# Check if file contains expected pattern
|
||||
grep -q "export function handleAuth" src/auth.ts && echo "PASS" || echo "FAIL"
|
||||
|
||||
# Check if tests pass
|
||||
npm test -- --testPathPattern="auth" && echo "PASS" || echo "FAIL"
|
||||
|
||||
# Check if build succeeds
|
||||
npm run build && echo "PASS" || echo "FAIL"
|
||||
```
|
||||
|
||||
### 2. Model-Based Grader
|
||||
Use Claude to evaluate open-ended outputs:
|
||||
```markdown
|
||||
[MODEL GRADER PROMPT]
|
||||
Evaluate the following code change:
|
||||
1. Does it solve the stated problem?
|
||||
2. Is it well-structured?
|
||||
3. Are edge cases handled?
|
||||
4. Is error handling appropriate?
|
||||
|
||||
Score: 1-5 (1=poor, 5=excellent)
|
||||
Reasoning: [explanation]
|
||||
```
|
||||
|
||||
### 3. Human Grader
|
||||
Flag for manual review:
|
||||
```markdown
|
||||
[HUMAN REVIEW REQUIRED]
|
||||
Change: Description of what changed
|
||||
Reason: Why human review is needed
|
||||
Risk Level: LOW/MEDIUM/HIGH
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
### pass@k
|
||||
"At least one success in k attempts"
|
||||
- pass@1: First attempt success rate
|
||||
- pass@3: Success within 3 attempts
|
||||
- Typical target: pass@3 > 90%
|
||||
|
||||
### pass^k
|
||||
"All k trials succeed"
|
||||
- Higher bar for reliability
|
||||
- pass^3: 3 consecutive successes
|
||||
- Use for critical paths
|
||||
|
||||
## Eval Workflow
|
||||
|
||||
### 1. Define (Before Coding)
|
||||
```markdown
|
||||
## EVAL DEFINITION: feature-xyz
|
||||
|
||||
### Capability Evals
|
||||
1. Can create new user account
|
||||
2. Can validate email format
|
||||
3. Can hash password securely
|
||||
|
||||
### Regression Evals
|
||||
1. Existing login still works
|
||||
2. Session management unchanged
|
||||
3. Logout flow intact
|
||||
|
||||
### Success Metrics
|
||||
- pass@3 > 90% for capability evals
|
||||
- pass^3 = 100% for regression evals
|
||||
```
|
||||
|
||||
### 2. Implement
|
||||
Write code to pass the defined evals.
|
||||
|
||||
### 3. Evaluate
|
||||
```bash
|
||||
# Run capability evals
|
||||
[Run each capability eval, record PASS/FAIL]
|
||||
|
||||
# Run regression evals
|
||||
npm test -- --testPathPattern="existing"
|
||||
|
||||
# Generate report
|
||||
```
|
||||
|
||||
### 4. Report
|
||||
```markdown
|
||||
EVAL REPORT: feature-xyz
|
||||
========================
|
||||
|
||||
Capability Evals:
|
||||
create-user: PASS (pass@1)
|
||||
validate-email: PASS (pass@2)
|
||||
hash-password: PASS (pass@1)
|
||||
Overall: 3/3 passed
|
||||
|
||||
Regression Evals:
|
||||
login-flow: PASS
|
||||
session-mgmt: PASS
|
||||
logout-flow: PASS
|
||||
Overall: 3/3 passed
|
||||
|
||||
Metrics:
|
||||
pass@1: 67% (2/3)
|
||||
pass@3: 100% (3/3)
|
||||
|
||||
Status: READY FOR REVIEW
|
||||
```
|
||||
|
||||
## Integration Patterns
|
||||
|
||||
### Pre-Implementation
|
||||
```
|
||||
/eval define feature-name
|
||||
```
|
||||
Creates eval definition file at `.claude/evals/feature-name.md`
|
||||
|
||||
### During Implementation
|
||||
```
|
||||
/eval check feature-name
|
||||
```
|
||||
Runs current evals and reports status
|
||||
|
||||
### Post-Implementation
|
||||
```
|
||||
/eval report feature-name
|
||||
```
|
||||
Generates full eval report
|
||||
|
||||
## Eval Storage
|
||||
|
||||
Store evals in project:
|
||||
```
|
||||
.claude/
|
||||
evals/
|
||||
feature-xyz.md # Eval definition
|
||||
feature-xyz.log # Eval run history
|
||||
baseline.json # Regression baselines
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Define evals BEFORE coding** - Forces clear thinking about success criteria
|
||||
2. **Run evals frequently** - Catch regressions early
|
||||
3. **Track pass@k over time** - Monitor reliability trends
|
||||
4. **Use code graders when possible** - Deterministic > probabilistic
|
||||
5. **Human review for security** - Never fully automate security checks
|
||||
6. **Keep evals fast** - Slow evals don't get run
|
||||
7. **Version evals with code** - Evals are first-class artifacts
|
||||
|
||||
## Example: Adding Authentication
|
||||
|
||||
```markdown
|
||||
## EVAL: add-authentication
|
||||
|
||||
### Phase 1: Define (10 min)
|
||||
Capability Evals:
|
||||
- [ ] User can register with email/password
|
||||
- [ ] User can login with valid credentials
|
||||
- [ ] Invalid credentials rejected with proper error
|
||||
- [ ] Sessions persist across page reloads
|
||||
- [ ] Logout clears session
|
||||
|
||||
Regression Evals:
|
||||
- [ ] Public routes still accessible
|
||||
- [ ] API responses unchanged
|
||||
- [ ] Database schema compatible
|
||||
|
||||
### Phase 2: Implement (varies)
|
||||
[Write code]
|
||||
|
||||
### Phase 3: Evaluate
|
||||
Run: /eval check add-authentication
|
||||
|
||||
### Phase 4: Report
|
||||
EVAL REPORT: add-authentication
|
||||
==============================
|
||||
Capability: 5/5 passed (pass@3: 100%)
|
||||
Regression: 3/3 passed (pass^3: 100%)
|
||||
Status: SHIP IT
|
||||
```
|
||||
63
skills/strategic-compact/SKILL.md
Normal file
63
skills/strategic-compact/SKILL.md
Normal file
@@ -0,0 +1,63 @@
|
||||
---
|
||||
name: strategic-compact
|
||||
description: Suggests manual context compaction at logical intervals to preserve context through task phases rather than arbitrary auto-compaction.
|
||||
---
|
||||
|
||||
# Strategic Compact Skill
|
||||
|
||||
Suggests manual `/compact` at strategic points in your workflow rather than relying on arbitrary auto-compaction.
|
||||
|
||||
## Why Strategic Compaction?
|
||||
|
||||
Auto-compaction triggers at arbitrary points:
|
||||
- Often mid-task, losing important context
|
||||
- No awareness of logical task boundaries
|
||||
- Can interrupt complex multi-step operations
|
||||
|
||||
Strategic compaction at logical boundaries:
|
||||
- **After exploration, before execution** - Compact research context, keep implementation plan
|
||||
- **After completing a milestone** - Fresh start for next phase
|
||||
- **Before major context shifts** - Clear exploration context before different task
|
||||
|
||||
## How It Works
|
||||
|
||||
The `suggest-compact.sh` script runs on PreToolUse (Edit/Write) and:
|
||||
|
||||
1. **Tracks tool calls** - Counts tool invocations in session
|
||||
2. **Threshold detection** - Suggests at configurable threshold (default: 50 calls)
|
||||
3. **Periodic reminders** - Reminds every 25 calls after threshold
|
||||
|
||||
## Hook Setup
|
||||
|
||||
Add to your `~/.claude/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"hooks": {
|
||||
"PreToolUse": [{
|
||||
"matcher": "tool == \"Edit\" || tool == \"Write\"",
|
||||
"hooks": [{
|
||||
"type": "command",
|
||||
"command": "~/.claude/skills/strategic-compact/suggest-compact.sh"
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Environment variables:
|
||||
- `COMPACT_THRESHOLD` - Tool calls before first suggestion (default: 50)
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Compact after planning** - Once plan is finalized, compact to start fresh
|
||||
2. **Compact after debugging** - Clear error-resolution context before continuing
|
||||
3. **Don't compact mid-implementation** - Preserve context for related changes
|
||||
4. **Read the suggestion** - The hook tells you *when*, you decide *if*
|
||||
|
||||
## Related
|
||||
|
||||
- [The Longform Guide](https://x.com/affaanmustafa/status/2014040193557471352) - Token optimization section
|
||||
- Memory persistence hooks - For state that survives compaction
|
||||
120
skills/verification-loop/SKILL.md
Normal file
120
skills/verification-loop/SKILL.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# Verification Loop Skill
|
||||
|
||||
A comprehensive verification system for Claude Code sessions.
|
||||
|
||||
## When to Use
|
||||
|
||||
Invoke this skill:
|
||||
- After completing a feature or significant code change
|
||||
- Before creating a PR
|
||||
- When you want to ensure quality gates pass
|
||||
- After refactoring
|
||||
|
||||
## Verification Phases
|
||||
|
||||
### Phase 1: Build Verification
|
||||
```bash
|
||||
# Check if project builds
|
||||
npm run build 2>&1 | tail -20
|
||||
# OR
|
||||
pnpm build 2>&1 | tail -20
|
||||
```
|
||||
|
||||
If build fails, STOP and fix before continuing.
|
||||
|
||||
### Phase 2: Type Check
|
||||
```bash
|
||||
# TypeScript projects
|
||||
npx tsc --noEmit 2>&1 | head -30
|
||||
|
||||
# Python projects
|
||||
pyright . 2>&1 | head -30
|
||||
```
|
||||
|
||||
Report all type errors. Fix critical ones before continuing.
|
||||
|
||||
### Phase 3: Lint Check
|
||||
```bash
|
||||
# JavaScript/TypeScript
|
||||
npm run lint 2>&1 | head -30
|
||||
|
||||
# Python
|
||||
ruff check . 2>&1 | head -30
|
||||
```
|
||||
|
||||
### Phase 4: Test Suite
|
||||
```bash
|
||||
# Run tests with coverage
|
||||
npm run test -- --coverage 2>&1 | tail -50
|
||||
|
||||
# Check coverage threshold
|
||||
# Target: 80% minimum
|
||||
```
|
||||
|
||||
Report:
|
||||
- Total tests: X
|
||||
- Passed: X
|
||||
- Failed: X
|
||||
- Coverage: X%
|
||||
|
||||
### Phase 5: Security Scan
|
||||
```bash
|
||||
# Check for secrets
|
||||
grep -rn "sk-" --include="*.ts" --include="*.js" . 2>/dev/null | head -10
|
||||
grep -rn "api_key" --include="*.ts" --include="*.js" . 2>/dev/null | head -10
|
||||
|
||||
# Check for console.log
|
||||
grep -rn "console.log" --include="*.ts" --include="*.tsx" src/ 2>/dev/null | head -10
|
||||
```
|
||||
|
||||
### Phase 6: Diff Review
|
||||
```bash
|
||||
# Show what changed
|
||||
git diff --stat
|
||||
git diff HEAD~1 --name-only
|
||||
```
|
||||
|
||||
Review each changed file for:
|
||||
- Unintended changes
|
||||
- Missing error handling
|
||||
- Potential edge cases
|
||||
|
||||
## Output Format
|
||||
|
||||
After running all phases, produce a verification report:
|
||||
|
||||
```
|
||||
VERIFICATION REPORT
|
||||
==================
|
||||
|
||||
Build: [PASS/FAIL]
|
||||
Types: [PASS/FAIL] (X errors)
|
||||
Lint: [PASS/FAIL] (X warnings)
|
||||
Tests: [PASS/FAIL] (X/Y passed, Z% coverage)
|
||||
Security: [PASS/FAIL] (X issues)
|
||||
Diff: [X files changed]
|
||||
|
||||
Overall: [READY/NOT READY] for PR
|
||||
|
||||
Issues to Fix:
|
||||
1. ...
|
||||
2. ...
|
||||
```
|
||||
|
||||
## Continuous Mode
|
||||
|
||||
For long sessions, run verification every 15 minutes or after major changes:
|
||||
|
||||
```markdown
|
||||
Set a mental checkpoint:
|
||||
- After completing each function
|
||||
- After finishing a component
|
||||
- Before moving to next task
|
||||
|
||||
Run: /verify
|
||||
```
|
||||
|
||||
## Integration with Hooks
|
||||
|
||||
This skill complements PostToolUse hooks but provides deeper verification.
|
||||
Hooks catch issues immediately; this skill provides comprehensive review.
|
||||
Reference in New Issue
Block a user