Validates Jupyter notebook execution, outputs, and educational quality metrics
This skill provides comprehensive testing and validation capabilities for educational Jupyter notebooks, ensuring they execute correctly, produce expected outputs, and meet quality standards.
Activate this skill when you need to:
Purpose: Fast check that the notebook doesn't crash. Time: 1-2 minutes. Command:
jupyter nbconvert --to notebook --execute notebook.ipynb \
--output tested.ipynb \
--ExecutePreprocessor.timeout=300
Pass criteria: No exceptions raised
Purpose: Verify complete execution with logging. Time: 3-5 minutes. Command:
jupyter nbconvert --to notebook --execute notebook.ipynb \
--output tested.ipynb \
--ExecutePreprocessor.timeout=600 \
--log-level=INFO
Pass criteria:
Purpose: Check educational quality metrics. Time: 5-10 minutes. Tool: Custom quality checker (see scripts/).
Pass criteria:
Located in scripts/validate_execution.py:
#!/usr/bin/env python
"""
Basic notebook execution validator.
Returns exit code 0 for success, 1 for failure.
"""
import sys
import subprocess
import json
from pathlib import Path
def validate_notebook(notebook_path, timeout=600):
    """Execute a notebook with nbconvert and report whether it ran cleanly.

    The notebook is executed into a sibling file whose suffix is replaced by
    ``.tested.ipynb``; that file is then scanned for ``error`` outputs in
    code cells.

    Args:
        notebook_path: Path to the ``.ipynb`` file to execute.
        timeout: Per-cell timeout in seconds, forwarded to
            ``ExecutePreprocessor.timeout``.

    Returns:
        True when nbconvert exits successfully and no code cell in the
        executed notebook contains an error output; False otherwise
        (including a missing notebook or a missing ``jupyter`` executable).
    """
    source = Path(notebook_path)
    if not source.is_file():
        # Fail early with a clear message instead of spawning nbconvert.
        print(f"❌ FAIL: {notebook_path}")
        print(f"Error: notebook not found: {notebook_path}")
        return False

    output_path = source.with_suffix('.tested.ipynb')

    # Pass the directory and the file name separately. nbconvert appears to
    # resolve a relative --output against the notebook's own directory, which
    # would duplicate the directory component; --output-dir makes the
    # destination explicit so it matches output_path below.
    cmd = [
        'jupyter', 'nbconvert',
        '--to', 'notebook',
        '--execute', str(source),
        '--output-dir', str(output_path.parent),
        '--output', output_path.name,
        f'--ExecutePreprocessor.timeout={timeout}',
    ]

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ FAIL: {notebook_path}")
        print(f"Error: {e.stderr}")
        return False
    except OSError as e:
        # Raised when the 'jupyter' executable cannot be started at all.
        print(f"❌ FAIL: {notebook_path}")
        print(f"Error: {e}")
        return False

    # Check for errors recorded in the outputs of executed code cells.
    with open(output_path, 'r', encoding='utf-8') as f:
        nb = json.load(f)

    errors = [
        {
            'cell': i,
            'error': output.get('ename'),
            'message': output.get('evalue'),
        }
        for i, cell in enumerate(nb['cells'])
        if cell['cell_type'] == 'code'
        for output in cell.get('outputs', [])
        if output.get('output_type') == 'error'
    ]

    if errors:
        # Report failure only; PASS is never printed for a notebook that
        # contains error outputs.
        print(f"❌ FAIL: {notebook_path}")
        print(f"⚠️ Errors found in outputs:")
        for err in errors:
            print(f" Cell {err['cell']}: {err['error']} - {err['message']}")
        return False

    print(f"✅ PASS: {notebook_path}")
    return True
if __name__ == '__main__':
    # CLI entry point: validate_execution.py <notebook.ipynb> [timeout_seconds]
    # Exits 0 when the notebook validates, 1 on failure or bad arguments.
    usage = "Usage: python validate_execution.py <notebook.ipynb> [timeout_seconds]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    notebook = sys.argv[1]
    try:
        timeout = int(sys.argv[2]) if len(sys.argv) > 2 else 600
    except ValueError:
        # A non-numeric second argument (e.g. a second notebook produced by a
        # shell glob) gets a readable message instead of a traceback.
        print(f"Invalid timeout {sys.argv[2]!r}: expected a whole number of seconds")
        print(usage)
        sys.exit(1)
    success = validate_notebook(notebook, timeout)
    sys.exit(0 if success else 1)
Located in scripts/calculate_quality.py:
#!/usr/bin/env python
"""
Calculate educational quality metrics for notebooks.
"""
import json
import sys
from pathlib import Path
def calculate_metrics(notebook_path):
    """Compute quality metrics for the notebook stored at *notebook_path*.

    The file is read as JSON. Character counts cover markdown and code cells
    only; the average cell length divides that total by the number of all
    cells in the notebook.

    Returns:
        A dict with the keys ``total_cells``, ``markdown_cells``,
        ``code_cells``, ``markdown_ratio`` (rounded to 3 places),
        ``exercises_count``, ``has_learning_objectives``,
        ``has_prerequisites`` and ``avg_cell_length`` (rounded to 1 place).
    """
    with open(notebook_path, 'r', encoding='utf-8') as handle:
        notebook = json.load(handle)

    def cell_text(cell):
        # ''.join works whether 'source' is a list of lines or one string.
        return ''.join(cell['source'])

    all_cells = notebook['cells']
    markdown_texts = [
        cell_text(cell) for cell in all_cells if cell['cell_type'] == 'markdown'
    ]
    code_texts = [
        cell_text(cell) for cell in all_cells if cell['cell_type'] == 'code'
    ]

    # Character totals per cell kind.
    markdown_chars = sum(map(len, markdown_texts))
    code_chars = sum(map(len, code_texts))
    total_chars = markdown_chars + code_chars

    if total_chars > 0:
        markdown_ratio = markdown_chars / total_chars
    else:
        markdown_ratio = 0

    # All keyword checks are case-insensitive substring matches on markdown.
    lowered_markdown = [text.lower() for text in markdown_texts]

    exercise_keywords = ['exercise', 'task', 'todo', 'try it', 'your turn', 'practice']
    exercises = sum(
        any(keyword in text for keyword in exercise_keywords)
        for text in lowered_markdown
    )

    has_objectives = any('learning objective' in text for text in lowered_markdown)
    has_prerequisites = any('prerequisite' in text for text in lowered_markdown)

    if all_cells:
        avg_cell_length = total_chars / len(all_cells)
    else:
        avg_cell_length = 0

    return {
        'total_cells': len(all_cells),
        'markdown_cells': len(markdown_texts),
        'code_cells': len(code_texts),
        'markdown_ratio': round(markdown_ratio, 3),
        'exercises_count': exercises,
        'has_learning_objectives': has_objectives,
        'has_prerequisites': has_prerequisites,
        'avg_cell_length': round(avg_cell_length, 1),
    }
def check_quality_gates(metrics):
    """Return a message for every quality gate that *metrics* fails.

    Gates: markdown ratio of at least 0.30, at least 3 exercises, learning
    objectives present, prerequisites present. An empty list means every
    gate passed.
    """
    ratio = metrics['markdown_ratio']
    exercise_total = metrics['exercises_count']

    # (failed?, message) pairs, in the order they are reported.
    gates = (
        (ratio < 0.30, f"Markdown ratio {ratio:.1%} below 30% target"),
        (exercise_total < 3, f"Only {exercise_total} exercises found (target: ≥3)"),
        (not metrics['has_learning_objectives'], "Learning objectives not found"),
        (not metrics['has_prerequisites'], "Prerequisites not documented"),
    )
    return [message for failed, message in gates if failed]
if __name__ == '__main__':
    # CLI entry point: print the metrics report for one notebook and exit
    # with 1 when any quality gate fails, 0 otherwise.
    if len(sys.argv) < 2:
        print("Usage: python calculate_quality.py <notebook.ipynb>")
        sys.exit(1)

    notebook = sys.argv[1]
    metrics = calculate_metrics(notebook)

    def _mark(flag):
        # Render a boolean metric as a check mark or a cross.
        return '✅' if flag else '❌'

    report_lines = [
        f"\n📊 Quality Metrics for {Path(notebook).name}",
        "=" * 50,
        f"Total cells: {metrics['total_cells']}",
        f"Markdown cells: {metrics['markdown_cells']}",
        f"Code cells: {metrics['code_cells']}",
        f"Markdown ratio: {metrics['markdown_ratio']:.1%} (target: ≥30%)",
        f"Exercises: {metrics['exercises_count']} (target: ≥3)",
        f"Learning objectives: {_mark(metrics['has_learning_objectives'])}",
        f"Prerequisites: {_mark(metrics['has_prerequisites'])}",
        f"Avg cell length: {metrics['avg_cell_length']:.0f} chars",
    ]
    for line in report_lines:
        print(line)

    issues = check_quality_gates(metrics)
    if not issues:
        print("\n✅ All quality gates passed!")
        sys.exit(0)

    print("\n⚠️ Quality Issues Found:")
    for issue in issues:
        print(f" - {issue}")
    sys.exit(1)
# Test Report: {Notebook Name}
**Date**: {timestamp}
**Status**: ✅ PASS / ❌ FAIL / ⚠️ WARNING
**Execution Time**: {duration} seconds
## Execution Results
- Total Cells: {count}
- Code Cells: {count}
- Cells Executed: {count}
- Cells with Errors: {count}
## Quality Metrics
- Markdown Ratio: {percentage}% (Target: ≥30%)
- Exercise Count: {count} (Target: ≥3)
- Learning Objectives: {present/missing}
- Prerequisites: {present/missing}
- Average Cell Length: {chars} characters
## Issues Found
### Critical Issues (🔴)
{List of blocking issues}
### Warnings (🟡)
{List of non-blocking issues}
## Recommendations
{Specific suggestions for improvement}
## Test Environment
- Python Version: {version}
- Jupyter Version: {version}
- Key Libraries: {versions}
Symptom: ModuleNotFoundError
Check:
pip list | grep {module_name}
Fix: Add to requirements.txt and reinstall
Symptom: FileNotFoundError
Check: Verify relative paths and data file existence
Fix:
Symptom: TimeoutError: Cell execution timed out
Check: Identify slow cells
Fix:
Symptom: MemoryError or kernel crash
Check: Dataset sizes and memory usage
Fix:
Free memory held by objects that are no longer needed (del + gc.collect())
Symptom: Outputs vary between runs
Check: Random operations without seeds
Fix:
Symptom: No plots in output
Check: Backend configuration
Fix:
%matplotlib inline
import matplotlib.pyplot as plt
plt.show() # Explicitly show
Create conftest.py:
import pytest
from pathlib import Path
@pytest.fixture
def notebooks_dir():
    """Return path to notebooks directory"""
    return Path("notebooks")


@pytest.fixture
def sample_data_dir():
    """Return path to sample data directory"""
    return Path("data/sample")


@pytest.fixture(params=["beginner", "intermediate", "advanced"])
def difficulty_level(request):
    """Parameterize tests across difficulty levels"""
    return request.param


def pytest_generate_tests(metafunc):
    """Supply ``notebook_path`` to every test that requests it.

    Each ``*.ipynb`` file directly inside ``notebooks/`` (relative to the
    directory pytest is run from) becomes one test case, identified by its
    file name. Without this hook a test taking a ``notebook_path`` argument
    fails at setup because no fixture of that name exists.
    """
    if "notebook_path" in metafunc.fixturenames:
        notebooks = sorted(Path("notebooks").glob("*.ipynb"))
        metafunc.parametrize(
            "notebook_path",
            notebooks,
            ids=[nb.name for nb in notebooks],
        )
Create tests/test_notebooks.py:
import pytest
import subprocess
from pathlib import Path
def test_notebook_executes(notebook_path):
    """Test that the notebook executes without errors.

    Runs ``jupyter nbconvert --execute`` on *notebook_path* with a 600 second
    per-cell timeout and fails when the command exits with a non-zero status.
    """
    # Local import keeps this snippet self-contained; only this test needs it.
    import tempfile

    # Write the executed copy into a throwaway directory rather than a fixed
    # /tmp/test.ipynb, which is not portable and is shared between test runs.
    with tempfile.TemporaryDirectory() as output_dir:
        cmd = [
            'jupyter', 'nbconvert',
            '--to', 'notebook',
            '--execute', str(notebook_path),
            '--output-dir', output_dir,
            '--output', 'test.ipynb',
            '--ExecutePreprocessor.timeout=600',
        ]
        # text=True decodes stderr so the failure message is readable
        # instead of a bytes repr.
        result = subprocess.run(cmd, capture_output=True, text=True)
    assert result.returncode == 0, f"Notebook failed: {result.stderr}"
def test_quality_metrics(notebook_path):
    """Fail when the notebook at *notebook_path* misses any quality gate."""
    from scripts.calculate_quality import calculate_metrics, check_quality_gates

    issues = check_quality_gates(calculate_metrics(notebook_path))
    assert not issues, f"Quality issues: {issues}"
# Run with: pytest tests/
Create .github/workflows/test-notebooks.yml:
name: Test Notebooks
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11']
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          # ipykernel provides the Python kernel that `nbconvert --execute` runs cells in
          pip install pytest nbconvert ipykernel
      - name: Test notebook execution
        run: |
          # validate_execution.py takes ONE notebook (its second argument is a
          # timeout), so loop instead of passing the expanded glob.
          for nb in notebooks/*.ipynb; do
            python .claude/skills/notebook-tester/scripts/validate_execution.py "$nb"
          done
      - name: Check quality metrics
        run: |
          # calculate_quality.py only reads its first argument, so loop here too.
          for nb in notebooks/*.ipynb; do
            python .claude/skills/notebook-tester/scripts/calculate_quality.py "$nb"
          done
When this skill is activated, you can:
scripts/ directory
A well-tested notebook:
# Basic execution test
jupyter nbconvert --to notebook --execute notebook.ipynb \
--output tested.ipynb --ExecutePreprocessor.timeout=600
# Quality metrics
python .claude/skills/notebook-tester/scripts/calculate_quality.py notebook.ipynb
# Full validation
python .claude/skills/notebook-tester/scripts/validate_execution.py notebook.ipynb
# pytest integration
pytest tests/test_notebooks.py -v
# Test all notebooks in directory
for nb in notebooks/*.ipynb; do
python .claude/skills/notebook-tester/scripts/validate_execution.py "$nb"
done
