Process and analyze CSV, JSON, and text files with data transformation, cleaning, analysis, and visualization capabilities
Process structured data files (CSV, JSON, text) with comprehensive capabilities for data cleaning, transformation, analysis, and export. This skill enables working with data files without requiring users to write code.
Use this skill when you need to:
Supported formats:
Available operations:
Available analyses:
Output formats:
When this skill is activated, follow these steps:
Ask clarifying questions if needed:
Use shell to load and process data:
# For CSV files
import csv

# Open with newline='' as recommended by the csv module docs so embedded
# newlines inside quoted fields are handled correctly.
with open('data.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)   # maps each row to a dict keyed by the header
    data = list(reader)          # materialize while the file is still open

# For JSON files
import json

with open('data.json', 'r') as f:
    data = json.load(f)
Alternatively, use the supporting scripts:
# Execute the helper script as a child process (list form, shell=False);
# check=True raises CalledProcessError if the script exits non-zero.
import subprocess

subprocess.run(["python", "scripts/process.py"], check=True)
Apply the requested transformations or analyses:
# Example: keep only the rows whose amount exceeds 100
filtered = list(filter(lambda row: float(row['amount']) > 100, data))

# Example: summary statistics over the amount column
from statistics import mean, median

amounts = [float(record['amount']) for record in data]
avg = mean(amounts)
med = median(amounts)
Format results according to user needs:
# As markdown table
def to_markdown_table(data, columns=None):
    """Render a list of dict rows as a GitHub-style markdown table.

    Args:
        data: list of dicts, one dict per row.
        columns: column names to emit, in order. Defaults to the keys of
            the first row; keys missing from a row render as empty cells.

    Returns:
        The table as a single newline-joined string, or "No data" when
        ``data`` is empty.
    """
    if not data:
        return "No data"
    if columns is None:
        columns = list(data[0].keys())
    # Header
    header = "| " + " | ".join(columns) + " |"
    separator = "| " + " | ".join(["---"] * len(columns)) + " |"
    # Rows
    rows = []
    for row in data:
        row_str = "| " + " | ".join(str(row.get(col, "")) for col in columns) + " |"
        rows.append(row_str)
    return "\n".join([header, separator] + rows)
print(to_markdown_table(filtered))  # `filtered` comes from the earlier filter example
# Example: Analyze sales data
import csv
from io import StringIO
# NOTE: the statistics module exports no `sum` — importing it raises
# ImportError; the builtin sum() is what the totals below use.
from statistics import mean

# Load CSV (`file_content` holds the raw CSV text from the file being processed)
reader = csv.DictReader(StringIO(file_content))
data = list(reader)

# Calculate metrics
total_sales = sum(float(row['amount']) for row in data)
avg_sales = mean(float(row['amount']) for row in data)
unique_customers = len(set(row['customer_id'] for row in data))

print(f"Total Sales: ${total_sales:,.2f}")
print(f"Average Sale: ${avg_sales:,.2f}")
print(f"Unique Customers: {unique_customers}")
# Example: Filter records by criteria — guard-clause loop form
filtered = []
for row in data:
    if row['status'] != 'active':
        continue
    if float(row['score']) < 80:
        continue
    filtered.append(row)
print(f"Found {len(filtered)} matching records")
# Example: Group and aggregate
from collections import defaultdict

# Bucket each row's numeric value under its category.
grouped = defaultdict(list)
for row in data:
    grouped[row['category']].append(float(row['value']))

# Per-category count / total / average.
summary = {}
for category, values in grouped.items():
    summary[category] = {
        'count': len(values),
        'total': sum(values),
        'average': sum(values) / len(values),  # len(values) >= 1 by construction
    }

for category, stats in summary.items():
    print(f"{category}: {stats['count']} items, avg = {stats['average']:.2f}")
# Example: CSV to JSON
import csv
import json
from io import StringIO

# Parse the raw CSV text (`file_content`) into a list of row dicts.
row_reader = csv.DictReader(StringIO(file_content))
data = list(row_reader)

# Convert to JSON
json_output = json.dumps(data, indent=2)
print(json_output)
scripts/process.py: Data processing utility functions

# Extract
data = load_file(file_content)  # Extract: helper from scripts/process.py — confirm signature there
# Transform
cleaned = remove_duplicates(data)
filtered = apply_filters(cleaned, conditions)  # NOTE(review): `conditions` is caller-supplied; verify its shape against process.py
enriched = add_calculated_fields(filtered)
# Load (output)
output = format_as_markdown(enriched)
print(output)
# Pipeline: filter → group → aggregate → sort
# NOTE(review): the `|` chaining below only works if these helpers return
# pipe-aware objects (implementing __or__/__ror__); otherwise treat this as
# a conceptual sketch — confirm against scripts/process.py.
result = (
filter_data(data, conditions)
| group_by(key='category')
| aggregate(metrics=['sum', 'average'])
| sort_by(column='total', descending=True)
)