Comprehensive skill for CellPhoneDB - a curated repository of ligands, receptors and their interactions, with tools for cell-cell communication analysis of single-cell data.
Comprehensive assistance with CellPhoneDB development, generated from official documentation.
This skill should be triggered when you need to:
Data Preparation & Analysis:
Cell-Cell Communication Analysis:
Advanced Applications:
Database Management:
import pandas as pd
import numpy as np
from cellphonedb.src.core.exceptions.ParseCountsException import ParseCountsException
# Validate meta DataFrame - ensure correct columns and indexes
def validate_meta(meta_raw):
    """Return a validated copy of the ``meta_raw`` DataFrame.

    The input is never modified: all work happens on a copy, and that copy is
    what the caller gets back.
    """
    # Work on a copy so the caller's DataFrame stays untouched.
    validated = meta_raw.copy()
    # Ensure proper indexing and column structure
    return validated
# Validate counts DataFrame - ensure float32 type and cell consistency
def validate_counts(counts: pd.DataFrame, meta: pd.DataFrame) -> pd.DataFrame:
    """Coerce ``counts`` to float32 and restrict it to the cells listed in ``meta``.

    Args:
        counts: Expression matrix whose column labels are cell identifiers.
        meta: Cell metadata whose index holds cell identifiers. Its index is
            cast to ``str`` in place, so the caller's object is modified.

    Returns:
        ``counts`` with float32 values, keeping only the columns whose label
        appears in ``meta.index``.

    Raises:
        ParseCountsException: If ``counts`` has no columns, if its values
            cannot be converted to float32, or if a cell in ``meta`` has no
            matching column in ``counts``.
    """
    if not len(counts.columns):
        raise ParseCountsException('Counts values are not decimal values', 'Incorrect file format')

    try:
        # Only pay for the conversion when at least one column is not float32 yet.
        if np.any(counts.dtypes.values != np.dtype('float32')):
            counts = counts.astype(np.float32)
    except Exception as err:
        # Say what went wrong and keep the original error as the explicit cause,
        # instead of raising an exception that carries no description.
        raise ParseCountsException('Counts values are not decimal values',
                                   'Incorrect file format') from err

    # Compare cell identifiers as strings; this deliberately updates the caller's meta.
    meta.index = meta.index.astype(str)

    if np.any(~meta.index.isin(counts.columns)):
        raise ParseCountsException("Some cells in meta did not exist in counts",
                                   "Maybe incorrect file format")

    # Drop the cells that have counts but no metadata.
    has_meta = counts.columns.isin(meta.index)
    if not has_meta.all():
        counts = counts.loc[:, has_meta]
    return counts
from typing import Tuple
import pandas as pd
import zipfile
import io
# Extract interaction data from CellPhoneDB database
def get_interactions_genes_complex(cpdb_file_path) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, dict, dict]:
    """Load the CellPhoneDB database tables and assemble the structures derived from them.

    The tables are obtained from ``extract_dataframes_from_db(cpdb_file_path)``
    and combined into the ``genes`` and ``interactions`` DataFrames, plus a
    gene-synonym lookup.

    Returns:
        A 6-tuple ``(interactions, genes, complex_composition,
        complex_expanded, gene_synonym2gene_name, receptor2tfs)``: four
        DataFrames followed by two dicts, as declared in the signature.

    NOTE(review): ``complex_composition``, ``complex_expanded`` and
    ``receptor2tfs`` are returned but never assigned in this listing, so the
    code that builds them appears to be elided here - confirm against the
    full implementation before reusing this snippet.
    """
    # Extract csv files from database zip file
    dbTableDFs = extract_dataframes_from_db(cpdb_file_path)
    # Build the synonym -> gene name lookup; it stays empty when the table is absent
    gene_synonym2gene_name = {}
    if 'gene_synonym_to_gene_name' in dbTableDFs:
        gs2gn = dbTableDFs['gene_synonym_to_gene_name']
        gene_synonym2gene_name = dict(zip(gs2gn['Gene Synonym'], gs2gn['Gene Name']))
    # Cast the flag columns of the multidata table to bool.
    # This modifies the table held in dbTableDFs in place.
    mtTable = dbTableDFs['multidata_table']
    MULTIDATA_TABLE_BOOLEAN_COLS = ['receptor', 'other', 'secreted_highlight',
                                    'transmembrane', 'secreted', 'peripheral', 'integrin', 'is_complex']
    for col in MULTIDATA_TABLE_BOOLEAN_COLS:
        mtTable[col] = mtTable[col].astype(bool)
    # Build the genes table: gene -> protein -> multidata (default inner joins)
    genes = pd.merge(dbTableDFs['gene_table'], dbTableDFs['protein_table'],
                     left_on='protein_id', right_on='id_protein')
    genes = pd.merge(genes, mtTable, left_on='protein_multidata_id', right_on='id_multidata')
    # Stack the protein rows and the complex rows (each joined to multidata) into one
    # lookup table; sort=True orders the combined columns, ignore_index renumbers the rows
    multidata_expanded = pd.concat([
        pd.merge(dbTableDFs['protein_table'], mtTable, left_on='protein_multidata_id', right_on='id_multidata'),
        pd.merge(mtTable, dbTableDFs['complex_table'], left_on='id_multidata', right_on='complex_multidata_id')
    ], ignore_index=True, sort=True)
    # Attach the multidata details of both interaction partners. Left joins keep every
    # interaction row; columns that collide in the second merge get the suffixes
    # '_1' (first partner) and '_2' (second partner)
    interactions = pd.merge(dbTableDFs['interaction_table'], multidata_expanded, how='left',
                            left_on=['multidata_1_id'], right_on=['id_multidata'])
    interactions = pd.merge(interactions, multidata_expanded, how='left',
                            left_on=['multidata_2_id'], right_on=['id_multidata'], suffixes=('_1', '_2'))
    # Index the interactions by their id; the id column itself is dropped
    interactions.set_index('id_interaction', drop=True, inplace=True)
    return interactions, genes, complex_composition, complex_expanded, gene_synonym2gene_name, receptor2tfs
# Install Python (via Miniconda) and Jupyter Notebook
# Miniconda installation instructions: https://docs.conda.io/en/latest/miniconda.html
# Create and activate a dedicated conda environment named "cpdb" running Python 3.8
conda create -n cpdb python=3.8
conda activate cpdb
pip install notebook
# Clone the CellPhoneDB repository into your working directory
# (the git@github.com URL uses SSH, so a GitHub SSH key must be configured)
cd <your_working_directory>
git clone git@github.com:ventolab/CellphoneDB.git
cd CellphoneDB/cellphonedb/notebooks
# Start Jupyter Notebook from the notebooks directory
jupyter notebook
# Then open http://localhost:8888/notebooks/notebooks/cellphonedb.ipynb in a browser
# METHOD 1: Simple analysis - interaction means
# Use for quick exploration without statistical testing
# (the "analysis" subcommand reports means only; "statistical_analysis" adds the significance test)
cellphonedb method analysis meta.txt counts.txt --output-path results/
# METHOD 2: Statistical analysis - significance testing
# Use for identifying significant cell-type specific interactions
# --subsampling must be combined with --subsampling-log: "true" applies log1p to the
# counts before subsampling; use "false" if the counts are already log-transformed
cellphonedb method statistical_analysis meta.txt counts.txt --output-path results/ --subsampling --subsampling-log true --threads 4
# METHOD 3: Differential expression analysis
# Use for custom comparisons with provided DEGs file
cellphonedb method degs_analysis meta.txt counts.txt degs.txt --output-path results/
# METHOD 4: Spatial microenvironments analysis
# Add spatial context to interaction analysis (the command-line option is --microenvs)
cellphonedb method statistical_analysis meta.txt counts.txt --output-path results/ --microenvs microenv.txt
# Meta file format (tab-separated):
# cell_name cell_type
# cell1 T_cell
# cell2 B_cell
# cell3 T_cell
# Counts file format (tab-separated, genes as rows, cells as columns):
# Gene cell1 cell2 cell3
# EGFR 5.2 0.0 3.1
# CD3D 8.7 1.2 9.4
# DEGs file format for METHOD 3 (tab-separated; first column = cell type/cluster as named in the meta file, second column = gene):
# cluster gene pval avg_log2FC
# T_cell IL2RA 0.001 2.3
# B_cell MS4A1 0.0005 3.1
# Microenvironments file format (tab-separated):
# cell_type microenvironment
# T_cell immune_compartment
# B_cell immune_compartment
# epithelial tissue_compartment
# Run analysis with spatial constraints
# (the microenvironments file is passed with the --microenvs option)
cellphonedb method statistical_analysis meta.txt counts.txt \
--output-path results/ \
--microenvs microenv.txt \
--threshold 0.1 # Minimum expression fraction
# Prepare transcription factor activity file
# Format: cell_type TF1 TF2 TF3
# T_cell 1.2 0.8 0.5
# B_cell 0.3 1.1 0.9
# Run analysis with TF activity integration
# NOTE(review): confirm that the installed CellPhoneDB release accepts --active-tfs on the
# command line and that it expects the file layout described for tf_activity.txt - TODO verify
cellphonedb method statistical_analysis meta.txt counts.txt \
--output-path results/ \
--active-tfs tf_activity.txt \
--threshold 0.1
import os
def get_db_path(user_dir_root, db_version):
    """Build the path of the local database release named ``db_version``.

    The result is ``<user_dir_root>/releases/<db_version>``, joined with the
    platform's path separator. Nothing is created or checked on disk.
    """
    releases_dir = os.path.join(user_dir_root, "releases")
    return os.path.join(releases_dir, db_version)
# Example usage: build the path of the local "v5.0" database release
user_dir = "/path/to/cellphonedb/data"
db_version = "v5.0"
db_path = get_db_path(user_dir, db_version)
# db_path is now "/path/to/cellphonedb/data/releases/v5.0" (with POSIX path separators)
This skill includes comprehensive documentation in references/:
Essential for developers and advanced users:
Comprehensive guide for all analysis methods:
Quick start and setup information:
Use the `view` tool to read specific reference files when detailed information is needed.
Organized documentation extracted from official sources:
Add your automation scripts here:
Store templates and reference materials:
To refresh this skill with updated documentation:
For the most current information, always cross-reference with the official CellPhoneDB documentation and GitHub repository.