Paper2Agent committed on
Commit
2f4e9fc
·
verified ·
1 Parent(s): 570dee0

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +11 -0
  2. README.md +3 -5
  3. requirements.txt +15 -0
  4. tissue_mcp.py +79 -0
  5. tools/tissue_readme.py +799 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+ WORKDIR /app
3
+ COPY requirements.txt .
4
+ RUN mkdir -p /tmp/numba_cache && chmod -R 777 /tmp/numba_cache
5
+ ENV NUMBA_CACHE_DIR=/tmp/numba_cache
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+ COPY tissue_mcp.py .
8
+ COPY tools/ tools/
9
+ RUN mkdir -p /app/data/upload /data/tmp_inputs /data/tmp_outputs && chmod -R 777 /app/data/upload /data
10
+ EXPOSE 7860
11
+ CMD ["uvicorn", "tissue_mcp:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
  title: Tissue Mcp
3
- emoji: 🐨
4
- colorFrom: red
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
- short_description: Paper2Agent-generated TISSUE MCP server
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Tissue Mcp
3
+ emoji: 📈
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anndata
2
+ datetime
3
+ fastmcp
4
+ matplotlib
5
+ numpy
6
+ pandas
7
+ pathlib
8
+ scanpy
9
+ scikit_learn
10
+ tissue-sc
11
+ typing
12
+ uv
13
+ uvicorn
14
+ fastapi
15
+ starlette==0.47.3
tissue_mcp.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model Context Protocol (MCP) for TISSUE
3
+
4
+ TISSUE (Transcript Imputation with Spatial Single-cell Uncertainty Estimation) provides methods for spatial gene expression prediction and uncertainty quantification in spatial transcriptomics data. It enables uncertainty-aware analysis including multiple imputation, cell filtering, and weighted PCA for improved downstream analysis.
5
+
6
+ This MCP Server contains the tools extracted from the following tutorials:
7
+ 1. tissue
8
+ - predict_spatial_gene_expression: Predict spatial gene expression using paired spatial and scRNA-seq data
9
+ - calibrate_uncertainties_and_prediction_intervals: Use TISSUE to calibrate uncertainties and obtain prediction intervals
10
+ - multiple_imputation_hypothesis_testing: Hypothesis testing with TISSUE multiple imputation framework
11
+ - tissue_cell_filtering_for_supervised_learning: TISSUE cell filtering for supervised learning applications
12
+ - tissue_cell_filtering_for_pca: TISSUE cell filtering for PCA, clustering and visualization
13
+ - tissue_weighted_pca: TISSUE-WPCA (weighted principal component analysis)
14
+ """
15
+
16
+ import sys
17
+ from pathlib import Path
18
+ from fastmcp import FastMCP
19
+
20
+ # Import the MCP tools from the tools folder
21
+ from tools.tissue_readme import tissue_mcp
22
+
23
+ from starlette.requests import Request
24
+ from starlette.responses import PlainTextResponse, JSONResponse
25
+ import os
26
+ from fastapi.staticfiles import StaticFiles
27
+ import uuid
28
+
29
# Top-level MCP server exposed by this module.
mcp = FastMCP(name="TISSUE")

# Attach the tool server imported from tools.tissue_readme to the top-level server.
mcp.mount(tissue_mcp)
34
+
35
# Use an absolute upload directory located next to this file, so the location
# does not depend on the working directory of the server process.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# The trailing components must be relative: os.path.join() discards every
# earlier component as soon as a later one is absolute, so joining BASE_DIR
# with "/data/upload" silently produced "/data/upload" and ignored BASE_DIR.
# With relative components the result is <BASE_DIR>/data/upload, i.e. the
# /app/data/upload directory that the Dockerfile creates and makes writable.
UPLOAD_DIR = os.path.join(BASE_DIR, "data", "upload")
os.makedirs(UPLOAD_DIR, exist_ok=True)
39
+
40
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request: Request) -> PlainTextResponse:
    """Answer GET /health with the plain-text body "OK".

    The incoming request is not inspected; the reply is always the same.
    """
    ok_response = PlainTextResponse("OK")
    return ok_response
43
+
44
+
45
@mcp.custom_route("/", methods=["GET"])
async def index(request: Request) -> PlainTextResponse:
    """Answer GET / with a plain-text pointer to the MCP endpoint URL.

    The incoming request is not inspected; the reply is a fixed string.
    """
    landing_text = "MCP is on https://Paper2Agent-tissue-mcp.hf.space/mcp"
    return PlainTextResponse(landing_text)
48
+
49
# Upload route
@mcp.custom_route("/upload", methods=["POST"])
async def upload(request: Request):
    """Save an uploaded file under UPLOAD_DIR and report its absolute path.

    The request must be a form POST whose ``file`` field carries the upload.
    The file is stored under a freshly generated UUID name; only the extension
    of the client-supplied filename is kept, so the client never controls the
    stored file name.

    Returns:
        ``{"path": <absolute local path>}`` on success, or ``{"error": ...}``
        with status 400 when the ``file`` field is missing or is not a file.
    """
    form = await request.form()
    up = form.get("file")
    if up is None:
        return JSONResponse({"error": "missing form field 'file'"}, status_code=400)

    # A plain (non-file) form field is delivered as a str, which has no
    # read(); reject it with a 400 instead of failing with an AttributeError.
    if not hasattr(up, "read"):
        return JSONResponse(
            {"error": "form field 'file' must be a file upload"}, status_code=400
        )

    # Generate a safe filename
    orig = getattr(up, "filename", "") or ""
    ext = os.path.splitext(orig)[1]
    name = f"{uuid.uuid4().hex}{ext}"
    dst = os.path.join(UPLOAD_DIR, name)

    # up is a Starlette UploadFile-like object. Copy it in fixed-size chunks so
    # a large upload is never held in memory in one piece.
    chunk_size = 1024 * 1024
    with open(dst, "wb") as out:
        while chunk := await up.read(chunk_size):
            out.write(chunk)

    # Return only the absolute local path
    abs_path = os.path.abspath(dst)
    return JSONResponse({"path": abs_path})
70
+
71
# ASGI application object (the Dockerfile starts uvicorn on "tissue_mcp:app");
# the MCP endpoint itself is served under /mcp.
app = mcp.http_app(path="/mcp")

# Static file mounts, registered in order:
#   /files   -> saved uploaded input files
#   /outputs -> saved output files
for _url_prefix, _directory, _mount_name in (
    ("/files", UPLOAD_DIR, "files"),
    ("/outputs", "/data/tmp_outputs", "outputs"),
):
    app.mount(_url_prefix, StaticFiles(directory=_directory), name=_mount_name)

# Run the MCP server directly (local HTTP transport) when executed as a script
if __name__ == "__main__":
    mcp.run(transport="http", host="127.0.0.1", port=8003)
tools/tissue_readme.py ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TISSUE (Transcript Imputation with Spatial Single-cell Uncertainty Estimation) tutorial implementations.
3
+
4
+ This MCP Server provides 6 tools:
5
+ 1. predict_spatial_gene_expression: Predict spatial gene expression using paired spatial and scRNA-seq data
6
+ 2. calibrate_uncertainties_and_prediction_intervals: Use TISSUE to calibrate uncertainties and obtain prediction intervals
7
+ 3. multiple_imputation_hypothesis_testing: Hypothesis testing with TISSUE multiple imputation framework
8
+ 4. tissue_cell_filtering_for_supervised_learning: TISSUE cell filtering for supervised learning applications
9
+ 5. tissue_cell_filtering_for_pca: TISSUE cell filtering for PCA, clustering and visualization
10
+ 6. tissue_weighted_pca: TISSUE-WPCA (weighted principal component analysis)
11
+
12
+ All tools extracted from TISSUE/README.md.
13
+ """
14
+
15
+ # Standard imports
16
+ from typing import Annotated, Literal, Any
17
+ import pandas as pd
18
+ import numpy as np
19
+ from pathlib import Path
20
+ import os
21
+ from fastmcp import FastMCP
22
+ from datetime import datetime
23
+ import matplotlib.pyplot as plt
24
+ import anndata as ad
25
+
26
+ # Import TISSUE modules
27
+ import tissue.main
28
+ import tissue.downstream
29
+
30
+ # scikit-learn imports
31
+ from sklearn.linear_model import LogisticRegression
32
+ from sklearn.preprocessing import StandardScaler
33
+ from sklearn.metrics import accuracy_score, roc_auc_score, adjusted_rand_score
34
+ from sklearn.cluster import KMeans
35
+
36
# Root directory for tool inputs and outputs.
# NOTE(review): the original author states that HF Spaces guarantees /data is
# writable and persistent -- confirm for the target deployment.
BASE_DIR = Path("/data")

DEFAULT_INPUT_DIR = BASE_DIR / "tmp_inputs"
DEFAULT_OUTPUT_DIR = BASE_DIR / "tmp_outputs"

# TISSUE_INPUT_DIR / TISSUE_OUTPUT_DIR override the defaults when set.
INPUT_DIR = Path(os.getenv("TISSUE_INPUT_DIR", DEFAULT_INPUT_DIR))
OUTPUT_DIR = Path(os.getenv("TISSUE_OUTPUT_DIR", DEFAULT_OUTPUT_DIR))

# Make sure both directories exist before any tool runs.
for _directory in (INPUT_DIR, OUTPUT_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

# Timestamp used in default output prefixes. It is evaluated once, when this
# module is imported, so every use of it within one process sees the same value.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

# MCP server instance that the tool functions below register themselves on.
tissue_mcp = FastMCP(name="tissue_readme")
54
+
55
@tissue_mcp.tool
def predict_spatial_gene_expression(
    spatial_count_path: Annotated[str, "Path to spatial count matrix file (tab-delimited text format). The header should include gene names and rows should be cells."],
    locations_path: Annotated[str, "Path to spatial locations file (tab-delimited text format). Should contain x and y coordinates for each cell."],
    scrna_count_path: Annotated[str, "Path to scRNA-seq count matrix file (tab-delimited text format). The header should include gene names and rows should be cells."],
    target_gene: Annotated[str, "Target gene name to predict (must be present in both datasets)"] = "plp1",
    prediction_method: Annotated[Literal["spage", "tangram", "harmony"], "Method for spatial gene expression prediction"] = "spage",
    n_folds: Annotated[int, "Number of cross-validation folds for prediction"] = 10,
    n_pv: Annotated[int, "Number of principal components for SpaGE method"] = 10,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Predict spatial gene expression using paired spatial and scRNA-seq data with TISSUE.
    Input is spatial count matrix, locations, and scRNA-seq data and output is prediction visualization and results.

    The target gene is removed from the spatial data before prediction and its
    measured values are kept for comparison. They are plotted next to the
    prediction, written to the results CSV, and stored in
    ``adata.uns['target_expression']`` of the saved AnnData file.

    Returns a dict with ``message``, ``reference`` and ``artifacts`` (figure,
    results CSV and processed ``.h5ad``, each with an absolute path).

    Raises:
        ValueError: if the lower-cased target gene is not among the genes
            shared by the spatial and scRNA-seq data.
    """

    # Set output prefix. The stamp is taken at call time: the module-level
    # ``timestamp`` is fixed at import, so reusing it would make every default
    # run in the same process overwrite the previous run's files.
    if out_prefix is None:
        out_prefix = f"tissue_prediction_{datetime.now():%Y%m%d_%H%M%S}"

    def _log_display(values):
        # Colour values for plotting: negatives -> 0, log1p, cap at 95th percentile.
        shown = np.log1p(np.where(values < 0, 0, values))
        cap = np.percentile(shown, 95)
        shown[shown > cap] = cap
        return shown

    # Load paired datasets
    adata, RNAseq_adata = tissue.main.load_paired_datasets(
        spatial_count_path, locations_path, scrna_count_path
    )

    # Gene names are compared case-insensitively
    adata.var_names = [x.lower() for x in adata.var_names]
    RNAseq_adata.var_names = [x.lower() for x in RNAseq_adata.var_names]

    # Preprocess RNAseq data
    tissue.main.preprocess_data(RNAseq_adata, standardize=False, normalize=True)

    # Restrict the spatial data to genes present in both datasets
    gene_names = np.intersect1d(adata.var_names, RNAseq_adata.var_names)
    adata = adata[:, gene_names].copy()

    # Validate target gene exists
    target_gene_lower = target_gene.lower()
    if target_gene_lower not in adata.var_names:
        raise ValueError(f"Target gene '{target_gene}' not found in spatial data")

    # Hold out target gene for validation. A sparse matrix is densified so the
    # values can be flattened, plotted and stored like an ordinary array.
    held_out = adata[:, target_gene_lower].X
    if hasattr(held_out, "toarray"):
        held_out = held_out.toarray()
    target_expn = np.array(held_out).ravel()
    adata = adata[:, [gene for gene in gene_names if gene != target_gene_lower]].copy()

    # Predict gene expression
    tissue.main.predict_gene_expression(
        adata, RNAseq_adata, [target_gene_lower],
        method=prediction_method, n_folds=n_folds, n_pv=n_pv
    )

    pred_key = f"{prediction_method}_predicted_expression"
    predicted = adata.obsm[pred_key][target_gene_lower].values
    coords = adata.obsm['spatial']

    # Create visualization: measured values on the left, prediction on the right
    fig_path = OUTPUT_DIR / f"{out_prefix}_spatial_prediction.png"
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    try:
        for ax, (title, values) in zip(axes, (("Actual", target_expn), ("Predicted", predicted))):
            ax.axis('off')
            points = ax.scatter(coords[:, 0], coords[:, 1],
                                s=1, c=_log_display(values), rasterized=True)
            ax.set_title(title, fontsize=12)

            cbar = fig.colorbar(points, ax=ax)
            cbar.ax.get_yaxis().labelpad = 15
            cbar.ax.set_ylabel('Log Expression', rotation=270)

        fig.suptitle(f"{prediction_method.upper()} Prediction", fontsize=16)
        fig.tight_layout()

        # Save figure
        fig.savefig(fig_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    # Save results
    results_df = pd.DataFrame({
        'cell_id': range(len(adata.obs)),
        'x_coord': coords[:, 0],
        'y_coord': coords[:, 1],
        'actual_expression': target_expn,
        'predicted_expression': predicted
    })

    results_path = OUTPUT_DIR / f"{out_prefix}_prediction_results.csv"
    results_df.to_csv(results_path, index=False)

    # Keep the held-out measurements with the object:
    # calibrate_uncertainties_and_prediction_intervals reads
    # adata.uns['target_expression'] to plot the imputation error.
    adata.uns['target_expression'] = target_expn

    # Save processed AnnData for downstream use
    adata_path = OUTPUT_DIR / f"{out_prefix}_processed_adata.h5ad"
    adata.write_h5ad(adata_path)

    return {
        "message": f"Spatial gene expression prediction completed for {target_gene}",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "Spatial prediction visualization",
                "path": str(fig_path.resolve())
            },
            {
                "description": "Prediction results table",
                "path": str(results_path.resolve())
            },
            {
                "description": "Processed AnnData object",
                "path": str(adata_path.resolve())
            }
        ]
    }
180
+
181
+
182
@tissue_mcp.tool
def calibrate_uncertainties_and_prediction_intervals(
    adata_path: Annotated[str, "Path to processed AnnData file from predict_spatial_gene_expression"],
    target_gene: Annotated[str, "Target gene name for visualization"] = "plp1",
    prediction_method: Annotated[str, "Prediction method used (spage, tangram, harmony)"] = "spage",
    n_neighbors: Annotated[int, "Number of neighbors for spatial graph construction"] = 15,
    grouping_method: Annotated[Literal["kmeans_gene_cell", "kmeans_gene", "kmeans_cell"], "Method for stratified grouping"] = "kmeans_gene_cell",
    k: Annotated[int, "Number of gene groups for calibration"] = 4,
    k2: Annotated[int, "Number of cell groups for calibration"] = 2,
    alpha_level: Annotated[float, "Alpha level for prediction intervals (1-alpha coverage)"] = 0.23,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Use TISSUE to calibrate uncertainties and obtain prediction intervals for spatial predictions.
    Input is processed AnnData with predictions and output is uncertainty calibration and interval visualization.

    The left panel shows the absolute imputation error when the input file
    carries measured values in ``adata.uns['target_expression']``; otherwise it
    shows the predicted expression. The right panel shows the prediction
    interval width. The intervals CSV reports the raw interval width
    (upper minus lower bound).

    Returns a dict with ``message``, ``reference`` and ``artifacts`` (figure,
    calibrated ``.h5ad`` and intervals CSV, each with an absolute path).

    Raises:
        KeyError: if the predictions for ``prediction_method`` or the target
            gene are not present in the input file.
    """

    # Set output prefix. The stamp is taken at call time: the module-level
    # ``timestamp`` is fixed at import, so reusing it would make every default
    # run in the same process overwrite the previous run's files.
    if out_prefix is None:
        out_prefix = f"tissue_calibration_{datetime.now():%Y%m%d_%H%M%S}"

    def _log_display(values, clip=True):
        # Colour values for plotting: negatives -> 0, log1p, optional 95th-percentile cap.
        shown = np.log1p(np.where(values < 0, 0, values))
        if clip:
            cap = np.percentile(shown, 95)
            shown[shown > cap] = cap
        return shown

    # Load processed data
    adata = ad.read_h5ad(adata_path)
    target_gene_lower = target_gene.lower()
    pred_key = f"{prediction_method}_predicted_expression"

    # Fail before the calibration work when the requested gene cannot be
    # reported, rather than after it while building the figure.
    if pred_key not in adata.obsm:
        raise KeyError(f"'{pred_key}' not found in adata.obsm of {adata_path}")
    if target_gene_lower not in adata.obsm[pred_key].columns:
        raise KeyError(f"Target gene '{target_gene}' not found in '{pred_key}'")

    # Build spatial graph
    tissue.main.build_spatial_graph(adata, method="fixed_radius", n_neighbors=n_neighbors)

    # Build calibration scores
    tissue.main.conformalize_spatial_uncertainty(
        adata, pred_key, calib_genes=adata.var_names,
        grouping_method=grouping_method, k=k, k2=k2
    )

    # Get prediction intervals
    tissue.main.conformalize_prediction_interval(
        adata, pred_key, calib_genes=adata.var_names, alpha_level=alpha_level
    )

    m = prediction_method
    coords = adata.obsm['spatial']
    predicted = adata.obsm[pred_key][target_gene_lower].values
    pi_lower = adata.obsm[f"{m}_predicted_expression_lo"][target_gene_lower].values
    pi_upper = adata.obsm[f"{m}_predicted_expression_hi"][target_gene_lower].values
    # Raw interval width; the log/clipped version is used for colouring only.
    pi_width = pi_upper - pi_lower

    # Get target gene data for validation if available
    target_expn = None
    if hasattr(adata, 'uns') and 'target_expression' in adata.uns:
        target_expn = adata.uns['target_expression']

    if target_expn is not None:
        # Imputation error against the measured values
        left_values = _log_display(np.abs(np.asarray(target_expn).flatten() - predicted))
        left_title = f'Imputation Error {target_gene_lower}'
    else:
        # Predicted expression if no ground truth (no percentile cap here)
        left_values = _log_display(predicted, clip=False)
        left_title = f'Predicted Expression {target_gene_lower}'

    panels = (
        (left_title, left_values),
        (f'PI Width {target_gene_lower}', _log_display(pi_width)),
    )

    # Create visualization for prediction intervals
    fig_path = OUTPUT_DIR / f"{out_prefix}_prediction_intervals.png"
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    try:
        for ax, (title, values) in zip(axes, panels):
            ax.axis('off')
            points = ax.scatter(coords[:, 0], coords[:, 1],
                                s=1, c=values, rasterized=True)
            ax.set_title(title, fontsize=12)

            cbar = fig.colorbar(points, ax=ax)
            cbar.ax.get_yaxis().labelpad = 15
            cbar.ax.set_ylabel('Log Expression', rotation=270)

        fig.suptitle(m.upper(), fontsize=16)
        fig.tight_layout()

        # Save figure
        fig.savefig(fig_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    # Save calibrated data
    calibrated_path = OUTPUT_DIR / f"{out_prefix}_calibrated_adata.h5ad"
    adata.write_h5ad(calibrated_path)

    # Save prediction intervals data
    intervals_df = pd.DataFrame({
        'cell_id': range(len(adata.obs)),
        'x_coord': coords[:, 0],
        'y_coord': coords[:, 1],
        f'{target_gene_lower}_predicted': predicted,
        f'{target_gene_lower}_pi_lower': pi_lower,
        f'{target_gene_lower}_pi_upper': pi_upper,
        f'{target_gene_lower}_pi_width': pi_width
    })

    intervals_path = OUTPUT_DIR / f"{out_prefix}_prediction_intervals.csv"
    intervals_df.to_csv(intervals_path, index=False)

    return {
        "message": f"Uncertainty calibration and prediction intervals completed (α={alpha_level})",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "Prediction intervals visualization",
                "path": str(fig_path.resolve())
            },
            {
                "description": "Calibrated AnnData object",
                "path": str(calibrated_path.resolve())
            },
            {
                "description": "Prediction intervals data",
                "path": str(intervals_path.resolve())
            }
        ]
    }
315
+
316
+
317
@tissue_mcp.tool
def multiple_imputation_hypothesis_testing(
    adata_path: Annotated[str, "Path to calibrated AnnData file from calibrate_uncertainties_and_prediction_intervals"],
    prediction_method: Annotated[str, "Prediction method used (spage, tangram, harmony)"] = "spage",
    condition_key: Annotated[str, "Key in adata.obs for condition labels"] = "condition",
    group1: Annotated[str, "First group label for comparison"] = "A",
    group2: Annotated[str, "Second group label for comparison"] = "B",
    n_imputations: Annotated[int, "Number of multiple imputations to use"] = 10,
    test_method: Annotated[Literal["ttest", "spatialde", "wilcoxon_greater", "wilcoxon_less"], "Statistical test method"] = "ttest",
    target_gene: Annotated[str, "Target gene for reporting results"] = "plp1",
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Perform hypothesis testing with TISSUE multiple imputation framework for differential gene expression.
    Input is calibrated AnnData with conditions and output is statistical test results and condition visualization.

    When ``condition_key`` is not a column of ``adata.obs``, the cells are split
    by position: the first half is labelled ``group1`` and the rest ``group2``.

    Returns a dict with ``message``, ``reference`` and ``artifacts`` (condition
    figure and results CSV, each with an absolute path).

    Raises:
        KeyError: if the expected statistic/p-value tables are not present in
            ``adata.uns`` after testing.
    """

    # Set output prefix. The stamp is taken at call time: the module-level
    # ``timestamp`` is fixed at import, so reusing it would make every default
    # run in the same process overwrite the previous run's files.
    if out_prefix is None:
        out_prefix = f"tissue_hypothesis_test_{datetime.now():%Y%m%d_%H%M%S}"

    # Load calibrated data
    adata = ad.read_h5ad(adata_path)
    target_gene_lower = target_gene.lower()

    # Create condition labels if they don't exist
    if condition_key not in adata.obs.columns:
        # Split into two groups based on indices (as in tutorial)
        n_cells = adata.shape[0]
        half = round(n_cells / 2)
        adata.obs[condition_key] = [group1 if i < half else group2
                                    for i in range(n_cells)]

    # Plot conditions; the coordinates are looked up once and masked per group
    coords = adata.obsm['spatial']
    condition_fig_path = OUTPUT_DIR / f"{out_prefix}_conditions.png"
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        for group, colour in ((group1, 'tab:red'), (group2, 'tab:blue')):
            in_group = (adata.obs[condition_key] == group).to_numpy()
            ax.scatter(coords[in_group, 0], coords[in_group, 1],
                       c=colour, s=3, label=group)
        ax.legend(loc='best')
        ax.set_title('Condition Groups for Hypothesis Testing')

        # Save condition plot
        fig.savefig(condition_fig_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    # Perform multiple imputation hypothesis testing
    pred_key = f"{prediction_method}_predicted_expression"
    tissue.downstream.multiple_imputation_testing(
        adata, pred_key,
        calib_genes=adata.var_names,
        condition=condition_key,
        group1=group1,
        group2=group2,
        n_imputations=n_imputations,
        test=test_method
    )

    # Extract results for all genes
    tstat_key = f"{prediction_method}_{group1}_{group2}_tstat"
    pvalue_key = f"{prediction_method}_{group1}_{group2}_pvalue"
    missing = [key for key in (tstat_key, pvalue_key) if key not in adata.uns]
    if missing:
        # NOTE(review): these key names are what this tool has always read;
        # whether every ``test_method`` writes them is not visible here.
        raise KeyError(
            f"Expected result table(s) {missing} not found in adata.uns "
            f"after running test '{test_method}'"
        )
    tstats = adata.uns[tstat_key]
    pvalues = adata.uns[pvalue_key]

    results_data = []
    for gene in adata.var_names:
        if gene not in tstats:
            continue
        pval = pvalues[gene].values[0]
        results_data.append({
            'gene': gene,
            't_statistic': tstats[gene].values[0],
            'p_value': pval,
            'significant_05': pval < 0.05,
            'significant_01': pval < 0.01
        })

    # Explicit columns keep the table well-formed (and sortable) even when no
    # gene produced a result; an empty frame would otherwise have no 'p_value'.
    results_df = pd.DataFrame(
        results_data,
        columns=['gene', 't_statistic', 'p_value', 'significant_05', 'significant_01']
    )
    results_df = results_df.sort_values('p_value')

    # Save results
    results_path = OUTPUT_DIR / f"{out_prefix}_hypothesis_test_results.csv"
    results_df.to_csv(results_path, index=False)

    # Get target gene results
    target_results = results_df[results_df['gene'] == target_gene_lower]
    if not target_results.empty:
        target_tstat = target_results.iloc[0]['t_statistic']
        target_pval = target_results.iloc[0]['p_value']
        target_message = f"Target gene {target_gene}: t-stat={target_tstat:.5f}, p={target_pval:.5f}"
    else:
        target_message = f"Target gene {target_gene} not found in results"

    n_significant = int((results_df['p_value'] < 0.05).sum())

    return {
        "message": f"Hypothesis testing completed: {n_significant} significant genes (p<0.05). {target_message}",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "Condition groups visualization",
                "path": str(condition_fig_path.resolve())
            },
            {
                "description": "Hypothesis test results",
                "path": str(results_path.resolve())
            }
        ]
    }
425
+
426
+
427
@tissue_mcp.tool
def tissue_cell_filtering_for_supervised_learning(
    adata_path: Annotated[str, "Path to calibrated AnnData file from calibrate_uncertainties_and_prediction_intervals"],
    prediction_method: Annotated[str, "Prediction method used (spage, tangram, harmony)"] = "spage",
    condition_key: Annotated[str, "Key in adata.obs for condition labels"] = "condition",
    group1: Annotated[str, "First group label"] = "A",
    group2: Annotated[str, "Second group label"] = "B",
    filter_proportion: Annotated[str | float, "Proportion of cells to filter ('otsu' for automatic or float 0-1)"] = "otsu",
    train_test_split: Annotated[float, "Proportion for training set"] = 0.8,
    random_seed: Annotated[int, "Random seed for reproducibility"] = 444,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Apply TISSUE cell filtering for supervised learning to improve classifier performance.
    Input is calibrated AnnData with conditions and output is filtering results and classifier performance metrics.

    Cells are filtered by prediction-interval width, the remaining cells are
    split randomly into train and test sets, and an L1-penalised logistic
    regression is fitted on the scaled predicted expression. Accuracy and
    ROC-AUC are computed on the test set; for ROC-AUC, "not ``group1``" is the
    positive class.

    Returns a dict with ``message``, ``reference`` and ``artifacts`` (metrics
    CSV, filtered ``.h5ad`` and test-set predictions CSV, absolute paths).

    Raises:
        ValueError: if ``filter_proportion`` is a string other than 'otsu' that
            cannot be parsed as a number.
    """

    # Set output prefix. The stamp is taken at call time: the module-level
    # ``timestamp`` is fixed at import, so reusing it would make every default
    # run in the same process overwrite the previous run's files.
    if out_prefix is None:
        out_prefix = f"tissue_supervised_learning_{datetime.now():%Y%m%d_%H%M%S}"

    # The parameter is typed ``str | float``, so a numeric proportion may
    # arrive as text (e.g. "0.2"); convert it so it is treated as a number.
    if isinstance(filter_proportion, str) and filter_proportion != "otsu":
        filter_proportion = float(filter_proportion)

    # Load calibrated data
    adata = ad.read_h5ad(adata_path)

    # Create condition labels if they don't exist
    if condition_key not in adata.obs.columns:
        total_cells = adata.shape[0]
        half = round(total_cells / 2)
        adata.obs[condition_key] = [group1 if i < half else group2
                                    for i in range(total_cells)]

    # Get uncertainty (PI width) for filtering
    pred_key = prediction_method
    pi_hi_key = f"{pred_key}_predicted_expression_hi"
    pi_lo_key = f"{pred_key}_predicted_expression_lo"

    X_uncertainty = adata.obsm[pi_hi_key].values - adata.obsm[pi_lo_key].values

    # Uncertainty-based cell filtering
    keep_idxs = tissue.downstream.detect_uncertain_cells(
        X_uncertainty,
        proportion=filter_proportion,
        stratification=adata.obs[condition_key].values
    )

    adata_filtered = adata[adata.obs_names[keep_idxs], :].copy()

    # Print filtering stats
    print(f"Before TISSUE cell filtering: {adata.shape}")
    print(f"After TISSUE cell filtering: {adata_filtered.shape}")

    # Check label balance
    balance_df = pd.DataFrame(
        np.unique(adata_filtered.obs[condition_key], return_counts=True),
        index=["Group", "Number of Cells"]
    )
    print(f"Label balance after filtering:\n{balance_df}")

    # Split train and test randomly (seeded, so the split is reproducible)
    np.random.seed(random_seed)
    n_cells = adata_filtered.shape[0]
    train_size = round(n_cells * train_test_split)
    train_idxs = np.random.choice(np.arange(n_cells), train_size, replace=False)
    # Complement of the training indices in ascending order; the set
    # difference avoids scanning the training array once per cell.
    test_idxs = np.array(sorted(set(range(n_cells)) - set(train_idxs.tolist())))

    pred_expression_key = f"{pred_key}_predicted_expression"
    expression = adata_filtered.obsm[pred_expression_key].values
    labels = adata_filtered.obs[condition_key]

    train_data = expression[train_idxs, :]
    train_labels = labels.iloc[train_idxs]

    test_data = expression[test_idxs, :]
    test_labels = labels.iloc[test_idxs]

    # Scale data and train model (the scaler is fitted on training data only)
    scaler = StandardScaler()
    train_data_scaled = scaler.fit_transform(train_data)

    # Fit logistic regression model
    model = LogisticRegression(penalty='l1', solver='liblinear', random_state=random_seed)
    model.fit(train_data_scaled, train_labels)

    # Make predictions on test data
    test_data_scaled = scaler.transform(test_data)
    pred_test = model.predict(test_data_scaled)
    pred_test_proba = model.predict_proba(test_data_scaled)

    # predict_proba columns follow model.classes_, not the (group1, group2)
    # argument order. Use the column of the class that is not group1, so the
    # scores match the 0/1 encoding below even when group1 is not the first
    # class. Outside the two-class case, column 1 is used as a fallback.
    classes = list(model.classes_)
    if len(classes) == 2 and group1 in classes:
        positive_col = 1 - classes.index(group1)
    else:
        positive_col = 1
    positive_scores = pred_test_proba[:, positive_col]

    # Calculate metrics
    test_labels_num = [0 if x == group1 else 1 for x in test_labels]
    accuracy = accuracy_score(test_labels, pred_test)
    roc_auc = roc_auc_score(test_labels_num, positive_scores)

    # Save results
    results_df = pd.DataFrame({
        'metric': ['cells_before_filtering', 'cells_after_filtering', 'cells_filtered_out',
                   'train_size', 'test_size', 'accuracy_score', 'roc_auc_score'],
        'value': [adata.shape[0], adata_filtered.shape[0], adata.shape[0] - adata_filtered.shape[0],
                  len(train_idxs), len(test_idxs), accuracy, roc_auc]
    })

    results_path = OUTPUT_DIR / f"{out_prefix}_supervised_learning_results.csv"
    results_df.to_csv(results_path, index=False)

    # Save filtered data
    filtered_path = OUTPUT_DIR / f"{out_prefix}_filtered_adata.h5ad"
    adata_filtered.write_h5ad(filtered_path)

    # Save model predictions
    predictions_df = pd.DataFrame({
        'cell_id': test_idxs,
        'true_label': test_labels.to_numpy(),
        'predicted_label': pred_test,
        'prediction_probability': positive_scores
    })

    predictions_path = OUTPUT_DIR / f"{out_prefix}_test_predictions.csv"
    predictions_df.to_csv(predictions_path, index=False)

    return {
        "message": f"TISSUE cell filtering for supervised learning completed. Accuracy: {accuracy:.3f}, ROC-AUC: {roc_auc:.3f}",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "Supervised learning results",
                "path": str(results_path.resolve())
            },
            {
                "description": "Filtered AnnData object",
                "path": str(filtered_path.resolve())
            },
            {
                "description": "Test set predictions",
                "path": str(predictions_path.resolve())
            }
        ]
    }
559
+
560
+
561
@tissue_mcp.tool
def tissue_cell_filtering_for_pca(
    adata_path: Annotated[str, "Path to calibrated AnnData file from calibrate_uncertainties_and_prediction_intervals"],
    prediction_method: Annotated[str, "Prediction method used (spage, tangram, harmony)"] = "spage",
    condition_key: Annotated[str, "Key in adata.obs for condition labels"] = "condition",
    group1: Annotated[str, "First group label"] = "A",
    group2: Annotated[str, "Second group label"] = "B",
    filter_proportion: Annotated[str | float, "Proportion of cells to filter ('otsu' for automatic or float 0-1)"] = "otsu",
    n_components: Annotated[int, "Number of principal components"] = 15,
    n_clusters: Annotated[int, "Number of clusters for K-means"] = 2,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Apply TISSUE cell filtering for PCA-based clustering and visualization tasks.
    Input is calibrated AnnData with conditions and output is PCA visualization and clustering results.

    The first two filtered principal components are plotted (colored by
    group1/group2), all components are clustered with K-means, and the
    clustering is scored against the condition labels with the adjusted Rand
    index. A PNG, two CSV files and an .h5ad file are written under OUTPUT_DIR
    using out_prefix as the file-name prefix.

    Returns a dict with "message", "reference" and "artifacts" (description and
    resolved path of every written file).

    Raises ValueError if filter_proportion is neither 'otsu' nor a number in
    [0, 1], and KeyError if the filtered PCA result is missing from adata.uns.
    """
    # Clients may send the proportion as text (e.g. "0.2" or "Otsu"); normalise
    # it up front so a malformed value fails before any data is loaded.
    if isinstance(filter_proportion, str):
        proportion_text = filter_proportion.strip()
        if proportion_text.lower() == "otsu":
            filter_proportion = "otsu"
        else:
            try:
                filter_proportion = float(proportion_text)
            except ValueError:
                raise ValueError(
                    "filter_proportion must be 'otsu' or a number between 0 and 1, "
                    f"got {filter_proportion!r}"
                ) from None
    if filter_proportion != "otsu" and not 0 <= filter_proportion <= 1:
        raise ValueError(
            f"filter_proportion must be between 0 and 1, got {filter_proportion!r}"
        )

    # Set output prefix
    if out_prefix is None:
        out_prefix = f"tissue_pca_{timestamp}"

    # Load calibrated data
    adata = ad.read_h5ad(adata_path)

    # Create condition labels if they don't exist: the first half of the cells
    # is labelled group1 and the remainder group2.
    if condition_key not in adata.obs.columns:
        n_cells = adata.shape[0]
        half = round(n_cells / 2)
        adata.obs[condition_key] = [group1 if i < half else group2 for i in range(n_cells)]

    # Apply TISSUE-filtered PCA
    keep_idxs = tissue.downstream.filtered_PCA(
        adata,
        prediction_method,
        proportion=filter_proportion,
        stratification=adata.obs[condition_key].values,
        n_components=n_components,
        return_keep_idxs=True,
    )

    # Subset to the retained cells so labels line up with the PCA rows
    adata_filtered = adata[adata.obs_names[keep_idxs], :].copy()

    # Retrieve filtered PCA
    pc_key = f"{prediction_method}_predicted_expression_PC{n_components}_filtered_"
    try:
        PC_reduced = adata.uns[pc_key].copy()
    except KeyError:
        raise KeyError(
            f"Filtered PCA result '{pc_key}' not found in adata.uns after running filtered_PCA"
        ) from None

    print(f"PCA reduced data shape: {PC_reduced.shape}")

    # Make 2D PCA plot. An explicit figure/axes pair is used (instead of the
    # implicit pyplot "current figure") and closed in `finally`, so a failure
    # while plotting or saving does not leave the figure open.
    pca_fig_path = OUTPUT_DIR / f"{out_prefix}_filtered_pca.png"
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.set_title("TISSUE-Filtered PCA")
        for label, color in ((group1, "tab:red"), (group2, "tab:blue")):
            mask = (adata_filtered.obs[condition_key] == label).to_numpy()
            ax.scatter(PC_reduced[mask, 0], PC_reduced[mask, 1],
                       c=color, s=3, label=label, alpha=0.7)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.set_xlabel("PC 1")
        ax.set_ylabel("PC 2")

        # Save PCA plot
        fig.savefig(pca_fig_path, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    # Perform K-means clustering on all principal components
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(PC_reduced)

    # Evaluate clustering with ARI
    ari_score = adjusted_rand_score(adata_filtered.obs[condition_key], clusters)
    print(f"Adjusted Rand Index: {ari_score}")

    # Save PCA results; column names follow the actual width of the PC matrix
    pc_columns = [f'PC{i+1}' for i in range(PC_reduced.shape[1])]
    pca_results_df = pd.DataFrame(PC_reduced, columns=pc_columns)
    pca_results_df['cell_id'] = adata_filtered.obs_names
    pca_results_df['condition'] = adata_filtered.obs[condition_key].values
    pca_results_df['kmeans_cluster'] = clusters

    pca_results_path = OUTPUT_DIR / f"{out_prefix}_pca_results.csv"
    pca_results_df.to_csv(pca_results_path, index=False)

    # Save clustering metrics
    clustering_metrics_df = pd.DataFrame({
        'metric': ['n_cells_before_filtering', 'n_cells_after_filtering', 'n_components',
                   'n_clusters', 'adjusted_rand_index'],
        'value': [adata.shape[0], adata_filtered.shape[0], n_components, n_clusters, ari_score],
    })

    metrics_path = OUTPUT_DIR / f"{out_prefix}_clustering_metrics.csv"
    clustering_metrics_df.to_csv(metrics_path, index=False)

    # Save filtered AnnData with PCA results
    adata_filtered.obsm['X_pca_tissue_filtered'] = PC_reduced
    adata_filtered.obs['kmeans_cluster'] = clusters

    filtered_path = OUTPUT_DIR / f"{out_prefix}_pca_filtered_adata.h5ad"
    adata_filtered.write_h5ad(filtered_path)

    return {
        "message": f"TISSUE-filtered PCA completed. ARI score: {ari_score:.3f} with {n_clusters} clusters",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "TISSUE-filtered PCA visualization",
                "path": str(pca_fig_path.resolve())
            },
            {
                "description": "PCA results with clustering",
                "path": str(pca_results_path.resolve())
            },
            {
                "description": "Clustering performance metrics",
                "path": str(metrics_path.resolve())
            },
            {
                "description": "PCA-filtered AnnData object",
                "path": str(filtered_path.resolve())
            }
        ]
    }
685
+
686
+
687
@tissue_mcp.tool
def tissue_weighted_pca(
    adata_path: Annotated[str, "Path to calibrated AnnData file from calibrate_uncertainties_and_prediction_intervals"],
    prediction_method: Annotated[str, "Prediction method used (spage, tangram, harmony)"] = "spage",
    condition_key: Annotated[str, "Key in adata.obs for condition labels"] = "condition",
    group1: Annotated[str, "First group label"] = "A",
    group2: Annotated[str, "Second group label"] = "B",
    pca_method: Annotated[Literal["wpca", "standard"], "PCA method to use"] = "wpca",
    weighting: Annotated[Literal["inverse_pi_width", "uniform"], "Weighting scheme for WPCA"] = "inverse_pi_width",
    replace_inf: Annotated[Literal["max", "zero"], "How to handle infinite weights"] = "max",
    binarize: Annotated[float, "Proportion for weight binarization"] = 0.2,
    binarize_ratio: Annotated[float, "Ratio between high and low weights"] = 10,
    n_components: Annotated[int, "Number of principal components"] = 15,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Perform TISSUE-WPCA (weighted principal component analysis) using uncertainty-based weights.
    Input is calibrated AnnData with conditions and output is weighted PCA visualization and results.

    The first two weighted principal components are plotted (colored by
    group1/group2). A PNG, two CSV files (component scores and the parameters
    used) and an .h5ad file are written under OUTPUT_DIR using out_prefix as
    the file-name prefix.

    Returns a dict with "message", "reference" and "artifacts" (description and
    resolved path of every written file).

    Raises KeyError if the weighted PCA result is missing from adata.obsm.
    """
    # Set output prefix
    if out_prefix is None:
        out_prefix = f"tissue_wpca_{timestamp}"

    # Load calibrated data
    adata = ad.read_h5ad(adata_path)

    # Create condition labels if they don't exist: the first half of the cells
    # is labelled group1 and the remainder group2.
    if condition_key not in adata.obs.columns:
        n_cells = adata.shape[0]
        half = round(n_cells / 2)
        adata.obs[condition_key] = [group1 if i < half else group2 for i in range(n_cells)]

    # Perform weighted PCA
    tissue.downstream.weighted_PCA(
        adata, prediction_method,
        pca_method=pca_method,
        weighting=weighting,
        replace_inf=replace_inf,
        binarize=binarize,
        binarize_ratio=binarize_ratio,
        n_components=n_components,
    )

    # Get weighted PCA results
    wpca_key = f"{prediction_method}_predicted_expression_PC{n_components}_"
    try:
        X_pc = adata.obsm[wpca_key]
    except KeyError:
        raise KeyError(
            f"Weighted PCA result '{wpca_key}' not found in adata.obsm after running weighted_PCA"
        ) from None

    # Make PC plot. An explicit figure/axes pair is used (instead of the
    # implicit pyplot "current figure") and closed in `finally`, so a failure
    # while plotting or saving does not leave the figure open.
    wpca_fig_path = OUTPUT_DIR / f"{out_prefix}_weighted_pca.png"
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.set_title("TISSUE Weighted PCA")
        for label, color in ((group1, "tab:red"), (group2, "tab:blue")):
            mask = (adata.obs[condition_key] == label).to_numpy()
            ax.scatter(X_pc[mask, 0], X_pc[mask, 1],
                       c=color, s=3, label=label, alpha=0.7)
        ax.set_xlabel("PC 1")
        ax.set_ylabel("PC 2")
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        # Save WPCA plot
        fig.savefig(wpca_fig_path, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    # Save WPCA results; column names follow the actual width of the PC matrix
    wpc_columns = [f'WPC{i+1}' for i in range(X_pc.shape[1])]
    wpca_results_df = pd.DataFrame(X_pc, columns=wpc_columns)
    wpca_results_df['cell_id'] = adata.obs_names
    wpca_results_df['condition'] = adata.obs[condition_key].values

    wpca_results_path = OUTPUT_DIR / f"{out_prefix}_wpca_results.csv"
    wpca_results_df.to_csv(wpca_results_path, index=False)

    # Save WPCA parameters
    params_df = pd.DataFrame({
        'parameter': ['pca_method', 'weighting', 'replace_inf', 'binarize',
                      'binarize_ratio', 'n_components'],
        'value': [pca_method, weighting, replace_inf, binarize, binarize_ratio, n_components],
    })

    params_path = OUTPUT_DIR / f"{out_prefix}_wpca_parameters.csv"
    params_df.to_csv(params_path, index=False)

    # Save AnnData with WPCA results (also exposed under a fixed obsm key)
    adata.obsm['X_wpca_tissue'] = X_pc

    wpca_adata_path = OUTPUT_DIR / f"{out_prefix}_wpca_adata.h5ad"
    adata.write_h5ad(wpca_adata_path)

    return {
        "message": f"TISSUE weighted PCA completed with {weighting} weighting and {n_components} components",
        "reference": "https://github.com/sunericd/TISSUE/README.md",
        "artifacts": [
            {
                "description": "TISSUE weighted PCA visualization",
                "path": str(wpca_fig_path.resolve())
            },
            {
                "description": "Weighted PCA results",
                "path": str(wpca_results_path.resolve())
            },
            {
                "description": "WPCA parameters used",
                "path": str(params_path.resolve())
            },
            {
                "description": "WPCA AnnData object",
                "path": str(wpca_adata_path.resolve())
            }
        ]
    }