Skip to content

Commit 1493734

Browse files
committed
check in load_fgwas_scores
1 parent 16de173 commit 1493734

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

snp2cell/util.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,19 @@ def load_fgwas_scores(
340340
df = pd.read_csv(fgwas_output_path, sep="\t", header=None)
341341
df.columns = ["regionID", "SNP_BF", "SNP_rel_loc"]
342342

343+
# load region locations
344+
log.info(f"loading region locations from '{region_loc_path}'")
345+
region_info = pd.read_csv(region_loc_path, sep="\t")
346+
region_info.index += 1
347+
348+
if region_info.shape[0] != df["regionID"].max():
349+
log.warning(
350+
f"largest region ID in fgwas output ({df['regionID'].max()})\n"
351+
f"number of regions in region location file ({region_info.shape[0]})\n"
352+
"are you sure the region location file corresponds to the fgwas output?"
353+
)
354+
355+
# calculate regional Bayes factors
343356
log.info(f"calculating regional Bayes factors")
344357
region_groups = list(df.groupby("regionID"))
345358
with mp.Pool(num_cores) as pool:
@@ -353,8 +366,7 @@ def load_fgwas_scores(
353366
res = pd.concat(res)
354367

355368
# add region information from region_loc_path
356-
log.info(f"loading region locations from '{region_loc_path}'")
357-
region_info = pd.read_csv(region_loc_path, sep="\t")
369+
log.info(f"adding region information to scores")
358370
region_info["log_RBF"] = region_info.index.map(res)
359371
region_info["name"] = region_info.apply(
360372
lambda r: f"chr{int(r['hm_chr'])}:{int(r['hm_pos'])}-{int(r['hm_pos'])}", axis=1

tests/test_util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,10 @@ def test_export_for_fgwas(snp2cell_instance, tmp_path):
7373

7474
def test_load_fgwas_scores(snp2cell_instance, tmp_path):
7575
# Create a temporary fgwas output file with two rows.
76-
fgwas_output_path = tmp_path / "fgwas_output.txt"
77-
with open(fgwas_output_path, "w") as f:
78-
f.write(f"0\t{np.log(2)}\t0\n1\t{np.log(3)}\t0\n")
76+
fgwas_output_path = tmp_path / "fgwas_output.gz"
77+
# TODO: Replace with a more realistic example.
78+
df = pd.DataFrame([[1, np.log(2), 0], [2, np.log(3), 0]])
79+
df.to_csv(fgwas_output_path, sep="\t", header=False, index=False)
7980

8081
# Create a temporary region location file with header (as in export_for_fgwas).
8182
region_loc_path = tmp_path / "region_loc.txt"

0 commit comments

Comments
 (0)