1111import dill
1212import networkx as nx
1313import numpy as np
14+ from scipy .special import logsumexp
1415import pandas as pd
1516import pyranges
1617import pybiomart
@@ -266,12 +267,12 @@ def export_for_fgwas(
266267
267268def _calc_per_region_bf (region_id : str , region_df : pd .DataFrame ) -> pd .Series :
268269 """
269- calculate regional Bayes factors per group (used in `load_fgwas_scores`)
270+ calculate regional log Bayes factors per group (used in `load_fgwas_scores`)
270271 """
271- w = np . exp ( region_df ["SNP_rel_loc" ])
272- bf = np . exp (region_df ["SNP_BF " ])
273- rbf = ( bf * w ). sum () / w . sum ()
274- return pd .Series (rbf , index = [region_id ])
272+ log_numerator = logsumexp ( region_df [ "SNP_BF" ] + region_df ["SNP_rel_loc" ])
273+ log_denominator = logsumexp (region_df ["SNP_rel_loc " ])
274+ log_bf = log_numerator - log_denominator
275+ return pd .Series (log_bf , index = [region_id ])
275276
276277
277278@add_logger ()
@@ -322,7 +323,7 @@ def load_fgwas_scores(
322323 num_cores = snp2cell .NCPU
323324 log .info (f"using { num_cores } cores" )
324325
325- # load fgwas output
326+ # load fgwas output: SNP_BF (log BF), SNP_rel_loc (log weight including distance and LD score)
326327 df = pd .read_csv (fgwas_output_path , sep = "\t " , header = None )
327328 df .columns = ["regionID" , "SNP_BF" , "SNP_rel_loc" ]
328329
@@ -332,20 +333,18 @@ def load_fgwas_scores(
332333
333334 # add region information from region_loc_path
334335 region_info = pd .read_csv (region_loc_path , sep = "\t " )
335- region_info ["RBF " ] = region_info .index .map (res )
336+ region_info ["log_RBF " ] = region_info .index .map (res )
336337 region_info ["name" ] = region_info .apply (
337338 lambda r : f"chr{ int (r ['hm_chr' ])} :{ int (r ['hm_pos' ])} -{ int (r ['hm_pos' ])} " , axis = 1
338339 )
339340 region_info ["ID" ] = region_info .index
340341
341- region_info [["name" , "ID" , "hm_chr" , "hm_pos" , "RBF " ]].to_csv (
342+ region_info [["name" , "ID" , "hm_chr" , "hm_pos" , "log_RBF " ]].to_csv (
342343 rbf_table_path , sep = "\t " , index = False
343344 )
344345
345- # calculate log RBF scores
346- scores = (
347- region_info .set_index ("name" )["RBF" ].apply (np .log ).sort_values (ascending = False )
348- )
346+ # prepare log RBF scores
347+ scores = region_info .set_index ("name" )["log_RBF" ].sort_values (ascending = False )
349348
350349 def rename (s ):
351350 # expand region to full length
0 commit comments