@@ -340,6 +340,19 @@ def load_fgwas_scores(
340340 df = pd .read_csv (fgwas_output_path , sep = "\t " , header = None )
341341 df .columns = ["regionID" , "SNP_BF" , "SNP_rel_loc" ]
342342
343+ # load region locations
344+ log .info (f"loading region locations from '{ region_loc_path } '" )
345+ region_info = pd .read_csv (region_loc_path , sep = "\t " )
346+ region_info .index += 1
347+
348+ if region_info .shape [0 ] != df ["regionID" ].max ():
349+ log .warning (
350+ f"largest region ID in fgwas output ({ df ['regionID' ].max ()} )\n "
351+ f"number of regions in region location file ({ region_info .shape [0 ]} )\n "
352+ "are you sure the region location file corresponds to the fgwas output?"
353+ )
354+
355+ # calculate regional Bayes factors
343356 log .info (f"calculating regional Bayes factors" )
344357 region_groups = list (df .groupby ("regionID" ))
345358 with mp .Pool (num_cores ) as pool :
@@ -353,8 +366,7 @@ def load_fgwas_scores(
353366 res = pd .concat (res )
354367
355368 # add region information from region_loc_path
356- log .info (f"loading region locations from '{ region_loc_path } '" )
357- region_info = pd .read_csv (region_loc_path , sep = "\t " )
369+ log .info (f"adding region information to scores" )
358370 region_info ["log_RBF" ] = region_info .index .map (res )
359371 region_info ["name" ] = region_info .apply (
360372 lambda r : f"chr{ int (r ['hm_chr' ])} :{ int (r ['hm_pos' ])} -{ int (r ['hm_pos' ])} " , axis = 1
0 commit comments