Skip to content

Commit

Permalink
Update gexp_cov_adjust.py
Browse files: browse the repository at this point in the history
  • Loading branch information
zhangdb authored May 20, 2022
1 parent 9f2cb4f commit 18f244c
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions script/adj/gexp_cov_adjust.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,21 +9,21 @@

if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='adjust the gene expression by the covariates file,'
'output Gexp.data: sample * gene, pure matrix without any col or row names')
description='Adjust the gene expressions by covariates,'
'output Gexp.data as a #sample * #gene matrix without column or row names')
# genepos: Column 1 is chr#; Column 2 is start pos; Column 3 is end pos;
parser.add_argument(
'--expr', help='the expression matrix for the first cohort, in *.bed.gz file format')
parser.add_argument('--covf',help='the covariates file, format: cov ID * sample, with header ID, sample1, sample2...')
'--expr', help='expression matrix for the first cohort in *.bed.gz file format')
parser.add_argument('--covf',help='covariates file in format: #covs * #samples with headers cov ID, sample1, sample2...')
parser.add_argument(
'--prefix', help='the prefix for the output file 1,output *.gexp.data')
'--prefix', help='prefix for the output file *.gexp.data')
args = parser.parse_args()

print('start to loading data')
print('the expression file is '+ args.expr)
print('the covariates file is '+ args.covf)
print('the output file is '+ args.prefix+'.gexp.data')
print('processing .........')
print('Loading data...')
print(' The expression file is '+ args.expr)
print(' The covariates file is '+ args.covf)
print(' The output file is '+ args.prefix+'.gexp.data')
print('Processing...')
expr = pd.read_csv(args.expr, sep='\t', index_col=None)
# set the index to the gene_id
expr.index = expr.gene_id
Expand All @@ -43,7 +43,7 @@
result = model.fit()
exp_adjusted[:,i] = result.resid
if(i % 5000 == 0):
print('processed %d markers' % i)
print('done!')
print(' %d markers processed' % i)
print(' Completed!')
exp_adjusted = pd.DataFrame(exp_adjusted)
exp_adjusted.to_csv(args.prefix + '.gexp.data',sep='\t', index=False,header=False)
exp_adjusted.to_csv(args.prefix + '.gexp.data',sep='\t', index=False,header=False)

0 comments on commit 18f244c

Please sign in to comment.