#!/usr/bin/perl # Copyright 2013 O. Sotolongo # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # use strict; use warnings; use File::Slurp qw(read_file); use Text::NSP::Measures::2D::Fisher::twotailed; my $plist = shift; my $model = shift; my $bimfile = shift; die "usage: models.pl model_file model_tla bim_file\n" unless ($plist && $model && $bimfile); my $ofile = $plist; $ofile =~ s/(.*)\.(.*)/$1\.$model\.recalc2\.$2/; # cambiar a clustering con (?:PATTERN) my %input_data = reverse map {/^(\s*\d+\s+(\S+)\s+[A,T,C,G]+\s+[A,T,C,G]+\s+$model\s+.*)$/} grep {/^(.*\s+$model\s+.*)$/} read_file $plist; my %bim_data = map {/^\d+\s+(.*)\s+\d+\.*\d*\s+(\d+)\s+[A,T,C,G]+\s+[A,T,C,G]+\s*$/} read_file $bimfile; my %cells; foreach my $marker (sort keys %input_data){ (@{$cells{$marker}} {qw/chr allele1 allele2 affa1 affa2 unaffa1 unaffa2 chi2 df pval0/}) = $input_data{$marker} =~ /^\s*(\d+)\s+$marker\s+([A,T,C,G])+\s+([A,T,C,G])+\s+$model\s+(\d+)\/(\d+)\s+(\d+)\/(\d+)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s*$/; if(exists($cells{$marker}{affa1}) && exists($cells{$marker}{affa2}) && exists($cells{$marker}{unaffa1}) && exists($cells{$marker}{unaffa2})){ my $affa1 = $cells{$marker}{affa1}; my $affa2 = $cells{$marker}{affa2}; my $unaffa1 = $cells{$marker}{unaffa1}; my $unaffa2 = $cells{$marker}{unaffa2}; my $t1 = $affa1 + $affa2; my $t2 = $affa1 + $unaffa1; my $t3 = $t1 + $unaffa1 + $unaffa2; $cells{$marker}{pvalue} = calculateStatistic(n11=>$affa1, n1p=>$t1, np1=>$t2, npp=>$t3); $affa1 = 0.1 unless ($affa1); $affa2 = 0.1 unless ($affa2); $unaffa1 = 0.1 unless ($unaffa1); $unaffa2 = 0.1 unless ($unaffa2); $cells{$marker}{afreq} = $affa1/($affa1+$affa2); $cells{$marker}{ufreq} = $unaffa1/($unaffa1+$unaffa2); $cells{$marker}{oddsratio} = ($affa1*$unaffa2)/($affa2*$unaffa1); $cells{$marker}{stderr} = sqrt((1/$affa1)+(1/$affa2)+(1/$unaffa1)+(1/$unaffa2)); } } my $head = "CHR\tSNP\tBP\tA1\tA2\tF_A\tF_U\tCHISQ\tDF\tP0\tOR\tSE\tP"; open ODF, ">$ofile" or die "Could not open output file\n"; print ODF "$head\n"; foreach my $marker (sort {($cells{$a}->{chr} <=> $cells{$b}->{chr}) or ($a cmp $b)} keys %input_data){ if(exists($bim_data{$marker})){ print ODF "$cells{$marker}{chr}\t$marker\t$bim_data{$marker}\t$cells{$marker}{allele1}\t$cells{$marker}{allele2}\t$cells{$marker}{afreq}\t$cells{$marker}{ufreq}\t$cells{$marker}{chi2}\t$cells{$marker}{df}\t$cells{$marker}{pval0}\t$cells{$marker}{oddsratio}\t$cells{$marker}{stderr}\t$cells{$marker}{pvalue}\n"; } } close ODF;