User Tools

Site Tools


genetica:preproc_models

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
genetica:preproc_models [2013/05/16 15:55]
osotolongo [Alternativa a Fisher: Barnard Test]
genetica:preproc_models [2015/02/25 16:35]
osotolongo
Line 6: Line 6:
  
 <code> <code>
-plink plink --bfile archivo --model --allow-no-sex --out archivo+$ plink --bfile archivo --model --allow-no-sex --out archivo
 </code> </code>
  
Line 37: Line 37:
 # Copyright 2013 O. Sotolongo <osotolongo@fundacioace.com> # Copyright 2013 O. Sotolongo <osotolongo@fundacioace.com>
  
 +# This program is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 2 of the License, or
 +# (at your option) any later version.
 +#
 +# This program is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 use strict; use warnings; use strict; use warnings;
 use File::Slurp qw(read_file); use File::Slurp qw(read_file);
Line 47: Line 57:
  
 my $ofile = $plist; my $ofile = $plist;
-$ofile =~ s/(.*)\.(.*)/$1\.$model\.recalc\.$2/; +$ofile =~ s/(.*)\.(.*)/$1\.$model\.recalc2\.$2/; 
- +# cambiar a clustering con (?:PATTERN) 
-my %input_data = reverse map {/^(.*\s+(rs\d+)\s+.*)$/} grep {/^(.*\s+$model\s+.*)$/} read_file $plist; +my %input_data = reverse map {/^(\s*\d+\s+(\S+)\s+[A,T,C,G]+\s+[A,T,C,G]+\s+$model\s+.*)$/} grep {/^(.*\s+$model\s+.*)$/} read_file $plist; 
-my %bim_data = map {/^\d+\s+(rs\d+)\s+\d+\.*\d*\s+(\d+)\s+[A,T,C,G]+\s+[A,T,C,G]+\s*$/} grep {/.*rs\d+.*/} read_file $bimfile;+my %bim_data = map {/^\d+\s+(.*)\s+\d+\.*\d*\s+(\d+)\s+[A,T,C,G]+\s+[A,T,C,G]+\s*$/} read_file $bimfile;
 my %cells; my %cells;
 foreach my $marker (sort keys %input_data){ foreach my $marker (sort keys %input_data){
- (@{$cells{$marker}} {qw/chr allele1 allele2 affa1 affa2 unaffa1 unaffa2 chi2 df pval0/}) = $input_data{$marker} =~ /^\s*(\d+)\s+rs\d+\s+([A,T,C,G])+\s+([A,T,C,G])+\s+$model\s+(\d+)\/(\d+)\s+(\d+)\/(\d+)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s*$/;+ (@{$cells{$marker}} {qw/chr allele1 allele2 affa1 affa2 unaffa1 unaffa2 chi2 df pval0/}) = $input_data{$marker} =~ /^\s*(\d+)\s+$marker\s+([A,T,C,G])+\s+([A,T,C,G])+\s+$model\s+(\d+)\/(\d+)\s+(\d+)\/(\d+)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s+(\d+\.*\d*e*-*\d*|NA)\s*$/;
  if(exists($cells{$marker}{affa1}) && exists($cells{$marker}{affa2}) && exists($cells{$marker}{unaffa1}) && exists($cells{$marker}{unaffa2})){  if(exists($cells{$marker}{affa1}) && exists($cells{$marker}{affa2}) && exists($cells{$marker}{unaffa1}) && exists($cells{$marker}{unaffa2})){
  my $affa1 = $cells{$marker}{affa1};  my $affa1 = $cells{$marker}{affa1};
Line 79: Line 89:
  
 foreach my $marker (sort {($cells{$a}->{chr} <=> $cells{$b}->{chr}) or ($a cmp $b)} keys %input_data){ foreach my $marker (sort {($cells{$a}->{chr} <=> $cells{$b}->{chr}) or ($a cmp $b)} keys %input_data){
- print ODF "$cells{$marker}{chr}\t$marker\t$bim_data{$marker}\t$cells{$marker}{allele1}\t$cells{$marker}{allele2}\t$cells{$marker}{afreq}\t$cells{$marker}{ufreq}\t$cells{$marker}{chi2}\t$cells{$marker}{df}\t$cells{$marker}{pval0}\t$cells{$marker}{oddsratio}\t$cells{$marker}{stderr}\t$cells{$marker}{pvalue}\n";+ if(exists($bim_data{$marker})){ 
 + print ODF "$cells{$marker}{chr}\t$marker\t$bim_data{$marker}\t$cells{$marker}{allele1}\t$cells{$marker}{allele2}\t$cells{$marker}{afreq}\t$cells{$marker}{ufreq}\t$cells{$marker}{chi2}\t$cells{$marker}{df}\t$cells{$marker}{pval0}\t$cells{$marker}{oddsratio}\t$cells{$marker}{stderr}\t$cells{$marker}{pvalue}\n"; 
 + }
 } }
  
Line 145: Line 157:
 $cells{$marker}{pvalue} = shit->get('x'); $cells{$marker}{pvalue} = shit->get('x');
 </code> </code>
 +
 +**OJO:** Esto demora muchísimo. Después de 5 días de cálculo sobre una base de datos y el modelo dominante tuve que parar la ejecución. Lo lanzaré cuando tenga tiempo y la base de datos a usar esté definida.
genetica/preproc_models.txt · Last modified: 2020/08/04 10:58 (external edit)