#!/usr/bin/perl
# Copyright 2013 O. Sotolongo
#
# Refresh the positions stored in a marker database using a reference
# library file.
#
# Usage: <script> DATABASE
#
# DATABASE lines that contain an rs identifier are expected to look like
#     <numeric chr> <rsID> <data ...> <numeric position>
# For every rsID that also appears in the library file, the position is
# replaced by the one found in the library. The result is written, sorted
# numerically by chromosome and then position, to a sibling file whose name
# gets a "_freshed" suffix inserted before the extension.
use strict;
use warnings;
use Parallel::ForkManager;
use File::Slurp qw(read_file);
use File::Remove 'remove';
use File::Basename qw(basename);
use Data::Dump qw(dump);

my $debug = 1;
#my $library = '/home/data/GNlib/1000Genome/CEU_all/1000genome_CEU_merged.map';
my $library = '/home/osotolongo/data/GNlib/1000Genome/EUR/1000genome_eur.bim';

my $db = shift;
die "Must supply database\n" unless defined $db and length $db;

# Build the output name: "name.ext" -> "name_freshed.ext". When the file name
# has no extension, append the suffix instead, so that the output can never
# be the same path as the input (which would overwrite the database).
my $odb = $db;
$odb =~ s{\.([^./]*)\z}{_freshed.$1} or $odb .= '_freshed';
print "I will try to write results to $odb\n";

# Collect one raw line per marker. If a marker occurs more than once in the
# database, the first occurrence in the file is the one that is kept.
my %genmap;
my %data_line;
for my $line (read_file($db)) {
    next unless $line =~ /^(.*\s+(rs\d+)\s+.*)$/;
    my ($record, $marker) = ($1, $2);
    $data_line{$marker} = $record unless exists $data_line{$marker};
}
print "Data stored, lets try to understand it\n";

# Split every kept line into chromosome, free-form data and position.
foreach my $marker (sort keys %data_line) {
    my ($chr, $data, $position)
        = $data_line{$marker} =~ /^(\d+)\s+rs\d+\s+(.*)\s+(\d+)$/;

    # Lines that carry an rsID but do not have a numeric first and last
    # column cannot be sorted or rewritten meaningfully; report and skip
    # them instead of emitting rows with empty fields.
    unless (defined $position) {
        warn "Skipping $marker: cannot parse line '$data_line{$marker}'\n";
        next;
    }
    @{ $genmap{$marker} }{qw/chr data position/} = ($chr, $data, $position);
}
print "Data read, lets work now\n";

# Walk the library and overwrite the position of every marker we know about.
# NOTE(review): the pattern captures the last all-digit field that is still
# followed by whitespace; for a PLINK .bim file this is presumably the
# base-pair column -- confirm if alleles can be numerically coded.
open(my $lib_fh, '<', $library) or die "can't open $library: $!";
while (my $lib_line = <$lib_fh>) {
    my ($libmarker, $libpos)
        = $lib_line =~ /^\d+\s+(rs\d+)\s+.*\s+(\d+)\s+.*/
        or next;
    $genmap{$libmarker}{position} = $libpos if exists $genmap{$libmarker};
}
close $lib_fh;

print "Writing to $odb\n";
open(my $out_fh, '>', $odb) or die "Could not open output file $odb: $!";

# Order by chromosome, then position; the marker name is a final tiebreak so
# the output is deterministic when two markers share a position.
my @ordered = sort {
           $genmap{$a}{chr}      <=> $genmap{$b}{chr}
        or $genmap{$a}{position} <=> $genmap{$b}{position}
        or $a cmp $b
} keys %genmap;

foreach my $marker (@ordered) {
    my $entry = $genmap{$marker};
    print {$out_fh} "$entry->{chr}\t$marker\t$entry->{data}\t$entry->{position}\n";
}

# Buffered write errors only surface at close time, so check it.
close $out_fh or die "Could not close output file $odb: $!";