/* * getnet.c * * Copyright 2013 O. Sotolongo * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. * * */ #include #include #include #define IMDIM 3 #define NPROC 1024 #define imin(a,b) (aimd[0]); fscanf(f, "%d", &img->imd[1]); fscanf(f, "%d", &img->imd[2]); fscanf(f, "%f", &img->pixd[0]); fscanf(f, "%f", &img->pixd[1]); fscanf(f, "%f", &img->pixd[2]); fclose(f); return; } __device__ float distance (int i, int j){ int k = abs(i-j); int x,y,z; int xy; z = (int) k/shape.plane; xy = k%shape.plane; y = (int) xy/shape.imd[0]; x = xy%shape.imd[0]; return shape.pixd[0]*x*shape.pixd[0]*x + shape.pixd[1]*y*shape.pixd[1]*y + shape.pixd[2]*z*shape.pixd[2]*z; } __global__ void network (float *matrix, float *net){ __shared__ float tnet[NPROC]; int myel = blockIdx.x + blockIdx.y*shape.imd[0]+ blockIdx.z*shape.plane; int extel = threadIdx.x; int sindex = threadIdx.x; float temp = 0; while (extel0 && matrix[extel]>0){ temp += matrix[myel]*matrix[extel]/distance(myel,extel); } extel += blockDim.x; } tnet[sindex] = temp; __syncthreads(); size_t i = blockDim.x/2; while (i!=0) { if (sindex < i) { tnet[sindex] += tnet[sindex + i]; } __syncthreads(); i/=2; } if (sindex == 0){ net[myel] = tnet[0]; } return; } int main(int argc, char **argv){ float *x, *x_dev, *net, *net_dev; image_info *tmpd = (image_info*) malloc (sizeof(image_info)); char img_name[20] = "", results[20] = "", info[20] = "", matrix[20] = ""; FILE *odf; cudaDeviceProp prop; strcpy(img_name, argv[1]); strcpy(results, img_name); strcpy(info, img_name); strcpy(matrix, img_name); strcat(results, ".net"); strcat(info, ".info"); strcat(matrix, ".asc"); get_img_info(info, tmpd); tmpd->plane = tmpd->imd[0]*tmpd->imd[1]; tmpd->vol = tmpd->plane*tmpd->imd[2]; // Definir los arrays en host x = (float *) malloc(tmpd->vol*sizeof(float)); net = (float *) malloc(tmpd->vol*sizeof(float)); // Definir los arrays en device cudaMalloc((void **)&x_dev, tmpd->vol*sizeof(float)); cudaMalloc((void **)&net_dev, tmpd->vol*sizeof(float)); cudaMalloc((void **)&shape, sizeof(image_info)); get_img(matrix, x); // Cojo la primera (por ahora) GPU y guardo las propiedades cudaGetDeviceProperties( &prop, 0 ); dim3 numthreads(prop.maxThreadsPerBlock,1,1); dim3 numgrids(tmpd->imd[0],tmpd->imd[1],tmpd->imd[2]); // Copiar a memoria constante las caracteristicas de la imagen cudaMemcpyToSymbol(shape, tmpd, sizeof(image_info)); // Copiar a device la matriz cudaMemcpy(x_dev, x, tmpd->vol*sizeof(float), cudaMemcpyHostToDevice); // the real deal network<<>> (x_dev, net_dev); // Copiar a host el resultado cudaMemcpy(net, net_dev, tmpd->vol*sizeof(float), cudaMemcpyDeviceToHost); //write network to disk odf = fopen(results, "w"); int i; for (i = 0; ivol; i++){ fprintf(odf, "%f ", net[i]); } fclose(odf); //Free the mallocs! free(x); free(net); cudaFree(x_dev); cudaFree(net_dev); cudaDeviceReset(); return 0; }