#include #include #define IMDIM 3 #define NPROC 1024 #define imin(a,b) (aimd[0]); fscanf(f, "%d", &img->imd[1]); fscanf(f, "%d", &img->imd[2]); fscanf(f, "%f", &img->pixd[0]); fscanf(f, "%f", &img->pixd[1]); fscanf(f, "%f", &img->pixd[2]); fclose(f); return; } __device__ float distance (int i, int j){ int k = abs(i-j); int x,y,z; int xy; z = (int) k/shape.plane; xy = k%shape.plane; y = (int) xy/shape.imd[0]; x = xy%shape.imd[0]; return shape.pixd[0]*x*shape.pixd[0]*x + shape.pixd[1]*y*shape.pixd[1]*y + shape.pixd[2]*z*shape.pixd[2]*z; } __global__ void network (float *matrix, float *net){ int myel = blockIdx.x + blockIdx.y*shape.imd[0]+ blockIdx.z*shape.plane; int i; float temp = 0; //printf("myel: %d, %d\n", myel, shape.vol); for(i=0; i0 && matrix[i]>0){ temp += matrix[myel]*matrix[i]/distance(myel,i); } } net[myel] = temp; return; } int main(int argc, char **argv){ float *x, *x_dev, *net, *net_dev; image_info *tmpd = (image_info*) malloc (sizeof(image_info)); char img_name[20] = "", results[20] = "", info[20] = "", matrix[20] = ""; FILE *odf; cudaDeviceProp prop; strcpy(img_name, argv[1]); strcpy(results, img_name); strcpy(info, img_name); strcpy(matrix, img_name); strcat(results, ".net"); strcat(info, ".info"); strcat(matrix, ".asc"); get_img_info(info, tmpd); tmpd->plane = tmpd->imd[0]*tmpd->imd[1]; tmpd->vol = tmpd->plane*tmpd->imd[2]; // Definir los arrays en host x = (float *) malloc(tmpd->vol*sizeof(float)); net = (float *) malloc(tmpd->vol*sizeof(float)); // Definir los arrays en device cudaMalloc((void **)&x_dev, tmpd->vol*sizeof(float)); cudaMalloc((void **)&net_dev, tmpd->vol*sizeof(float)); cudaMalloc((void **)&shape, sizeof(image_info)); get_img(matrix, x); dim3 numgrids(tmpd->imd[0],tmpd->imd[1],tmpd->imd[2]); // Copiar a memoria constante las caracteristicas de la imagen cudaMemcpyToSymbol(shape, tmpd, sizeof(image_info)); // Copiar a device la matriz cudaMemcpy(x_dev, x, tmpd->vol*sizeof(float), cudaMemcpyHostToDevice); // the real deal network<<>> (x_dev, net_dev); // Copiar a host el resultado cudaMemcpy(net, net_dev, tmpd->vol*sizeof(float), cudaMemcpyDeviceToHost); //write network to disk odf = fopen(results, "w"); int i; for (i = 0; ivol; i++){ fprintf(odf, "%f\n ", net[i]); } fclose(odf); //Free the mallocs! free(x); free(net); cudaFree(x_dev); cudaFree(net_dev); cudaDeviceReset(); return 0; }