Use `double temp = output[(3*k+1)*row*col+ i * col +j - k*col];` instead of `double temp = output[(3*k+1)*row*col+ i * col +j];` (the only change is the added `- k*col` term in the index).