Added validation set and pre-trained model; tested on PyTorch 1.8
This commit is contained in:
+28
-28
@@ -12,17 +12,17 @@ void iterate_cpu(FunctorT functor, int N) {
|
||||
}
|
||||
|
||||
at::Tensor nn_cpu(at::Tensor in0, at::Tensor in1) {
|
||||
CHECK_INPUT_CPU(in0)
|
||||
CHECK_INPUT_CPU(in1)
|
||||
CHECK_INPUT_CPU(in0);
|
||||
CHECK_INPUT_CPU(in1);
|
||||
|
||||
auto nelem0 = in0.size(0);
|
||||
auto nelem1 = in1.size(0);
|
||||
auto dim = in0.size(1);
|
||||
|
||||
AT_ASSERTM(dim == in1.size(1), "in0 and in1 have to be the same shape")
|
||||
AT_ASSERTM(dim == 3, "dim hast to be 3")
|
||||
AT_ASSERTM(in0.dim() == 2, "in0 has to be N0 x 3")
|
||||
AT_ASSERTM(in1.dim() == 2, "in1 has to be N1 x 3")
|
||||
AT_ASSERTM(dim == in1.size(1), "in0 and in1 have to be the same shape");
|
||||
AT_ASSERTM(dim == 3, "dim hast to be 3");
|
||||
AT_ASSERTM(in0.dim() == 2, "in0 has to be N0 x 3");
|
||||
AT_ASSERTM(in1.dim() == 2, "in1 has to be N1 x 3");
|
||||
|
||||
auto out = at::empty({nelem0}, torch::CPU(at::kLong));
|
||||
|
||||
@@ -37,11 +37,11 @@ at::Tensor nn_cpu(at::Tensor in0, at::Tensor in1) {
|
||||
|
||||
|
||||
at::Tensor crosscheck_cpu(at::Tensor in0, at::Tensor in1) {
|
||||
CHECK_INPUT_CPU(in0)
|
||||
CHECK_INPUT_CPU(in1)
|
||||
CHECK_INPUT_CPU(in0);
|
||||
CHECK_INPUT_CPU(in1);
|
||||
|
||||
AT_ASSERTM(in0.dim() == 1, "")
|
||||
AT_ASSERTM(in1.dim() == 1, "")
|
||||
AT_ASSERTM(in0.dim() == 1, "");
|
||||
AT_ASSERTM(in1.dim() == 1, "");
|
||||
|
||||
auto nelem0 = in0.size(0);
|
||||
auto nelem1 = in1.size(0);
|
||||
@@ -57,21 +57,21 @@ at::Tensor crosscheck_cpu(at::Tensor in0, at::Tensor in1) {
|
||||
|
||||
|
||||
at::Tensor proj_nn_cpu(at::Tensor xyz0, at::Tensor xyz1, at::Tensor K, int patch_size) {
|
||||
CHECK_INPUT_CPU(xyz0)
|
||||
CHECK_INPUT_CPU(xyz1)
|
||||
CHECK_INPUT_CPU(K)
|
||||
CHECK_INPUT_CPU(xyz0);
|
||||
CHECK_INPUT_CPU(xyz1);
|
||||
CHECK_INPUT_CPU(K);
|
||||
|
||||
auto batch_size = xyz0.size(0);
|
||||
auto height = xyz0.size(1);
|
||||
auto width = xyz0.size(2);
|
||||
|
||||
AT_ASSERTM(xyz0.size(0) == xyz1.size(0), "")
|
||||
AT_ASSERTM(xyz0.size(1) == xyz1.size(1), "")
|
||||
AT_ASSERTM(xyz0.size(2) == xyz1.size(2), "")
|
||||
AT_ASSERTM(xyz0.size(3) == xyz1.size(3), "")
|
||||
AT_ASSERTM(xyz0.size(3) == 3, "")
|
||||
AT_ASSERTM(xyz0.dim() == 4, "")
|
||||
AT_ASSERTM(xyz1.dim() == 4, "")
|
||||
AT_ASSERTM(xyz0.size(0) == xyz1.size(0), "");
|
||||
AT_ASSERTM(xyz0.size(1) == xyz1.size(1), "");
|
||||
AT_ASSERTM(xyz0.size(2) == xyz1.size(2), "");
|
||||
AT_ASSERTM(xyz0.size(3) == xyz1.size(3), "");
|
||||
AT_ASSERTM(xyz0.size(3) == 3, "");
|
||||
AT_ASSERTM(xyz0.dim() == 4, "");
|
||||
AT_ASSERTM(xyz1.dim() == 4, "");
|
||||
|
||||
auto out = at::empty({batch_size, height, width}, torch::CPU(at::kLong));
|
||||
|
||||
@@ -86,8 +86,8 @@ at::Tensor proj_nn_cpu(at::Tensor xyz0, at::Tensor xyz1, at::Tensor K, int patch
|
||||
|
||||
|
||||
at::Tensor xcorrvol_cpu(at::Tensor in0, at::Tensor in1, int n_disps, int block_size) {
|
||||
CHECK_INPUT_CPU(in0)
|
||||
CHECK_INPUT_CPU(in1)
|
||||
CHECK_INPUT_CPU(in0);
|
||||
CHECK_INPUT_CPU(in1);
|
||||
|
||||
auto channels = in0.size(0);
|
||||
auto height = in0.size(1);
|
||||
@@ -108,8 +108,8 @@ at::Tensor xcorrvol_cpu(at::Tensor in0, at::Tensor in1, int n_disps, int block_s
|
||||
|
||||
|
||||
at::Tensor photometric_loss_forward(at::Tensor es, at::Tensor ta, int block_size, int type, float eps) {
|
||||
CHECK_INPUT_CPU(es)
|
||||
CHECK_INPUT_CPU(ta)
|
||||
CHECK_INPUT_CPU(es);
|
||||
CHECK_INPUT_CPU(ta);
|
||||
|
||||
auto batch_size = es.size(0);
|
||||
auto channels = es.size(1);
|
||||
@@ -145,16 +145,16 @@ at::Tensor photometric_loss_forward(at::Tensor es, at::Tensor ta, int block_size
|
||||
}
|
||||
|
||||
at::Tensor photometric_loss_backward(at::Tensor es, at::Tensor ta, at::Tensor grad_out, int block_size, int type, float eps) {
|
||||
CHECK_INPUT_CPU(es)
|
||||
CHECK_INPUT_CPU(ta)
|
||||
CHECK_INPUT_CPU(grad_out)
|
||||
CHECK_INPUT_CPU(es);
|
||||
CHECK_INPUT_CPU(ta);
|
||||
CHECK_INPUT_CPU(grad_out);
|
||||
|
||||
auto batch_size = es.size(0);
|
||||
auto channels = es.size(1);
|
||||
auto height = es.size(2);
|
||||
auto width = es.size(3);
|
||||
|
||||
CHECK_INPUT_CPU(ta)
|
||||
CHECK_INPUT_CPU(ta);
|
||||
auto grad_in = at::zeros({batch_size, channels, height, width}, grad_out.options());
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES(es.scalar_type(), "photometric_loss_backward_cpu", ([&] {
|
||||
|
||||
+27
-27
@@ -7,16 +7,16 @@
|
||||
void nn_kernel(at::Tensor in0, at::Tensor in1, at::Tensor out);
|
||||
|
||||
at::Tensor nn_cuda(at::Tensor in0, at::Tensor in1) {
|
||||
CHECK_INPUT_CUDA(in0)
|
||||
CHECK_INPUT_CUDA(in1)
|
||||
CHECK_INPUT_CUDA(in0);
|
||||
CHECK_INPUT_CUDA(in1);
|
||||
|
||||
auto nelem0 = in0.size(0);
|
||||
auto dim = in0.size(1);
|
||||
|
||||
AT_ASSERTM(dim == in1.size(1), "in0 and in1 have to be the same shape")
|
||||
AT_ASSERTM(dim == 3, "dim hast to be 3")
|
||||
AT_ASSERTM(in0.dim() == 2, "in0 has to be N0 x 3")
|
||||
AT_ASSERTM(in1.dim() == 2, "in1 has to be N1 x 3")
|
||||
AT_ASSERTM(dim == in1.size(1), "in0 and in1 have to be the same shape");
|
||||
AT_ASSERTM(dim == 3, "dim hast to be 3");
|
||||
AT_ASSERTM(in0.dim() == 2, "in0 has to be N0 x 3");
|
||||
AT_ASSERTM(in1.dim() == 2, "in1 has to be N1 x 3");
|
||||
|
||||
auto out = at::empty({nelem0}, torch::CUDA(at::kLong));
|
||||
|
||||
@@ -29,11 +29,11 @@ at::Tensor nn_cuda(at::Tensor in0, at::Tensor in1) {
|
||||
void crosscheck_kernel(at::Tensor in0, at::Tensor in1, at::Tensor out);
|
||||
|
||||
at::Tensor crosscheck_cuda(at::Tensor in0, at::Tensor in1) {
|
||||
CHECK_INPUT_CUDA(in0)
|
||||
CHECK_INPUT_CUDA(in1)
|
||||
CHECK_INPUT_CUDA(in0);
|
||||
CHECK_INPUT_CUDA(in1);
|
||||
|
||||
AT_ASSERTM(in0.dim() == 1, "")
|
||||
AT_ASSERTM(in1.dim() == 1, "")
|
||||
AT_ASSERTM(in0.dim() == 1, "");
|
||||
AT_ASSERTM(in1.dim() == 1, "");
|
||||
|
||||
auto nelem0 = in0.size(0);
|
||||
auto out = at::empty({nelem0}, torch::CUDA(at::kByte));
|
||||
@@ -45,21 +45,21 @@ at::Tensor crosscheck_cuda(at::Tensor in0, at::Tensor in1) {
|
||||
void proj_nn_kernel(at::Tensor xyz0, at::Tensor xyz1, at::Tensor K, int patch_size, at::Tensor out);
|
||||
|
||||
at::Tensor proj_nn_cuda(at::Tensor xyz0, at::Tensor xyz1, at::Tensor K, int patch_size) {
|
||||
CHECK_INPUT_CUDA(xyz0)
|
||||
CHECK_INPUT_CUDA(xyz1)
|
||||
CHECK_INPUT_CUDA(K)
|
||||
CHECK_INPUT_CUDA(xyz0);
|
||||
CHECK_INPUT_CUDA(xyz1);
|
||||
CHECK_INPUT_CUDA(K);
|
||||
|
||||
auto batch_size = xyz0.size(0);
|
||||
auto height = xyz0.size(1);
|
||||
auto width = xyz0.size(2);
|
||||
|
||||
AT_ASSERTM(xyz0.size(0) == xyz1.size(0), "")
|
||||
AT_ASSERTM(xyz0.size(1) == xyz1.size(1), "")
|
||||
AT_ASSERTM(xyz0.size(2) == xyz1.size(2), "")
|
||||
AT_ASSERTM(xyz0.size(3) == xyz1.size(3), "")
|
||||
AT_ASSERTM(xyz0.size(3) == 3, "")
|
||||
AT_ASSERTM(xyz0.dim() == 4, "")
|
||||
AT_ASSERTM(xyz1.dim() == 4, "")
|
||||
AT_ASSERTM(xyz0.size(0) == xyz1.size(0), "");
|
||||
AT_ASSERTM(xyz0.size(1) == xyz1.size(1), "");
|
||||
AT_ASSERTM(xyz0.size(2) == xyz1.size(2), "");
|
||||
AT_ASSERTM(xyz0.size(3) == xyz1.size(3), "");
|
||||
AT_ASSERTM(xyz0.size(3) == 3, "");
|
||||
AT_ASSERTM(xyz0.dim() == 4, "");
|
||||
AT_ASSERTM(xyz1.dim() == 4, "");
|
||||
|
||||
auto out = at::empty({batch_size, height, width}, torch::CUDA(at::kLong));
|
||||
|
||||
@@ -71,8 +71,8 @@ at::Tensor proj_nn_cuda(at::Tensor xyz0, at::Tensor xyz1, at::Tensor K, int patc
|
||||
void xcorrvol_kernel(at::Tensor in0, at::Tensor in1, int n_disps, int block_size, at::Tensor out);
|
||||
|
||||
at::Tensor xcorrvol_cuda(at::Tensor in0, at::Tensor in1, int n_disps, int block_size) {
|
||||
CHECK_INPUT_CUDA(in0)
|
||||
CHECK_INPUT_CUDA(in1)
|
||||
CHECK_INPUT_CUDA(in0);
|
||||
CHECK_INPUT_CUDA(in1);
|
||||
|
||||
// auto channels = in0.size(0);
|
||||
auto height = in0.size(1);
|
||||
@@ -90,8 +90,8 @@ at::Tensor xcorrvol_cuda(at::Tensor in0, at::Tensor in1, int n_disps, int block_
|
||||
void photometric_loss_forward_kernel(at::Tensor es, at::Tensor ta, int block_size, int type, float eps, at::Tensor out);
|
||||
|
||||
at::Tensor photometric_loss_forward(at::Tensor es, at::Tensor ta, int block_size, int type, float eps) {
|
||||
CHECK_INPUT_CUDA(es)
|
||||
CHECK_INPUT_CUDA(ta)
|
||||
CHECK_INPUT_CUDA(es);
|
||||
CHECK_INPUT_CUDA(ta);
|
||||
|
||||
auto batch_size = es.size(0);
|
||||
auto height = es.size(2);
|
||||
@@ -107,9 +107,9 @@ at::Tensor photometric_loss_forward(at::Tensor es, at::Tensor ta, int block_size
|
||||
void photometric_loss_backward_kernel(at::Tensor es, at::Tensor ta, at::Tensor grad_out, int block_size, int type, float eps, at::Tensor grad_in);
|
||||
|
||||
at::Tensor photometric_loss_backward(at::Tensor es, at::Tensor ta, at::Tensor grad_out, int block_size, int type, float eps) {
|
||||
CHECK_INPUT_CUDA(es)
|
||||
CHECK_INPUT_CUDA(ta)
|
||||
CHECK_INPUT_CUDA(grad_out)
|
||||
CHECK_INPUT_CUDA(es);
|
||||
CHECK_INPUT_CUDA(ta);
|
||||
CHECK_INPUT_CUDA(grad_out);
|
||||
|
||||
auto batch_size = es.size(0);
|
||||
auto channels = es.size(1);
|
||||
|
||||
@@ -8,9 +8,6 @@ include_dirs = [
|
||||
]
|
||||
|
||||
nvcc_args = [
|
||||
'-arch=sm_30',
|
||||
'-gencode=arch=compute_30,code=sm_30',
|
||||
'-gencode=arch=compute_35,code=sm_35',
|
||||
]
|
||||
|
||||
setup(
|
||||
|
||||
Reference in New Issue
Block a user