diff --git a/src/gpu/ocl/vectorized_lnorm.cpp b/src/gpu/ocl/vectorized_lnorm.cpp
index 4f5401b3bb8..e8b1f484b56 100644
--- a/src/gpu/ocl/vectorized_lnorm.cpp
+++ b/src/gpu/ocl/vectorized_lnorm.cpp
@@ -43,7 +43,6 @@ bool is_fused_kernel_applicable(lnorm_conf_t &conf,
 
     auto gpu_arch = compute_engine->device_info()->gpu_arch();
     memory_desc_wrapper src_mdw(pd->src_md());
-    memory_desc_wrapper dst_mdw(pd->src_md());
     memory_desc_wrapper stat_mdw(pd->stat_md());
     auto eu_count = compute_engine->device_info()->eu_count();
     auto max_eus_per_wg = device_info_t::max_eus_per_wg(gpu_arch);
@@ -53,9 +52,11 @@ bool is_fused_kernel_applicable(lnorm_conf_t &conf,
     const size_t max_slm_size = device_info_t::max_slm_size(gpu_arch);
 
     // Plain layout only
-    if (!(src_mdw.matches_one_of_tag(ab, abc, abcd, abcde)
-                && stat_mdw.matches_one_of_tag(a, ab)))
-        return false;
+    const bool is_plain = src_mdw.matches_one_of_tag(ab, abc)
+            && stat_mdw.matches_one_of_tag(a, ab)
+            // kernel does not support M x 1 x N layout
+            && IMPLICATION(src_mdw.ndims() == 3, src_mdw.dims()[1] != 1);
+    if (!is_plain) return false;
 
     const int desired_sg_size = 16; // based on PVC performance data
     conf.sub_group_size = mayiuse_sg(desired_sg_size, engine)