Support new reid model #430

Merged
1 commit merged on Oct 6, 2023
6 changes: 2 additions & 4 deletions README.md
@@ -3,14 +3,12 @@

# Last changes

* Re-identification model osnet_x0_25_msmt17 from [mikel-brostrom/yolo_tracking](https://github.com/mikel-brostrom/yolo_tracking)

* YOLOv8 detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example

* Some experiments with YOLOv7_mask and results with rotated rectangles: the detector works, the tracker is in progress

* YOLOv7 worked with TensorRT! Export pretrained Pytorch models [here (WongKinYiu/yolov7)](https://github.com/WongKinYiu/yolov7) to onnx format and run Multitarget-tracker with -e=6 example

* YOLOv6 worked with TensorRT! Download pretrained onnx models [here (meituan/YOLOv6)](https://github.com/meituan/YOLOv6/releases/tag/0.1.0) and run Multitarget-tracker with -e=6 example

# New videos!

* YOLOv7 instance segmentation
Binary file added data/reid/osnet_x0_25_msmt17.onnx
Binary file not shown.
56 changes: 39 additions & 17 deletions example/examples.h
@@ -672,7 +672,7 @@ class YoloDarknetExample final : public VideoExample
{
if (!m_trackerSettingsLoaded)
{
bool useDeepSORT = false;
bool useDeepSORT = true;
if (useDeepSORT)
{
#ifdef _WIN32
@@ -681,26 +681,18 @@
std::string pathToModel = "../data/";
#endif

#if 1
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.xml",
pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.bin",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person") });
#endif

#if 0
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "open_model_zoo/vehicle-reid-0001/osnet_ain_x1_0_vehicle_reid.xml",
pathToModel + "open_model_zoo/vehicle-reid-0001/osnet_ain_x1_0_vehicle_reid.bin",
cv::Size(208, 208),
std::vector<objtype_t>{ TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });
#endif
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "reid/osnet_x0_25_msmt17.onnx",
pathToModel + "reid/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person"), TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });

std::array<track_t, tracking::DistsCount> distType{
0.f, // DistCenters
0.f, // DistRects
0.5f, // DistJaccard
0.f, // DistHist
0.5f // DistFeatureCos
0.5f, // DistFeatureCos
0.f // DistMahalanobis
};
if (!m_trackerSettings.SetDistances(distType))
std::cerr << "SetDistances failed! Absolute sum must be equal to 1" << std::endl;
@@ -843,7 +835,7 @@ class YoloTensorRTExample final : public VideoExample
YOLOv7Mask,
YOLOv8
};
YOLOModels usedModel = YOLOModels::YOLOv5;
YOLOModels usedModel = YOLOModels::YOLOv4;
switch (usedModel)
{
case YOLOModels::TinyYOLOv3:
@@ -980,7 +972,37 @@ class YoloTensorRTExample final : public VideoExample
{
if (!m_trackerSettingsLoaded)
{
m_trackerSettings.SetDistance(tracking::DistCenters);
bool useDeepSORT = true;
if (useDeepSORT)
{
#ifdef _WIN32
std::string pathToModel = "../../data/";
#else
std::string pathToModel = "../data/";
#endif

m_trackerSettings.m_embeddings.emplace_back(pathToModel + "reid/osnet_x0_25_msmt17.onnx",
pathToModel + "reid/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person"), TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });

std::array<track_t, tracking::DistsCount> distType{
0.f, // DistCenters
0.f, // DistRects
0.5f, // DistJaccard
0.f, // DistHist
0.5f, // DistFeatureCos
0.f // DistMahalanobis
};
if (!m_trackerSettings.SetDistances(distType))
std::cerr << "SetDistances failed! Absolute sum must be equal to 1" << std::endl;
}
else
{
m_trackerSettings.SetDistance(tracking::DistCenters);
}

//m_trackerSettings.SetDistance(tracking::DistCenters);
m_trackerSettings.m_kalmanType = tracking::KalmanLinear;
m_trackerSettings.m_filterGoal = tracking::FilterCenter;
m_trackerSettings.m_lostTrackType = tracking::TrackKCF; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect
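The `distType` arrays above put 0.5 on `DistJaccard` and 0.5 on `DistFeatureCos`, and `SetDistances` rejects weight sets whose sum is not 1 (hence the error message). A minimal standalone sketch of that constraint, assuming the same six-distance ordering; it does not use the project's own `TrackerSettings` API:

```cpp
#include <array>
#include <cmath>
#include <iostream>

using track_t = float; // assumption: the tracker's track_t is a float

// Order matches the arrays above: DistCenters, DistRects, DistJaccard,
// DistHist, DistFeatureCos, DistMahalanobis.
bool WeightsSumToOne(const std::array<track_t, 6>& w)
{
    track_t sum = 0;
    for (track_t v : w)
        sum += v;
    return std::fabs(sum - 1) < 1e-5f;
}

int main()
{
    // 50% IoU (Jaccard) + 50% re-id feature cosine, as configured in this PR.
    std::array<track_t, 6> distType{ 0.f, 0.f, 0.5f, 0.f, 0.5f, 0.f };
    std::cout << (WeightsSumToOne(distType) ? "ok" : "weights must sum to 1") << std::endl;
    return 0;
}
```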
127 changes: 127 additions & 0 deletions example/main.cpp
@@ -43,6 +43,100 @@ const char* keys =

// ----------------------------------------------------------------------

#pragma once

///
/// \brief The EmbeddingsCalculator class
///
class EmbeddingsCalculatorSimple
{
public:
EmbeddingsCalculatorSimple() = default;
virtual ~EmbeddingsCalculatorSimple() = default;

///
bool Initialize(const std::string& cfgName, const std::string& weightsName, const cv::Size& inputLayer)
{
m_inputLayer = inputLayer;

#if 1
m_net = cv::dnn::readNet(weightsName);
#else
m_net = cv::dnn::readNetFromTorch(weightsName);
#endif
if (!m_net.empty())
{
//m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
//m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

auto outNames = m_net.getUnconnectedOutLayersNames();
auto outLayers = m_net.getUnconnectedOutLayers();
auto outLayerType = m_net.getLayer(outLayers[0])->type;

std::vector<cv::dnn::MatShape> outputs;
std::vector<cv::dnn::MatShape> internals;
m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals);
std::cout << "REID: getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl;
if (outputs.size() && outputs[0].size() > 3)
std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "], internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]" << std::endl;
}
return !m_net.empty();
}

///
bool IsInitialized() const
{
return !m_net.empty();
}

///
cv::Mat Calc(const cv::Mat& img, cv::Rect rect)
{
auto Clamp = [](int& v, int& size, int hi) -> int
{
int res = 0;
if (v < 0)
{
res = v;
v = 0;
return res;
}
else if (v + size > hi - 1)
{
res = v;
v = hi - 1 - size;
if (v < 0)
{
size += v;
v = 0;
}
res -= v;
return res;
}
return res;
};
Clamp(rect.x, rect.width, img.cols);
Clamp(rect.y, rect.height, img.rows);

cv::Mat obj;
cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_CUBIC);
cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0 / 255.0, cv::Size(), cv::Scalar(), false, false, CV_32F);

m_net.setInput(blob);
cv::Mat embedding;
std::cout << "embedding: " << embedding.size() << ", chans = " << embedding.channels() << std::endl;
//std::cout << "orig: " << embedding << std::endl;
cv::normalize(m_net.forward(), embedding);
//std::cout << "normalized: " << embedding << std::endl;
return embedding;
}

private:
cv::dnn::Net m_net;
cv::Size m_inputLayer{ 128, 256 };
};


int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, keys);
@@ -54,6 +148,39 @@ int main(int argc, char** argv)
cv::ocl::setUseOpenCL(useOCL);
std::cout << (cv::ocl::useOpenCL() ? "OpenCL is enabled" : "OpenCL not used") << std::endl;

#if 0
EmbeddingsCalculatorSimple ec;
ec.Initialize("C:/work/home/mtracker/tmp/reid/models/osnet_x0_25_msmt17.onnx",
"C:/work/home/mtracker/tmp/reid/models/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256));
std::cout << "ec.IsInitialized(): " << ec.IsInitialized() << std::endl;

cv::Mat img = cv::imread("C:/work/home/mtracker/Multitarget-tracker/build/Release/vlcsnap-2023-10-06-17h31m54s413.png");
cv::Rect r1(564, 526, 124, 260);
//cv::Rect r2(860, 180, 48, 160);
cv::Rect r2(560, 522, 132, 264);

cv::Mat e1 = ec.Calc(img, r1);
cv::Mat e2 = ec.Calc(img, r2);

//cv::Mat mul = e1 * e2.t();
std::cout << "e1: " << e1 << std::endl;
std::cout << "e2: " << e2 << std::endl;
cv::Mat diff;
cv::absdiff(e1, e2, diff);
cv::Scalar ss = cv::sum(diff);
cv::Mat mul = e1 * e2.t();
float res = static_cast<float>(1.f - mul.at<float>(0, 0));
std::cout << "mul = " << mul << ", sum = " << ss << ", res = " << res << std::endl;

cv::rectangle(img, r1, cv::Scalar(255, 0, 255));
cv::rectangle(img, r2, cv::Scalar(255, 0, 0));
cv::imshow("img", img);
cv::waitKey(0);

return 0;
#endif

int exampleNum = parser.get<int>("example");
int asyncPipeline = parser.get<int>("async");

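The `#if 0` block above is a manual sanity check with hard-coded local paths. A compact usage sketch for the `EmbeddingsCalculatorSimple` class defined above; the frame and rectangles are placeholders, and only the model path refers to the onnx file added by this PR:

```cpp
#include <opencv2/opencv.hpp>

// Assumes EmbeddingsCalculatorSimple from above is visible in this translation unit.
float ReidDistance(EmbeddingsCalculatorSimple& ec, const cv::Mat& frame,
                   cv::Rect r1, cv::Rect r2)
{
    cv::Mat e1 = ec.Calc(frame, r1);
    cv::Mat e2 = ec.Calc(frame, r2);
    // Calc() returns L2-normalized rows, so the dot product is the cosine
    // similarity; a value of 1 - dot near 0 means "same appearance".
    return 1.f - static_cast<float>(e1.dot(e2));
}
```

Flipping the `#if 0` to `#if 1` runs essentially this comparison on two overlapping crops and prints `mul`, `sum` and `res` before exiting.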
7 changes: 3 additions & 4 deletions src/Tracker/Ctracker.cpp
@@ -544,7 +544,7 @@ void CTracker::CreateDistaceMatrix(const regions_t& regions,
if (resCos.second)
{
dist += m_settings.m_distType[ind] * resCos.first;
//std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = " << resCos.value() << ", dist = " << dist << std::endl;
//std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = " << resCos.first << ", dist = " << dist << std::endl;
}
else
{
@@ -628,13 +628,12 @@ void CTracker::CalcEmbeddins(std::vector<RegionEmbedding>& regionEmbeddings, con
if (embCalc != std::end(m_embCalculators))
{
embCalc->second->Calc(currFrame, regions[j].m_brect, regionEmbeddings[j].m_embedding);
regionEmbeddings[j].m_embDot = regionEmbeddings[j].m_embedding.dot(regionEmbeddings[j].m_embedding);

// std::cout << "Founded! m_embedding = " << regionEmbeddings[j].m_embedding.size() << ", m_embDot = " << regionEmbeddings[j].m_embDot << std::endl;
//std::cout << "Founded! m_embedding = " << regionEmbeddings[j].m_embedding.size() << std::endl;
}
else
{
// std::cout << "Not found" << std::endl;
//std::cout << "Not found" << std::endl;
}
}
}
58 changes: 50 additions & 8 deletions src/Tracker/EmbeddingsCalculator.hpp
@@ -16,14 +16,56 @@ class EmbeddingsCalculator
m_inputLayer = inputLayer;

#if 1
m_net = cv::dnn::readNet(weightsName, cfgName);
m_net = cv::dnn::readNet(weightsName);
#else
m_net = cv::dnn::readNetFromTensorflow(weightsName, cfgName);
m_net = cv::dnn::readNetFromTorch(weightsName);
#endif
if (!m_net.empty())
{
m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4))
std::map<cv::dnn::Target, std::string> dictTargets;
dictTargets[cv::dnn::DNN_TARGET_CPU] = "DNN_TARGET_CPU";
dictTargets[cv::dnn::DNN_TARGET_OPENCL] = "DNN_TARGET_OPENCL";
dictTargets[cv::dnn::DNN_TARGET_OPENCL_FP16] = "DNN_TARGET_OPENCL_FP16";
dictTargets[cv::dnn::DNN_TARGET_MYRIAD] = "DNN_TARGET_MYRIAD";
dictTargets[cv::dnn::DNN_TARGET_CUDA] = "DNN_TARGET_CUDA";
dictTargets[cv::dnn::DNN_TARGET_CUDA_FP16] = "DNN_TARGET_CUDA_FP16";

std::map<int, std::string> dictBackends;
dictBackends[cv::dnn::DNN_BACKEND_DEFAULT] = "DNN_BACKEND_DEFAULT";
dictBackends[cv::dnn::DNN_BACKEND_HALIDE] = "DNN_BACKEND_HALIDE";
dictBackends[cv::dnn::DNN_BACKEND_INFERENCE_ENGINE] = "DNN_BACKEND_INFERENCE_ENGINE";
dictBackends[cv::dnn::DNN_BACKEND_OPENCV] = "DNN_BACKEND_OPENCV";
dictBackends[cv::dnn::DNN_BACKEND_VKCOM] = "DNN_BACKEND_VKCOM";
dictBackends[cv::dnn::DNN_BACKEND_CUDA] = "DNN_BACKEND_CUDA";
dictBackends[1000000] = "DNN_BACKEND_INFERENCE_ENGINE_NGRAPH";
dictBackends[1000000 + 1] = "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019";

std::cout << "Avaible pairs for Target - backend:" << std::endl;
std::vector<std::pair<cv::dnn::Backend, cv::dnn::Target>> pairs = cv::dnn::getAvailableBackends();
for (auto p : pairs)
{
std::cout << dictBackends[p.first] << " (" << p.first << ") - " << dictTargets[p.second] << " (" << p.second << ")" << std::endl;

if (p.first == cv::dnn::DNN_BACKEND_CUDA)
{
//m_net.setPreferableTarget(p.second);
//m_net.setPreferableBackend(p.first);
//std::cout << "Set!" << std::endl;
}
}
#endif

auto outNames = m_net.getUnconnectedOutLayersNames();
auto outLayers = m_net.getUnconnectedOutLayers();
auto outLayerType = m_net.getLayer(outLayers[0])->type;

std::vector<cv::dnn::MatShape> outputs;
std::vector<cv::dnn::MatShape> internals;
m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals);
std::cout << "REID: getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl;
if (outputs.size() && outputs[0].size() > 3)
std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "], internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]" << std::endl;
}
return !m_net.empty();
#else
@@ -72,13 +114,13 @@ class EmbeddingsCalculator
Clamp(rect.x, rect.width, img.cols);
Clamp(rect.y, rect.height, img.rows);

cv::UMat obj;
cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_LANCZOS4);
cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0, cv::Size(), cv::Scalar(), false, false);
cv::Mat obj;
cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_CUBIC);
cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0 / 255.0, cv::Size(), cv::Scalar(), false, false, CV_32F);

m_net.setInput(blob);
embedding = m_net.forward();
//std::cout << "embedding: " << embedding.size() << ", chans = " << embedding.channels() << std::endl;
cv::normalize(m_net.forward(), embedding);
#else
std::cerr << "EmbeddingsCalculator was disabled in CMAKE! Check SetDistances params." << std::endl;
#endif
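The new initialization code only enumerates and prints the available backend/target pairs; the `setPreferableBackend`/`setPreferableTarget` calls stay commented out. A hedged sketch of how a CUDA backend could be selected from `cv::dnn::getAvailableBackends()` if one wanted to enable it (assumes OpenCV >= 4.2 built with CUDA support):

```cpp
#include <opencv2/opencv.hpp>

void PreferCudaIfAvailable(cv::dnn::Net& net)
{
    for (const auto& p : cv::dnn::getAvailableBackends())
    {
        if (p.first == cv::dnn::DNN_BACKEND_CUDA && p.second == cv::dnn::DNN_TARGET_CUDA)
        {
            net.setPreferableBackend(p.first); // run inference through the CUDA backend
            net.setPreferableTarget(p.second); // on a CUDA device
            return;
        }
    }
    // Otherwise keep OpenCV's default backend/target, as the code above does.
}
```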
11 changes: 4 additions & 7 deletions src/Tracker/track.cpp
@@ -218,13 +218,10 @@ std::pair<track_t, bool> CTrack::CalcCosine(const RegionEmbedding& embedding) co
track_t res = 1;
if (!embedding.m_embedding.empty() && !m_regionEmbedding.m_embedding.empty())
{
double xy = embedding.m_embedding.dot(m_regionEmbedding.m_embedding);
double norm = sqrt(embedding.m_embDot * m_regionEmbedding.m_embDot) + 1e-6;
#if 0
res = 1.f - 0.5f * fabs(static_cast<float>(xy / norm));
#else
res = 0.5f * static_cast<float>(1.0 - xy / norm);
#endif
cv::Mat mul = embedding.m_embedding * m_regionEmbedding.m_embedding.t();
res = static_cast<track_t>(1.f - mul.at<float>(0, 0));
if (res < 0)
res = 0;
//std::cout << "CTrack::CalcCosine: " << embedding.m_embedding.size() << " - " << m_regionEmbedding.m_embedding.size() << " = " << res << std::endl;
return { res, true };
}
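The reworked `CalcCosine` relies on both embeddings being L2-normalized at extraction time, so the 1x1 matrix product `e1 * e2.t()` already equals the cosine similarity and the distance is `1 - cos`, clamped at 0 (no cached `m_embDot` norm term is needed anymore). A minimal equivalent sketch using `cv::Mat::dot`:

```cpp
#include <opencv2/opencv.hpp>
#include <algorithm>

float CosineDistance(const cv::Mat& e1, const cv::Mat& e2)
{
    // e1 and e2 are 1xN CV_32F rows, already L2-normalized by the embeddings calculator.
    float cosSim = static_cast<float>(e1.dot(e2)); // equals (e1·e2)/(|e1||e2|) for unit vectors
    return std::max(0.f, 1.f - cosSim);            // clamp to 0, as in the diff
}
```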
1 change: 0 additions & 1 deletion src/Tracker/track.h
@@ -22,7 +22,6 @@ struct RegionEmbedding
{
cv::Mat m_hist;
cv::Mat m_embedding;
double m_embDot = 0.;
};

///
Expand Down