Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

[tlm_teamd] get_dumps log because of a redundant keyspace notif handled #5

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tlm_teamd/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "teamdctl_mgr.h"
#include "values_store.h"

#define TIMEOUT_MAX_RETRY 3
#define DEL_NOTIF_MAX_RETRY 1

bool g_run = true;

Expand Down Expand Up @@ -98,7 +100,9 @@ int main()
if (res == swss::Select::OBJECT)
{
update_interfaces(sst_lag, teamdctl_mgr);
values_store.update(teamdctl_mgr.get_dumps(false));
// A portchannel config del can cause two notifications.
// A SET and DEL. It triggers get_dumps for a resource which is already deleted
values_store.update(teamdctl_mgr.get_dumps(DEL_NOTIF_MAX_RETRY));
}
else if (res == swss::Select::ERROR)
{
Expand All @@ -111,7 +115,7 @@ int main()
// In the case of lag removal, there is a scenario where the select::TIMEOUT
// occurs, it triggers get_dumps incorrectly for resource which was in process of
// getting deleted. The fix here is to retry and check if this is a real failure.
values_store.update(teamdctl_mgr.get_dumps(true));
values_store.update(teamdctl_mgr.get_dumps(TIMEOUT_MAX_RETRY));
}
else
{
Expand Down
20 changes: 9 additions & 11 deletions tlm_teamd/teamdctl_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

#include "teamdctl_mgr.h"

#define MAX_RETRY 3

///
/// Custom function for libteamdctl logger. IT is empty to prevent libteamdctl to spam us with the error messages
/// @param tdc teamdctl descriptor
Expand Down Expand Up @@ -174,12 +172,12 @@ void TeamdCtlMgr::process_add_queue()
///
/// Get json dump from teamd for LAG interface with name lag_name
/// @param lag_name a name for LAG interface
/// @param to_retry is the flag used to do retry or not.
/// @param num_retry number of retries if the dump is not found.
/// @return a pair. First element of the pair is true, if the method is successful
/// false otherwise. If the first element is true, the second element has a dump
/// otherwise the second element is an empty string
///
TeamdCtlDump TeamdCtlMgr::get_dump(const std::string & lag_name, bool to_retry)
TeamdCtlDump TeamdCtlMgr::get_dump(const std::string & lag_name, int num_retry)
{
TeamdCtlDump res = { false, "" };
if (has_key(lag_name))
Expand All @@ -200,12 +198,12 @@ TeamdCtlDump TeamdCtlMgr::get_dump(const std::string & lag_name, bool to_retry)
}
else
{
// In case of failure and retry flag is set, check if it fails for MAX_RETRY times.
if (to_retry)
// In case of failure check if it fails for num_retry times.
if (num_retry > 0)
{
if (m_lags_err_retry.find(lag_name) != m_lags_err_retry.end())
{
if (m_lags_err_retry[lag_name] == MAX_RETRY)
if (m_lags_err_retry[lag_name] == num_retry)
{
SWSS_LOG_ERROR("Can't get dump for LAG '%s'. Skipping", lag_name.c_str());
m_lags_err_retry.erase(lag_name);
Expand All @@ -215,9 +213,8 @@ TeamdCtlDump TeamdCtlMgr::get_dump(const std::string & lag_name, bool to_retry)
}
else
{

// This time a different lag interface errored out.
m_lags_err_retry[lag_name] = 1;
m_lags_err_retry[lag_name] = 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please check if white space is introduced?

}
}
else
Expand All @@ -239,14 +236,14 @@ TeamdCtlDump TeamdCtlMgr::get_dump(const std::string & lag_name, bool to_retry)
/// Get dumps for all registered LAG interfaces
/// @return vector of pairs. Each pair first value is a name of LAG, second value is a dump
///
TeamdCtlDumps TeamdCtlMgr::get_dumps(bool to_retry)
TeamdCtlDumps TeamdCtlMgr::get_dumps(int num_retry)
{
TeamdCtlDumps res;

for (const auto & p: m_handlers)
{
const auto & lag_name = p.first;
const auto & result = get_dump(lag_name, to_retry);
const auto & result = get_dump(lag_name, num_retry);
const auto & status = result.first;
const auto & dump = result.second;
if (status)
Expand All @@ -258,3 +255,4 @@ TeamdCtlDumps TeamdCtlMgr::get_dumps(bool to_retry)
return res;
}


4 changes: 2 additions & 2 deletions tlm_teamd/teamdctl_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class TeamdCtlMgr
bool remove_lag(const std::string & lag_name);
void process_add_queue();
// Retry logic added to prevent incorrect error reporting in dump API's
TeamdCtlDump get_dump(const std::string & lag_name, bool to_retry);
TeamdCtlDumps get_dumps(bool to_retry);
TeamdCtlDump get_dump(const std::string & lag_name, int num_retry);
TeamdCtlDumps get_dumps(int num_retry);

private:
bool has_key(const std::string & lag_name) const;
Expand Down