Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Dev/licenses #312

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ dependencies/.extracted
.idea

.idea/misc.xml

build/
.cache/
# Deployment stuff. Containing some personal info.
.idea/deployment.xml
.idea/other.xml
Expand Down
3 changes: 3 additions & 0 deletions etc/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ PendingQueueMaxSize: 900000
# Default value is 100000.
ScheduledBatchSize: 100000

# The set of licenses and their quantities, as comma-separated name:count pairs (only needs to be configured in the ctld config file).
# Licenses: fluent:30,ansys:100

# Scheduler will reject all jobs beyond processing capacity set by PendingQueueMaxSize
# if this option is set to true.
# Default value is false.
Expand Down
114 changes: 69 additions & 45 deletions protos/Crane.proto
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ message SubmitBatchTaskRequest {

message SubmitBatchTaskReply {
bool ok = 1;
oneof payload{
oneof payload {
uint32 task_id = 2;
string reason = 3;
}
Expand Down Expand Up @@ -90,14 +90,14 @@ message CreateCgroupForTasksRequest {
repeated string execution_node = 4;
}

message CreateCgroupForTasksReply{}
message CreateCgroupForTasksReply {}

message ReleaseCgroupForTasksRequest{
message ReleaseCgroupForTasksRequest {
repeated uint32 task_id_list = 1;
repeated uint32 uid_list = 2;
}

message ReleaseCgroupForTasksReply{}
message ReleaseCgroupForTasksReply {}

message TerminateTasksRequest {
repeated uint32 task_id_list = 1;
Expand Down Expand Up @@ -154,30 +154,28 @@ message CancelTaskReply {
repeated string not_cancelled_reasons = 4;
}


message QueryTaskIdFromPortRequest{
message QueryTaskIdFromPortRequest {
uint32 port = 1;
}

message QueryTaskIdFromPortReply{
message QueryTaskIdFromPortReply {
bool ok = 1;
uint32 task_id = 2;
}

message QueryTaskIdFromPortForwardRequest{
message QueryTaskIdFromPortForwardRequest {
uint32 ssh_remote_port = 1;
string ssh_remote_address = 2;
uint32 uid = 3;
}

message QueryTaskIdFromPortForwardReply{
message QueryTaskIdFromPortForwardReply {
bool ok = 1;
bool from_user = 2;
uint32 task_id = 3;
string cgroup_path = 4;
}


message QueryCranedInfoRequest {
string craned_name = 1;
}
Expand All @@ -194,6 +192,15 @@ message QueryPartitionInfoReply {
repeated PartitionInfo partition_info = 1;
}

// Request message of the CraneCtld QueryLicensesInfo RPC.
message QueryLicensesInfoRequest {
// Names of the licenses to query.
// NOTE(review): presumably an empty list means "all licenses" — confirm
// against the CraneCtld handler.
repeated string license_name_list = 1;
}

// Reply message of the CraneCtld QueryLicensesInfo RPC.
message QueryLicensesInfoReply {
// NOTE(review): presumably true when the query succeeded — confirm what
// callers should expect in license_info_list when this is false.
bool ok = 1;
// One LicenseInfo entry per reported license.
repeated LicenseInfo license_info_list = 2;
}

message ModifyTaskRequest {
enum TargetAttributes {
TimeLimit = 0;
Expand Down Expand Up @@ -230,7 +237,7 @@ message ModifyCranedStateRequest{
string reason = 4;
}

message ModifyCranedStateReply{
message ModifyCranedStateReply {
repeated string modified_nodes = 2;
repeated string not_modified_nodes = 3;
repeated string not_modified_reasons = 4;
Expand Down Expand Up @@ -395,21 +402,21 @@ message MigrateSshProcToCgroupReply {
bool ok = 1;
}

message QueryTaskEnvVariablesRequest{
message QueryTaskEnvVariablesRequest {
uint32 task_id = 1;
}

message QueryTaskEnvVariablesReply{
message QueryTaskEnvVariablesReply {
bool ok = 1;
map<string/*name*/, string/*value*/> env_map = 2;
}

message QueryTaskEnvVariablesForwardRequest{
message QueryTaskEnvVariablesForwardRequest {
uint32 task_id = 1;
string execution_node = 2;
}

message QueryTaskEnvVariablesForwardReply{
message QueryTaskEnvVariablesForwardReply {
bool ok = 1;
map<string/*name*/, string/*value*/> env_map = 2;
}
Expand All @@ -426,7 +433,7 @@ message QueryClusterInfoReply {
repeated TrimmedPartitionInfo partitions = 2;
}

message QueryTasksInfoRequest{
message QueryTasksInfoRequest {
repeated uint32 filter_task_ids = 1;
repeated string filter_partitions = 2;
uint32 num_limit = 3;
Expand All @@ -436,14 +443,15 @@ message QueryTasksInfoRequest{
repeated TaskStatus filter_task_states = 6;
repeated string filter_users = 7;
repeated string filter_accounts = 8;
TimeInterval filter_submit_time_interval = 9;
TimeInterval filter_start_time_interval = 10;
TimeInterval filter_end_time_interval = 11;
// Field numbers 9-11 keep their previously published meaning so that older
// clients and servers remain wire-compatible. The license filter takes the
// next unused number instead of shifting the existing fields.
TimeInterval filter_submit_time_interval = 9;
TimeInterval filter_start_time_interval = 10;
TimeInterval filter_end_time_interval = 11;
// Only return tasks that request at least one of these licenses.
// NOTE(review): exact matching semantics are decided by CraneCtld — confirm.
repeated string filter_licenses = 12;

bool option_include_completed_tasks = 15;
}

message QueryTasksInfoReply{
message QueryTasksInfoReply {
bool ok = 1;
repeated TaskInfo task_info_list = 2;
}
Expand Down Expand Up @@ -499,7 +507,7 @@ message StreamCallocReply {
bool ok = 1;
}

CforedReplyType type = 1 ;
CforedReplyType type = 1;

oneof payload {
TaskIdReply payload_task_id_reply = 2;
Expand Down Expand Up @@ -672,11 +680,11 @@ message StreamCrunReply {
bool ok = 1;
}

message TaskIOForwardReply{
message TaskIOForwardReply {
string msg = 1;
}

CforedCrunReplyType type = 1 ;
CforedCrunReplyType type = 1;

oneof payload {
TaskIdReply payload_task_id_reply = 2;
Expand Down Expand Up @@ -754,7 +762,7 @@ service CraneCtld {
rpc CranedRegister(CranedRegisterRequest) returns (CranedRegisterReply);

/* RPCs called from Cfored */
rpc CforedStream(stream StreamCforedRequest) returns(stream StreamCtldReply);
rpc CforedStream(stream StreamCforedRequest) returns (stream StreamCtldReply);

/* RPCs called from ccancel */
rpc CancelTask(CancelTaskRequest) returns (CancelTaskReply);
Expand All @@ -765,7 +773,10 @@ service CraneCtld {

/* RPCs called from ccontrol */
rpc QueryCranedInfo(QueryCranedInfoRequest) returns (QueryCranedInfoReply);
rpc QueryPartitionInfo(QueryPartitionInfoRequest) returns (QueryPartitionInfoReply);
rpc QueryPartitionInfo(QueryPartitionInfoRequest)
returns (QueryPartitionInfoReply);
rpc QueryLicensesInfo(QueryLicensesInfoRequest)
returns (QueryLicensesInfoReply);
rpc ModifyTask(ModifyTaskRequest) returns (ModifyTaskReply);
rpc ModifyNode(ModifyCranedStateRequest) returns (ModifyCranedStateReply);

Expand Down Expand Up @@ -796,34 +807,47 @@ service CraneCtld {
}

service Craned {
/* ----------------------------------- Called from CraneCtld ---------------------------------------------------- */
rpc ExecuteTask(ExecuteTasksRequest) returns(ExecuteTasksReply);
/* --------------------- Called from CraneCtld --------------------- */
rpc ExecuteTask(ExecuteTasksRequest) returns (ExecuteTasksReply);

rpc CheckTaskStatus(CheckTaskStatusRequest) returns(CheckTaskStatusReply);
rpc CheckTaskStatus(CheckTaskStatusRequest) returns (CheckTaskStatusReply);

rpc CreateCgroupForTasks(CreateCgroupForTasksRequest) returns(CreateCgroupForTasksReply);
rpc ReleaseCgroupForTasks(ReleaseCgroupForTasksRequest) returns(ReleaseCgroupForTasksReply);
rpc CreateCgroupForTasks(CreateCgroupForTasksRequest)
returns (CreateCgroupForTasksReply);
rpc ReleaseCgroupForTasks(ReleaseCgroupForTasksRequest)
returns (ReleaseCgroupForTasksReply);

rpc QueryCranedRemoteMeta(QueryCranedRemoteMetaRequest) returns(QueryCranedRemoteMetaReply);
rpc QueryCranedRemoteMeta(QueryCranedRemoteMetaRequest)
returns (QueryCranedRemoteMetaReply);

/*
If the task is an interactive task, the resource uuid is also revoked.
If there's no process in this interactive task, just deallocate all the resources.
If there are processes in this interactive task, kill all the processes and deallocate resources.
If the task is a batch task, just kill it.
If there's no process in this interactive task, just deallocate all the
resources.
If there are processes in this interactive task, kill all the processes and
deallocate resources.
If the task is a batch task, just kill it.
*/
rpc TerminateTasks(TerminateTasksRequest) returns (TerminateTasksReply);
rpc TerminateOrphanedTask(TerminateOrphanedTaskRequest) returns (TerminateOrphanedTaskReply);
rpc ChangeTaskTimeLimit(ChangeTaskTimeLimitRequest) returns (ChangeTaskTimeLimitReply);

/* ----------------------------------- Called from Craned ------------------------------------------------------ */
rpc QueryTaskIdFromPort(QueryTaskIdFromPortRequest) returns (QueryTaskIdFromPortReply);

/* ----------------------------------- Called from Pam Module --------------------------------------------------- */
rpc QueryTaskIdFromPortForward(QueryTaskIdFromPortForwardRequest) returns (QueryTaskIdFromPortForwardReply);
rpc MigrateSshProcToCgroup(MigrateSshProcToCgroupRequest) returns (MigrateSshProcToCgroupReply);
rpc QueryTaskEnvVariables(QueryTaskEnvVariablesRequest) returns (QueryTaskEnvVariablesReply);
rpc QueryTaskEnvVariablesForward(QueryTaskEnvVariablesForwardRequest) returns (QueryTaskEnvVariablesForwardReply);
rpc TerminateOrphanedTask(TerminateOrphanedTaskRequest)
returns (TerminateOrphanedTaskReply);
rpc ChangeTaskTimeLimit(ChangeTaskTimeLimitRequest)
returns (ChangeTaskTimeLimitReply);

/* ---------------------- Called from Craned ----------------------- */
rpc QueryTaskIdFromPort(QueryTaskIdFromPortRequest)
returns (QueryTaskIdFromPortReply);

/* -------------------- Called from Pam Module --------------------- */
rpc QueryTaskIdFromPortForward(QueryTaskIdFromPortForwardRequest)
returns (QueryTaskIdFromPortForwardReply);
rpc MigrateSshProcToCgroup(MigrateSshProcToCgroupRequest)
returns (MigrateSshProcToCgroupReply);
rpc QueryTaskEnvVariables(QueryTaskEnvVariablesRequest)
returns (QueryTaskEnvVariablesReply);
rpc QueryTaskEnvVariablesForward(QueryTaskEnvVariablesForwardRequest)
returns (QueryTaskEnvVariablesForwardReply);
}

service CraneForeD {
Expand Down
40 changes: 26 additions & 14 deletions protos/PublicDefs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,13 @@ message TaskToCtld {
string extra_attr = 23;

string cmd_line = 31;
string cwd = 32; // Current working directory
string cwd = 32; // Current working directory
map<string, string> env = 33;

string excludes = 34;
string nodelist = 35;

map<string, uint32> licenses_count = 36;
}

message TaskInEmbeddedDb {
Expand Down Expand Up @@ -183,7 +185,7 @@ message TaskToD {
// If this task is PENDING, start_time is either not set (default constructed)
// or an estimated start time.
// If this task is RUNNING, start_time is the actual starting time.
google.protobuf.Timestamp start_time = 5; // Currently Only used in CraneCtld
google.protobuf.Timestamp start_time = 5; // Currently Only used in CraneCtld
google.protobuf.Duration time_limit = 6;

string partition = 8;
Expand Down Expand Up @@ -260,6 +262,7 @@ message TaskInfo {
string username = 15;
string qos = 16;
ResourceView res_view = 17;
map<string, uint32> licenses_count = 18;
repeated string req_nodes = 19;
repeated string exclude_nodes = 20;

Expand All @@ -276,10 +279,11 @@ message TaskInfo {
string craned_list = 36;
}

// The time of different nodes across the whole cluster might not always be synchronized.
// If the time on the front end node is more than several seconds ahead of the CraneCtld node,
// a negative elapsed time might occur.
// To avoid this, the elapsed time of a task is calculated on the CraneCtld side.
// The time of different nodes across the whole cluster might not always be
// synchronized. If the time on the front end node is more than several
// seconds ahead of the CraneCtld node, a negative elapsed time might occur.
// To avoid this, the elapsed time of a task is calculated on the CraneCtld
// side.
google.protobuf.Duration elapsed_time = 37;
repeated string execution_node = 38;
}
Expand Down Expand Up @@ -434,15 +438,17 @@ message AccountInfo {
bool blocked = 10;
}

// Note: UserInfo DIFFERS from the `User` struct in C++ code and database representation
// and is ONLY used for communication between CraneCtld and cacctmgr command.
// If an user belongs to multiple accounts, There will be multiple `UserInfo`
// messages with `account` pointing to each account.
// Note: UserInfo DIFFERS from the `User` struct in the C++ code and the
// database representation, and is ONLY used for communication between
// CraneCtld and the cacctmgr command.
// If a user belongs to multiple accounts, there will be multiple `UserInfo`
// messages with `account` pointing to each account.
// For example, if a user (uid=1) belongs to accounts `1,2,3`,
// there will be three `UserInfo` messages: (uid=1, account=1),
// (uid=1, account=2), (uid=1, account=3).
// The C++ code and the database representation use a
// Map<account name, AttrsInAccount> to contain in ONE UserInfo message all
// the information belonging to different accounts.
message UserInfo {
enum AdminLevel {
None = 0;
Expand Down Expand Up @@ -492,3 +498,9 @@ message CranedRemoteMeta {
google.protobuf.Timestamp craned_start_time = 4;
google.protobuf.Timestamp system_boot_time = 5;
}
// Usage counters of a single license, as carried in QueryLicensesInfoReply.
message LicenseInfo {
// License name.
string name = 1;
// NOTE(review): presumably the configured quantity of this license
// (cf. the `Licenses` option in the ctld config) — confirm.
uint32 total = 2;
// NOTE(review): presumably the quantity currently held by tasks — confirm.
uint32 used = 3;
// NOTE(review): presumably total - used; confirm the server keeps the three
// counters consistent.
uint32 free = 4;
}
2 changes: 2 additions & 0 deletions src/CraneCtld/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ add_executable(cranectld
AccountManager.cpp
AccountMetaContainer.h
AccountMetaContainer.cpp
LicensesManager.h
LicensesManager.cpp
EmbeddedDbClient.cpp
EmbeddedDbClient.h
CraneCtld.cpp
Expand Down
Loading
Loading