modelname = "NGCF" + \
    "_bs_" + str(batch_size) + \
    "_nemb_" + str(emb_dim) + \
    "_layers_" + str(layers) + \
    "_nodedr_" + str(node_dropout) + \
    "_messdr_" + str(mess_dropout) + \
    "_reg_" + str(reg) + \
    "_lr_" + str(lr)
- bs : batch size
- nemb : embedding dimension
- layers : sizes of the embedding propagation layers
- nodedr : node dropout ratio (Medium Post: Dropout in (Deep) Machine Learning)
- messdr : message dropout ratio
- reg : L2 regularization coefficient
- lr : learning rate
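With assumed (hypothetical) hyperparameter values, the concatenation above produces a name like the one below; an f-string gives the same result more compactly:

batch_size, emb_dim, layers = 1024, 64, [64, 64]        # assumed values, not repo defaults
node_dropout, mess_dropout, reg, lr = 0.1, 0.1, 1e-5, 1e-4

modelname = (f"NGCF_bs_{batch_size}_nemb_{emb_dim}_layers_{layers}"
             f"_nodedr_{node_dropout}_messdr_{mess_dropout}_reg_{reg}_lr_{lr}")
print(modelname)
# NGCF_bs_1024_nemb_64_layers_[64, 64]_nodedr_0.1_messdr_0.1_reg_1e-05_lr_0.0001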
from datetime import datetime   # needed for datetime.now()

cur_best_loss, stopping_step, should_stop = 1e3, 0, False   # early-stopping state
today = datetime.now()
- cur_best_loss : lowest loss observed so far, initialized to a large value (1e3)
- stopping_step : number of consecutive epochs without improvement
- should_stop : early-stopping flag, set to True once stopping_step reaches the patience threshold
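A minimal sketch of how these three variables typically drive early stopping; the helper name and patience value below are assumptions, not taken from the repository:

def early_stopping(loss, cur_best_loss, stopping_step, patience=5):
    """Return the updated best loss, patience counter, and stop flag."""
    if loss < cur_best_loss:
        cur_best_loss, stopping_step = loss, 0   # improvement: reset the counter
    else:
        stopping_step += 1                       # no improvement this epoch
    should_stop = stopping_step >= patience
    return cur_best_loss, stopping_step, should_stop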
from time import time   # needed for time()

for epoch in range(args.n_epochs):
    t1 = time()
    loss = train(model, data_generator, optimizer)   # ./utils/helper_functions.py
    training_time = time() - t1
    print("Epoch: {}/{}, Training Time: {:.2f}s, Loss: {:.4f}".
          format(epoch, args.n_epochs, training_time, loss))
An epoch in machine learning means 'one complete pass of the training dataset through the algorithm'. The number of epochs is an important hyperparameter: it specifies how many complete passes over the entire training dataset the learning process makes. Within each epoch, the model's internal parameters are updated once per batch; in the special case where a single batch contains the whole dataset, one epoch yields exactly one update, which is called batch gradient descent. The batch size is always an integer of at least 1, and so is the number of epochs. <Reference: Epoch in Machine Learning: A Simple Introduction (2021)>
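As a concrete example with made-up numbers, this is the relationship between epochs, batch size, and parameter updates:

n_train, batch_size, n_epochs = 100_000, 1024, 10   # assumed numbers
updates_per_epoch = n_train // batch_size + 1       # same formula as in train() below
total_updates = updates_per_epoch * n_epochs
print(updates_per_epoch, total_updates)             # 98 980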
import pandas as pd

df = pd.read_csv(path + '/ratings.csv', header=None)
f = open(path + '/ratings.txt', 'w')
temp = df[0][0]                      # first userId
f.write(str(int(temp)))
f.write(' ')
for i in range(len(df)):
    if(df[0][i] != temp):            # new user: start a new line with its userId
        f.write('\n')
        f.write(str(int(df[0][i])))
        f.write(' ')
        temp = df[0][i]
    f.write(str(int(df[1][i])))      # append this user's movieId
    f.write(' ')
f.close()

train_file = path + '/ratings.txt'
test_file = '...' + './test.txt'
- Original Form -
ratings.csv
userId,movieId,rating,timestamp

- Transformed Form -
ratings.txt
userId movieId1 movieId2 movieId3 ...
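For illustration, here is the same transformation on hypothetical MovieLens-style rows (the data below is made up):

import io
import pandas as pd

# Hypothetical (userId, movieId, rating, timestamp) rows, already sorted by userId
csv_text = "1,31,2.5,1260759144\n1,1029,3.0,1260759179\n2,10,4.0,835355493\n"
df = pd.read_csv(io.StringIO(csv_text), header=None)

# Collapse each user's movieIds onto a single line, as the loop above does
for uid, group in df.groupby(0):
    print(uid, *group[1].astype(int).tolist())
# 1 31 1029
# 2 10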
with open(train_file) as f:
    for l in f.readlines():
        if len(l) > 0:
            items = []
            l = l.strip('\n').split(' ')               # l = [userId, movieId1, movieId2, ...]
            for i in l[1:]:                            # movieIds
                try:
                    items.append(int(i))               # collect this user's movieIds
                except Exception:
                    continue                           # skip the trailing empty token

            uid = int(l[0])                            # userId
            self.exist_users.append(uid)               # record that this user appears in training
            self.n_items = max(self.n_items, max(items))   # track the largest movieId seen so far
            self.n_users = max(self.n_users, uid)          # track the largest userId seen so far
            self.n_train += len(items)                 # count training interactions
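To see what these attributes end up holding, here is the same parsing logic run standalone on tiny made-up ratings.txt contents:

lines = ["0 10 20 30 ", "1 40 "]   # two users, four interactions (made up)

exist_users, n_users, n_items, n_train = [], 0, 0, 0
for l in lines:
    ids = [int(i) for i in l.strip().split(' ')]
    uid, items = ids[0], ids[1:]
    exist_users.append(uid)
    n_items = max(n_items, max(items))   # largest movieId seen so far
    n_users = max(n_users, uid)          # largest userId seen so far
    n_train += len(items)

print(exist_users, n_users, n_items, n_train)   # [0, 1] 1 40 4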
self.R_train = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)

Create an (n_users) x (n_items) sparse user-item interaction matrix, initially all zeros (sp is scipy.sparse, np is numpy).
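For readers unfamiliar with dok_matrix: a DOK (dictionary of keys) matrix stores only the nonzero entries as {(row, col): value} pairs, which makes incremental filling cheap. A minimal standalone sketch:

import numpy as np
import scipy.sparse as sp

R = sp.dok_matrix((3, 5), dtype=np.float32)   # 3 users x 5 items, all zeros
R[0, 2] = 1.                                  # user 0 interacted with item 2
R[1, 4] = 1.
print(R.toarray())                            # dense view, only sensible for tiny examples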
with open(train_file) as f_train:
    with open(test_file) as f_test:
        for l in f_train.readlines():
            items = []
            if len(l) == 0: break
            l = l.strip('\n').split(' ')           # l = [userId, movieId1, movieId2, movieId3, ...]
            for i in l:
                try:
                    items.append(int(i))           # parse userId and movieIds as ints
                except Exception:
                    continue                       # skip the trailing empty token
            uid, train_items = items[0], items[1:] # train_items = [movieId1, movieId2, ...]

            for i in train_items:                  # for each movieId this user interacted with
                self.R_train[uid, i] = 1.          # mark the (uid, movieId) cell of R_train
            self.train_items[uid] = train_items    # remember the user's training items
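NGCF's embedding propagation operates on the adjacency matrix of the full user-item bipartite graph, which embeds R as an off-diagonal block, A = [[0, R], [Rᵀ, 0]] (see the NGCF paper). Where and how this repository builds it is not shown here; the following is a minimal sketch of that construction:

import numpy as np
import scipy.sparse as sp

n_users, n_items = 3, 5
R = sp.dok_matrix((n_users, n_items), dtype=np.float32)
R[0, 2] = 1.
R[1, 4] = 1.

# Users and items share one node index space: users 0..2, items 3..7
adj = sp.bmat([[None, R], [R.T, None]], format='csr')
print(adj.shape)   # (8, 8)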
def train(model, data_generator, optimizer):
    """
    Train the model, PyTorch style

    Arguments:
        model: PyTorch model
        data_generator: Data object
        optimizer: PyTorch optimizer
    """
    model.train()
    n_batch = data_generator.n_train // data_generator.batch_size + 1
    running_loss = 0
    for _ in range(n_batch):
        u, i, j = data_generator.sample()   # users, positive items, sampled negative items
        optimizer.zero_grad()
        loss = model(u, i, j)               # forward pass returns the (BPR) loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss
The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters. <Reference: Difference Between a Batch and Epoch in a Neural Network>
Typically, with neural networks, we seek to minimize the error. As such, the objective function is often referred to as a cost function or a loss function and the value calculated by the loss function is referred to as simply “loss.” <Reference: Loss and Loss Functions for Training Deep Learning Neural Networks>
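In the training loop above, model(u, i, j) returns a pairwise BPR (Bayesian Personalized Ranking) loss over users u, observed items i, and sampled negative items j, as in the NGCF paper. A minimal sketch of such a loss on already-looked-up embeddings (the exact regularization term used by this repository is an assumption here):

import torch

def bpr_loss(u_emb, pos_emb, neg_emb, reg=1e-5):
    """BPR loss with L2 regularization over a batch of embeddings."""
    pos_scores = (u_emb * pos_emb).sum(dim=1)   # scores for observed items
    neg_scores = (u_emb * neg_emb).sum(dim=1)   # scores for sampled negatives
    mf_loss = -torch.log(torch.sigmoid(pos_scores - neg_scores)).mean()
    reg_loss = reg * (u_emb.norm(2).pow(2) + pos_emb.norm(2).pow(2)
                      + neg_emb.norm(2).pow(2)) / u_emb.shape[0]
    return mf_loss + reg_loss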