From 5e28dfb721ae89fcf83401581c145cda78a962bd Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Tue, 15 Nov 2022 20:33:32 -0500 Subject: [PATCH 01/23] initial merge, awaiting test --- .../modules/graph_embedding_learning/rgcn.py | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py new file mode 100644 index 00000000..49324004 --- /dev/null +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -0,0 +1,255 @@ +import dgl +import dgl.function as fn +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base import GNNBase, GNNLayerBase + + +class RGCN(GNNBase): + r"""Multi-layered `RGCN Network `__ + + .. math:: + TODO:Add Calculation. + + Parameters + ---------- + num_layers: int + Number of RGCN layers. + input_size : int, or pair of ints + Input feature size. + hidden_size: int list of int + Hidden layer size. + If a scalar is given, the sizes of all the hidden layers are the same. + If a list of scalar is given, each element in the list is the size of each hidden layer. + Example: [100,50] + output_size : int + Output feature size. + num_rels : int + Number of relations. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + use_self_loop : bool, optional + True to include self loop message. Default: ``True``. + dropout : float, optional + Dropout rate. Default: ``0.0`` + """ + + def __init__( + self, + num_layers, + input_size, + hidden_size, + output_size, + num_rels, + num_bases=None, + use_self_loop=True, + dropout=0.0, + device="cuda", + ): + super(RGCN, self).__init__() + self.num_layers = num_layers + self.num_rels = num_rels + self.num_bases = num_bases + self.use_self_loop = use_self_loop + self.dropout = dropout + self.device = device + + self.RGCN_layers = nn.ModuleList() + + # transform the hidden size format + if self.num_layers > 1 and type(hidden_size) is int: + hidden_size = [hidden_size for i in range(self.num_layers - 1)] + + if self.num_layers > 1: + # input projection + self.RGCN_layers.append( + RGCNLayer( + input_size, + hidden_size[0], + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + # hidden layers + for l in range(1, self.num_layers - 1): + # due to multi-head, the input_size = hidden_size * num_heads + self.RGCN_layers.append( + RGCNLayer( + hidden_size[l - 1], + hidden_size[l], + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + # output projection + self.RGCN_layers.append( + RGCNLayer( + hidden_size[-1] if self.num_layers > 1 else input_size, + output_size, + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + + def forward(self, graph): + r"""Compute RGCN layer. + + Parameters + ---------- + graph : GraphData + The graph with node feature stored in the feature field named as + "node_feat". + The node features are used for message passing. + + Returns + ------- + graph : GraphData + The graph with generated node embedding stored in the feature field + named as "node_emb". 
+ """ + + h = graph.node_features["node_feat"] + # get the node feature tensor from graph + g = graph.to_dgl() # transfer the current NLPgraph to DGL graph + # edge_type = g.edata[dgl.ETYPE].long() + # output projection + if self.num_layers > 1: + for l in range(0, self.num_layers - 1): + h = self.RGCN_layers[l](g, h) + + logits = self.RGCN_layers[-1](g, h) + + graph.node_features["node_emb"] = logits # put the results into the NLPGraph + return graph + + +class RGCNLayer(GNNLayerBase): + r"""A wrapper for RelGraphConv in DGL. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + dropout : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. Default: ``False`` + """ + + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + dropout=0.0, + layer_norm=False, + device="cuda", + ): + super(RGCNLayer, self).__init__() + self.linear_dict = { + i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + } + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + self.device = device + + # bias + if self.bias: + self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + nn.init.zeros_(self.h_bias) + + # TODO(minjie): consider remove those options in the future to make + # the module only about graph convolution. 
+ # layer norm + if self.layer_norm: + self.layer_norm_weight = nn.LayerNorm( + output_size, elementwise_affine=True, device=device + ) + + # weight for self loop + if self.self_loop: + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) + + self.dropout = nn.Dropout(dropout) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g): + """Message function.""" + ln = self.linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight(h) + if self.bias: + h = h + self.h_bias + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight + if self.activation: + h = self.activation(h) + h = self.dropout(h) + return h From 1d794a9950a085f12f13559e88a3bd1900f0e1b5 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Thu, 17 Nov 2022 16:23:12 -0500 Subject: [PATCH 02/23] add library code and test --- .../modules/graph_embedding_learning/rgcn.py | 4 +- .../pytorch/test/graph_embedding/run_rgcn.py | 191 ++++++++++++++++++ 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn.py diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 49324004..de9e2714 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -46,7 +46,7 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", + device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -185,7 +185,7 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", + device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py new file mode 100644 index 00000000..7ae1117e --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -0,0 +1,191 @@ +import argparse +import torch +import dgl +import time +import torch.nn.functional as F +from torchmetrics.functional import accuracy +from ...modules.graph_embedding_learning.rgcn import RGCN +from ...data.data import from_dgl +from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset + + +# Fix random seed +# torch.manual_seed(1024) +# import random +# random.seed(1024) +# import numpy as np +# np.random.seed(1024) + +# Load dataset +# Reference: dgl/examples/pytorch/rgcn/entity_utils.py (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) +def load_data(data_name='aifb', get_norm=False, inv_target=False): + if data_name == 'aifb': + dataset = AIFBDataset() + # Test Accuracy: + # 0.9444, 0.8889, 0.9722, 0.9167, 0.9444 without enorm. 
+ # 0.8611, 0.8889, 0.8889, 0.8889, 0.8333 + # avg: 0.93332 (without enorm) + # avg: 0.87222 + # DGL: 0.8889, 0.8889, 0.8056, 0.8889, 0.8611 + # DGL avg: 0.86668 + # paper: 0.9583 + # note: Could stuck at Local minimum of train loss between 0.2-0.35. + elif data_name == 'mutag': + dataset = MUTAGDataset() + # Test Accuracy: + # 0.6912, 0.7500, 0.7353, 0.6324, 0.7353 + # avg: 0.68884 + # DGL: 0.6765, 0.7059, 0.7353, 0.6765, 0.6912 + # DGL avg: 0.69724 + # paper: 0.7323 + # note: Could stuck at local minimum of train acc: 0.3897 & loss 0.6931 + elif data_name == 'bgs': + dataset = BGSDataset() + # Test Accuracy: + # 0.8966, 0.9310, 0.8966, 0.7931, 0.8621 + # avg: 0.87588 + # DGL: 0.7931, 0.9310, 0.8966, 0.8276, 0.8966 + # DGL avg: 0.86898 + # paper: 0.8310 + # note: Could stuck at local minimum of train acc: 0.6325 & loss: 0.6931 + else: + dataset = AMDataset() + # Test Accuracy: + # 0.7525, 0.7374, 0.7424, 0.7424, 0.7424 + # avg: 0.74342 + # DGL: 0.7677, 0.7677, 0.7323, 0.7879, 0.7677 + # DGL avg: 0.76466 + # paper: 0.8929 + # note: args.hidden_size is 10. + # Could stuck at local minimum of train loss: 0.3-0.5 + + # Load hetero-graph + hg = dataset[0] + + num_rels = len(hg.canonical_etypes) + category = dataset.predict_category + num_classes = dataset.num_classes + labels = hg.nodes[category].data.pop('labels') + train_mask = hg.nodes[category].data.pop('train_mask') + test_mask = hg.nodes[category].data.pop('test_mask') + train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() + test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() + + if get_norm: + # Calculate normalization weight for each edge, + # 1. / d, d is the degree of the destination node + for cetype in hg.canonical_etypes: + hg.edges[cetype].data['norm'] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) + edata = ['norm'] + else: + edata = None + category_id = hg.ntypes.index(category) + g = dgl.to_homogeneous(hg, edata=edata) + node_ids = torch.arange(g.num_nodes()) + + # find out the target node ids in g + loc = (g.ndata['_TYPE'] == category_id) + target_idx = node_ids[loc] + + if inv_target: + # Map global node IDs to type-specific node IDs. 
This is required for + # looking up type-specific labels in a minibatch + inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64) + inv_target[target_idx] = torch.arange(0, target_idx.shape[0], + dtype=inv_target.dtype) + return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target + else: + return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx + + +def main(args): + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=args.dataset, get_norm=True) + + # graph = from_dgl(g, is_hetero=False) + graph = from_dgl(g) + num_nodes = graph.get_node_num() + emb = torch.nn.Embedding(num_nodes, args.hidden_size) + # emb.requires_grad = True + graph.node_features['node_feat'] = emb.weight + + model = RGCN(num_layers=args.num_hidden_layers, + input_size=args.hidden_size, + hidden_size=args.hidden_size, + output_size=num_classes, + num_rels=num_rels, + num_bases=args.num_bases, + use_self_loop=args.use_self_loop, + # gpu=args.gpu, + dropout = args.dropout, + device='cpu') + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + print("start training...") + model.train() + for epoch in range(args.num_epochs): + logits = model(graph).node_features["node_emb"] + logits = logits[target_idx] + loss = F.cross_entropy(logits[train_idx], labels[train_idx]) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + train_acc = accuracy(logits[train_idx].argmax(dim=1), labels[train_idx]).item() + print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(epoch, train_acc, loss.item())) + print() + # Save Model + # torch.save(model.state_dict(), "./rgcn_model.pt") + print("start evaluating...") + model.eval() + with torch.no_grad(): + logits = model(graph).node_features["node_emb"] + logits = logits[target_idx] + test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() + print("Test Accuracy: {:.4f}".format(test_acc)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='RGCN for entity classification') + parser.add_argument("--num-hidden-layers", type=int, default=1, + help="number of hidden layers beside input/output layer") + parser.add_argument("--hidden-size", type=int, default=16, + help="dimension of hidden layer") + parser.add_argument("--gpu", type=int, default=-1, + help="GPU device number, -1 for cpu") + parser.add_argument("--num-bases", type=int, default=-1, + help="number of filter weight matrices, default: -1 [use all]") + parser.add_argument("-d", "--dataset", type=str, required=True, + choices=['aifb', 'mutag', 'bgs', 'am'], + help="dataset to use") + parser.add_argument("--use-self-loop", type=bool, default=False, + help="Consider self-loop edges or not") + parser.add_argument("--dropout", type=float, default=0.0, + help="Dropout rate") + parser.add_argument("--lr", type=float, default=1e-2, + help="Start learning rate") + parser.add_argument("--wd", type=float, default=5e-4, + help="weight decay") + parser.add_argument("--num-epochs", type=int, default=50, + help="Number of training epochs") + + args = parser.parse_args() + print(args) + main(args) + + + + + +"""Deprecated RGCN code on Heterogeneous graph due to +the lack of support from data structure. The following supports +are needed (but not limit to): +- Redefine the feature data structure of node/edge + - Index node/edge ids by their type. + - Enable type indexed features. +- Make corresponding changes on views. 
+- Make corresponding changes on set/get features functions. + +This example bypasses it by storing the features in the model +itself. It is a code trick and therefore not recommended to +the user. +""" \ No newline at end of file From 5d57dabfee27922752c78e81604fac3e37223660 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Fri, 18 Nov 2022 18:12:57 -0800 Subject: [PATCH 03/23] add rgcn for QG --- .../config/squad_split2/rgcn_dependency.json | 12 ++++++++++++ examples/pytorch/question_generation/main.py | 6 +++++- examples/pytorch/rgcn/rgcn.py | 12 ++++-------- .../semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 2 +- graph4nlp/pytorch/models/graph2seq.py | 2 +- 5 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json diff --git a/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json b/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json new file mode 100644 index 00000000..3536da4e --- /dev/null +++ b/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json @@ -0,0 +1,12 @@ +{ +"config_path": "examples/pytorch/question_generation/config/squad_split2/qg.yaml", +"model_args.graph_construction_args.graph_construction_share.topology_subdir": "DependencyGraphForRGCN", +"model_args.graph_construction_args.graph_construction_private.edge_strategy": "heterogeneous", +"model_args.graph_construction_args.graph_construction_private.merge_strategy": "tailhead", +"model_args.graph_construction_args.graph_construction_private.sequential_link": true, +"model_args.graph_construction_args.graph_construction_private.as_node": false, +"model_args.graph_embedding_name": "rgcn", +"model_args.graph_embedding_args.graph_embedding_private.num_rels": 80, +"model_args.graph_embedding_args.graph_embedding_private.num_bases": 4, +"checkpoint_args.out_dir": "out/squad_split2/rgcn_dependency_ckpt" +} diff --git a/examples/pytorch/question_generation/main.py b/examples/pytorch/question_generation/main.py index 326c9a80..450c1ded 100644 --- a/examples/pytorch/question_generation/main.py +++ b/examples/pytorch/question_generation/main.py @@ -27,6 +27,7 @@ from graph4nlp.pytorch.modules.utils.logger import Logger from .fused_embedding_construction import FusedEmbeddingConstruction +from examples.pytorch.semantic_parsing.graph2seq.rgcn_lib.graph2seq import RGCNGraph2Seq class QGModel(nn.Module): @@ -39,7 +40,10 @@ def __init__(self, vocab, config): ] # build Graph2Seq model - self.g2s = Graph2Seq.from_args(config, self.vocab) + if config["model_args"]["graph_embedding_name"] == "rgcn": + self.g2s = RGCNGraph2Seq.from_args(config, self.vocab) + else: + self.g2s = Graph2Seq.from_args(config, self.vocab) if "w2v" in self.g2s.graph_initializer.embedding_layer.word_emb_layers: self.word_emb = self.g2s.graph_initializer.embedding_layer.word_emb_layers[ diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index 0779e904..50f0a00b 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -54,7 +53,6 @@ def __init__( self.num_bases = num_bases self.use_self_loop = use_self_loop self.dropout = dropout - self.device = device self.RGCN_layers = nn.ModuleList() @@ -185,22 +183,20 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", ): 
super(RGCNLayer, self).__init__() self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation self.self_loop = self_loop self.layer_norm = layer_norm - self.device = device # bias if self.bias: - self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) # TODO(minjie): consider remove those options in the future to make @@ -208,12 +204,12 @@ def __init__( # layer norm if self.layer_norm: self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True, device=device + output_size, elementwise_affine=True ) # weight for self loop if self.self_loop: - self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) self.dropout = nn.Dropout(dropout) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 917264c8..f333071e 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -90,5 +90,5 @@ def _build_gnn_encoder( output_size, num_rels=gnn_num_rels, num_bases=gnn_num_bases, - dropout=feats_dropout, + dropout=feats_dropout ) diff --git a/graph4nlp/pytorch/models/graph2seq.py b/graph4nlp/pytorch/models/graph2seq.py index 682da3e5..5b2ea919 100644 --- a/graph4nlp/pytorch/models/graph2seq.py +++ b/graph4nlp/pytorch/models/graph2seq.py @@ -26,7 +26,7 @@ class Graph2Seq(Graph2XBase): >>> "It is just a how-to-use example." 
>>> from graph4nlp.pytorch.modules.config import get_basic_args >>> opt = get_basic_args(graph_construction_name="node_emb", graph_embedding_name="gat", decoder_name="stdrnn") - >>> graph2seq = Graph2Seq.from_args(opt=opt, vocab_model=vocab_model, device=torch.device("cuda:0")) + >>> graph2seq = Graph2Seq.from_args(opt=opt, vocab_model=vocab_model) >>> batch_graph = [GraphData() for _ in range(2)] >>> tgt_seq = torch.Tensor([[1, 2, 3], [4, 5, 6]]) >>> seq_out, _, _ = graph2seq(batch_graph=batch_graph, tgt_seq=tgt_seq) From c4437c1af163b5b109c89ed6ae02aacc5cd2e1f4 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 21:41:34 -0500 Subject: [PATCH 04/23] update config --- .../pytorch/test/graph_embedding/run_rgcn.py | 104 ++++++++---------- .../test/graph_embedding/run_rgcn.yaml | 10 ++ 2 files changed, 57 insertions(+), 57 deletions(-) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 7ae1117e..40efd7de 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -6,6 +6,7 @@ from torchmetrics.functional import accuracy from ...modules.graph_embedding_learning.rgcn import RGCN from ...data.data import from_dgl +from ...modules.utils.generic_utils import get_config from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset @@ -98,30 +99,30 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx -def main(args): - g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=args.dataset, get_norm=True) +def main(config): + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=config['dataset'], get_norm=True) # graph = from_dgl(g, is_hetero=False) graph = from_dgl(g) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, args.hidden_size) + emb = torch.nn.Embedding(num_nodes, config['hidden_size']) # emb.requires_grad = True graph.node_features['node_feat'] = emb.weight - model = RGCN(num_layers=args.num_hidden_layers, - input_size=args.hidden_size, - hidden_size=args.hidden_size, + model = RGCN(num_layers=config['num_hidden_layers'], + input_size=config['hidden_size'], + hidden_size=config['hidden_size'], output_size=num_classes, num_rels=num_rels, - num_bases=args.num_bases, - use_self_loop=args.use_self_loop, - # gpu=args.gpu, - dropout = args.dropout, - device='cpu') - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + num_bases=config['num_bases'], + use_self_loop=config['use_self_loop'], + # gpu=config.gpu, + dropout = config['dropout'], + device='cpu' if config['gpu'] == -1 else 'cuda:'+str(config['gpu'])) + optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['wd']) print("start training...") model.train() - for epoch in range(args.num_epochs): + for epoch in range(config['num_epochs']): logits = model(graph).node_features["node_emb"] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) @@ -145,47 +146,36 @@ def main(args): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='RGCN for entity classification') - parser.add_argument("--num-hidden-layers", type=int, default=1, - help="number of hidden layers beside input/output layer") - parser.add_argument("--hidden-size", 
type=int, default=16, - help="dimension of hidden layer") - parser.add_argument("--gpu", type=int, default=-1, - help="GPU device number, -1 for cpu") - parser.add_argument("--num-bases", type=int, default=-1, - help="number of filter weight matrices, default: -1 [use all]") - parser.add_argument("-d", "--dataset", type=str, required=True, - choices=['aifb', 'mutag', 'bgs', 'am'], - help="dataset to use") - parser.add_argument("--use-self-loop", type=bool, default=False, - help="Consider self-loop edges or not") - parser.add_argument("--dropout", type=float, default=0.0, - help="Dropout rate") - parser.add_argument("--lr", type=float, default=1e-2, - help="Start learning rate") - parser.add_argument("--wd", type=float, default=5e-4, - help="weight decay") - parser.add_argument("--num-epochs", type=int, default=50, - help="Number of training epochs") - - args = parser.parse_args() - print(args) - main(args) - - - - - -"""Deprecated RGCN code on Heterogeneous graph due to -the lack of support from data structure. The following supports -are needed (but not limit to): -- Redefine the feature data structure of node/edge - - Index node/edge ids by their type. - - Enable type indexed features. -- Make corresponding changes on views. -- Make corresponding changes on set/get features functions. - -This example bypasses it by storing the features in the model -itself. It is a code trick and therefore not recommended to -the user. -""" \ No newline at end of file + parser = argparse.ArgumentParser() + parser.add_argument("-config", type=str, help="path to the config file") + parser.add_argument("--grid_search", action="store_true", help="flag: grid search") + cfg = vars(parser.parse_args()) + config = get_config(cfg["config"]) + + # parser = argparse.ArgumentParser(description='RGCN for entity classification') + # parser.add_argument("--num-hidden-layers", type=int, default=1, + # help="number of hidden layers beside input/output layer") + # parser.add_argument("--hidden-size", type=int, default=16, + # help="dimension of hidden layer") + # parser.add_argument("--gpu", type=int, default=-1, + # help="GPU device number, -1 for cpu") + # parser.add_argument("--num-bases", type=int, default=-1, + # help="number of filter weight matrices, default: -1 [use all]") + # parser.add_argument("-d", "--dataset", type=str, required=True, + # choices=['aifb', 'mutag', 'bgs', 'am'], + # help="dataset to use") + # parser.add_argument("--use-self-loop", type=bool, default=False, + # help="Consider self-loop edges or not") + # parser.add_argument("--dropout", type=float, default=0.0, + # help="Dropout rate") + # parser.add_argument("--lr", type=float, default=1e-2, + # help="Start learning rate") + # parser.add_argument("--wd", type=float, default=5e-4, + # help="weight decay") + # parser.add_argument("--num-epochs", type=int, default=50, + # help="Number of training epochs") + + # args = parser.parse_args() + # print(args) + print(config) + main(config) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml new file mode 100644 index 00000000..448fa495 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +gpu: -1 +num_bases: -1 +dataset: 'aifb' +use_self_loop: False +dropout: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 50 From ef70aa37d1f2a4132a382be025511a8916bfe643 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 21:54:36 -0500 Subject: [PATCH 05/23] format the 
script --- .../pytorch/test/graph_embedding/run_rgcn.py | 87 ++++++++++--------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 40efd7de..441ef737 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,14 +1,13 @@ import argparse -import torch import dgl -import time +import torch import torch.nn.functional as F +from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from torchmetrics.functional import accuracy -from ...modules.graph_embedding_learning.rgcn import RGCN + from ...data.data import from_dgl +from ...modules.graph_embedding_learning.rgcn import RGCN from ...modules.utils.generic_utils import get_config -from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset - # Fix random seed # torch.manual_seed(1024) @@ -17,10 +16,13 @@ # import numpy as np # np.random.seed(1024) -# Load dataset -# Reference: dgl/examples/pytorch/rgcn/entity_utils.py (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) -def load_data(data_name='aifb', get_norm=False, inv_target=False): - if data_name == 'aifb': +# Load dataset +# Reference: dgl/examples/pytorch/rgcn/entity_utils.py +# (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) + + +def load_data(data_name="aifb", get_norm=False, inv_target=False): + if data_name == "aifb": dataset = AIFBDataset() # Test Accuracy: # 0.9444, 0.8889, 0.9722, 0.9167, 0.9444 without enorm. @@ -31,7 +33,7 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL avg: 0.86668 # paper: 0.9583 # note: Could stuck at Local minimum of train loss between 0.2-0.35. - elif data_name == 'mutag': + elif data_name == "mutag": dataset = MUTAGDataset() # Test Accuracy: # 0.6912, 0.7500, 0.7353, 0.6324, 0.7353 @@ -40,13 +42,13 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL avg: 0.69724 # paper: 0.7323 # note: Could stuck at local minimum of train acc: 0.3897 & loss 0.6931 - elif data_name == 'bgs': + elif data_name == "bgs": dataset = BGSDataset() # Test Accuracy: # 0.8966, 0.9310, 0.8966, 0.7931, 0.8621 # avg: 0.87588 # DGL: 0.7931, 0.9310, 0.8966, 0.8276, 0.8966 - # DGL avg: 0.86898 + # DGL avg: 0.86898 # paper: 0.8310 # note: Could stuck at local minimum of train acc: 0.6325 & loss: 0.6931 else: @@ -57,7 +59,7 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL: 0.7677, 0.7677, 0.7323, 0.7879, 0.7677 # DGL avg: 0.76466 # paper: 0.8929 - # note: args.hidden_size is 10. + # note: args.hidden_size is 10. 
# Could stuck at local minimum of train loss: 0.3-0.5 # Load hetero-graph @@ -66,9 +68,9 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): num_rels = len(hg.canonical_etypes) category = dataset.predict_category num_classes = dataset.num_classes - labels = hg.nodes[category].data.pop('labels') - train_mask = hg.nodes[category].data.pop('train_mask') - test_mask = hg.nodes[category].data.pop('test_mask') + labels = hg.nodes[category].data.pop("labels") + train_mask = hg.nodes[category].data.pop("train_mask") + test_mask = hg.nodes[category].data.pop("test_mask") train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() @@ -76,8 +78,8 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # Calculate normalization weight for each edge, # 1. / d, d is the degree of the destination node for cetype in hg.canonical_etypes: - hg.edges[cetype].data['norm'] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) - edata = ['norm'] + hg.edges[cetype].data["norm"] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) + edata = ["norm"] else: edata = None category_id = hg.ntypes.index(category) @@ -85,54 +87,61 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): node_ids = torch.arange(g.num_nodes()) # find out the target node ids in g - loc = (g.ndata['_TYPE'] == category_id) + loc = g.ndata["_TYPE"] == category_id target_idx = node_ids[loc] if inv_target: # Map global node IDs to type-specific node IDs. This is required for # looking up type-specific labels in a minibatch inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64) - inv_target[target_idx] = torch.arange(0, target_idx.shape[0], - dtype=inv_target.dtype) + inv_target[target_idx] = torch.arange(0, target_idx.shape[0], dtype=inv_target.dtype) return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target else: return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx def main(config): - g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=config['dataset'], get_norm=True) + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data( + data_name=config["dataset"], get_norm=True + ) # graph = from_dgl(g, is_hetero=False) graph = from_dgl(g) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config['hidden_size']) + emb = torch.nn.Embedding(num_nodes, config["hidden_size"]) # emb.requires_grad = True - graph.node_features['node_feat'] = emb.weight - - model = RGCN(num_layers=config['num_hidden_layers'], - input_size=config['hidden_size'], - hidden_size=config['hidden_size'], - output_size=num_classes, - num_rels=num_rels, - num_bases=config['num_bases'], - use_self_loop=config['use_self_loop'], - # gpu=config.gpu, - dropout = config['dropout'], - device='cpu' if config['gpu'] == -1 else 'cuda:'+str(config['gpu'])) - optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['wd']) + graph.node_features["node_feat"] = emb.weight + + model = RGCN( + num_layers=config["num_hidden_layers"], + input_size=config["hidden_size"], + hidden_size=config["hidden_size"], + output_size=num_classes, + num_rels=num_rels, + num_bases=config["num_bases"], + use_self_loop=config["use_self_loop"], + # gpu=config.gpu, + dropout=config["dropout"], + device="cpu" if config["gpu"] == -1 else "cuda:" + str(config["gpu"]), + ) + optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start 
training...") model.train() - for epoch in range(config['num_epochs']): + for epoch in range(config["num_epochs"]): logits = model(graph).node_features["node_emb"] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) - + optimizer.zero_grad() loss.backward() optimizer.step() train_acc = accuracy(logits[train_idx].argmax(dim=1), labels[train_idx]).item() - print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(epoch, train_acc, loss.item())) + print( + "Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format( + epoch, train_acc, loss.item() + ) + ) print() # Save Model # torch.save(model.state_dict(), "./rgcn_model.pt") From c20b9c227b2253c8e316d7794091f8e469ff10e1 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 22:56:40 -0500 Subject: [PATCH 06/23] remove device options from model --- .../modules/graph_embedding_learning/rgcn.py | 14 +++------- .../pytorch/test/graph_embedding/run_rgcn.py | 28 ------------------- .../test/graph_embedding/run_rgcn.yaml | 1 - 3 files changed, 4 insertions(+), 39 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index de9e2714..5ff10129 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -54,7 +53,6 @@ def __init__( self.num_bases = num_bases self.use_self_loop = use_self_loop self.dropout = dropout - self.device = device self.RGCN_layers = nn.ModuleList() @@ -185,35 +183,31 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation self.self_loop = self_loop self.layer_norm = layer_norm - self.device = device # bias if self.bias: - self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) # TODO(minjie): consider remove those options in the future to make # the module only about graph convolution. 
# layer norm if self.layer_norm: - self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True, device=device - ) + self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) # weight for self loop if self.self_loop: - self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) self.dropout = nn.Dropout(dropout) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 441ef737..f3438efe 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -120,9 +120,7 @@ def main(config): num_rels=num_rels, num_bases=config["num_bases"], use_self_loop=config["use_self_loop"], - # gpu=config.gpu, dropout=config["dropout"], - device="cpu" if config["gpu"] == -1 else "cuda:" + str(config["gpu"]), ) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") @@ -160,31 +158,5 @@ def main(config): parser.add_argument("--grid_search", action="store_true", help="flag: grid search") cfg = vars(parser.parse_args()) config = get_config(cfg["config"]) - - # parser = argparse.ArgumentParser(description='RGCN for entity classification') - # parser.add_argument("--num-hidden-layers", type=int, default=1, - # help="number of hidden layers beside input/output layer") - # parser.add_argument("--hidden-size", type=int, default=16, - # help="dimension of hidden layer") - # parser.add_argument("--gpu", type=int, default=-1, - # help="GPU device number, -1 for cpu") - # parser.add_argument("--num-bases", type=int, default=-1, - # help="number of filter weight matrices, default: -1 [use all]") - # parser.add_argument("-d", "--dataset", type=str, required=True, - # choices=['aifb', 'mutag', 'bgs', 'am'], - # help="dataset to use") - # parser.add_argument("--use-self-loop", type=bool, default=False, - # help="Consider self-loop edges or not") - # parser.add_argument("--dropout", type=float, default=0.0, - # help="Dropout rate") - # parser.add_argument("--lr", type=float, default=1e-2, - # help="Start learning rate") - # parser.add_argument("--wd", type=float, default=5e-4, - # help="weight decay") - # parser.add_argument("--num-epochs", type=int, default=50, - # help="Number of training epochs") - - # args = parser.parse_args() - # print(args) print(config) main(config) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 448fa495..6240bc85 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,6 +1,5 @@ num_hidden_layers: 1 hidden_size: 16 -gpu: -1 num_bases: -1 dataset: 'aifb' use_self_loop: False From ca03f95a7086f9c1167e8bdc844f8970ba367f59 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:42:33 -0500 Subject: [PATCH 07/23] add direction_options for rgcn --- .../modules/graph_embedding_learning/rgcn.py | 485 ++++++++++++++++-- .../pytorch/test/graph_embedding/run_rgcn.py | 8 +- .../test/graph_embedding/run_rgcn.yaml | 7 +- 3 files changed, 464 insertions(+), 36 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 5ff10129..f98217fd 100644 --- 
a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -30,10 +30,10 @@ class RGCN(GNNBase): Number of relations. num_bases : int, optional Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. - use_self_loop : bool, optional + self_loop : bool, optional True to include self loop message. Default: ``True``. - dropout : float, optional - Dropout rate. Default: ``0.0`` + feat_drop : float, optional + dropout rate. Default: ``0.0`` """ def __init__( @@ -43,17 +43,24 @@ def __init__( hidden_size, output_size, num_rels, + direction_option=None, + regularizer="basis", + bias=True, + activation=None, num_bases=None, - use_self_loop=True, - dropout=0.0, + self_loop=True, + feat_drop=0.0, ): super(RGCN, self).__init__() self.num_layers = num_layers self.num_rels = num_rels self.num_bases = num_bases - self.use_self_loop = use_self_loop - self.dropout = dropout - + self.self_loop = self_loop + self.feat_drop = feat_drop + self.direction_option = direction_option + self.regularizer = regularizer + self.activation = activation + self.bias = bias self.RGCN_layers = nn.ModuleList() # transform the hidden size format @@ -67,12 +74,13 @@ def __init__( input_size, hidden_size[0], num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) # hidden layers @@ -83,12 +91,13 @@ def __init__( hidden_size[l - 1], hidden_size[l], num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) # output projection @@ -97,12 +106,13 @@ def __init__( hidden_size[-1] if self.num_layers > 1 else input_size, output_size, num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) @@ -122,8 +132,12 @@ def forward(self, graph): The graph with generated node embedding stored in the feature field named as "node_emb". """ - - h = graph.node_features["node_feat"] + feat = graph.node_features["node_feat"] + if self.direction_option == "bi_sep": + h = [feat, feat] + else: + h = feat + # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph # edge_type = g.edata[dgl.ETYPE].long() @@ -134,12 +148,15 @@ def forward(self, graph): logits = self.RGCN_layers[-1](g, h) + if self.direction_option == "bi_sep": + logits = torch.cat(logits, -1) + graph.node_features["node_emb"] = logits # put the results into the NLPGraph return graph class RGCNLayer(GNNLayerBase): - r"""A wrapper for RelGraphConv in DGL. + r"""A wrapper for RGCNLayer. .. math:: TODO @@ -165,7 +182,7 @@ class RGCNLayer(GNNLayerBase): Activation function. Default: ``None``. self_loop : bool, optional True to include self loop message. Default: ``True``. 
- dropout : float, optional + feat_drop : float, optional Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` @@ -176,15 +193,126 @@ def __init__( input_size, output_size, num_rels, + direction_option=None, regularizer=None, num_bases=None, bias=True, activation=None, self_loop=False, - dropout=0.0, + feat_drop=0.0, layer_norm=False, ): super(RGCNLayer, self).__init__() + if direction_option == "undirected": + self.model = UndirectedRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + elif direction_option == "bi_sep": + self.model = BiSepRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + elif direction_option == "bi_fuse": + self.model = BiFuseRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + else: + raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) + + def forward(self, graph, feat): + r"""Compute graph attention network layer. + + Parameters + ---------- + graph : DGLGraph + The graph. + feat : torch.Tensor or pair of torch.Tensor + If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where + :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. + If a pair of torch.Tensor is given, the pair must contain two tensors of shape + :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. + + Returns + ------- + torch.Tensor + The output feature of shape :math:`(N, H, D_{out})` where :math:`H` + is the number of heads, and :math:`D_{out}` is size of output feature. + """ + return self.model(graph, feat) + + +class UndirectedRGCNLayer(GNNLayerBase): + r"""An undirected RGCN layer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. 
Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(UndirectedRGCNLayer, self).__init__() self.linear_dict = { i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } @@ -199,8 +327,6 @@ def __init__( self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) - # TODO(minjie): consider remove those options in the future to make - # the module only about graph convolution. # layer norm if self.layer_norm: self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) @@ -210,7 +336,7 @@ def __init__( self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) - self.dropout = nn.Dropout(dropout) + self.dropout = nn.Dropout(feat_drop) def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): @@ -247,3 +373,300 @@ def message(edges, g): h = self.activation(h) h = self.dropout(h) return h + + +class BiFuseRGCNLayer(GNNLayerBase): + r"""A Bidirectional version for RGCNLayer, with an additional fuse layer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. 
Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(BiFuseRGCNLayer, self).__init__() + self.linear_dict_forward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + self.linear_dict_backward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + + # bias + if self.bias: + self.h_bias_forward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_forward) + self.h_bias_backward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_backward) + + # layer norm + if self.layer_norm: + self.layer_norm_weight_forward = nn.LayerNorm(output_size, elementwise_affine=True) + self.layer_norm_weight_backward = nn.LayerNorm(output_size, elementwise_affine=True) + + # weight for self loop + if self.self_loop: + self.loop_weight_forward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_forward, gain=nn.init.calculate_gain("relu")) + + self.loop_weight_backward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_backward, gain=nn.init.calculate_gain("relu")) + + self.fuse_linear = nn.Linear(4 * output_size, output_size, bias=True) + self.dropout = nn.Dropout(feat_drop) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g, direction): + """Message function.""" + linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + ln = linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_forward(h) + if self.bias: + h = h + self.h_bias_forward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h_forward = h + + g = g.reverse() + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_backward(h) + if self.bias: + h = h + self.h_bias_backward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h_backward = h + + fuse_vector = torch.cat([h_forward, h_backward, h_forward*h_backward, h_forward-h_backward], dim=-1) 
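+        # Gated fusion of the two directional states: the concatenation
+        # [h_forward; h_backward; h_forward * h_backward; h_forward - h_backward]
+        # is passed through a sigmoid-activated linear layer to produce an
+        # element-wise gate, and the output is
+        #   h = gate * h_forward + (1 - gate) * h_backward.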
+ fuse_gate_vector = torch.sigmoid(self.fuse_linear(fuse_vector)) + h = fuse_gate_vector * h_forward + (1 - fuse_gate_vector) * h_backward + + if self.activation: + h = self.activation(h) + h = self.dropout(h) + return h + + +class BiSepRGCNLayer(GNNLayerBase): + r"""A Bidirectional version for RGCNLayer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(BiSepRGCNLayer, self).__init__() + self.linear_dict_forward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + self.linear_dict_backward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + + # bias + if self.bias: + self.h_bias_forward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_forward) + self.h_bias_backward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_backward) + + # layer norm + if self.layer_norm: + self.layer_norm_weight_forward = nn.LayerNorm(output_size, elementwise_affine=True) + self.layer_norm_weight_backward = nn.LayerNorm(output_size, elementwise_affine=True) + + # weight for self loop + if self.self_loop: + self.loop_weight_forward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_forward, gain=nn.init.calculate_gain("relu")) + + self.loop_weight_backward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_backward, gain=nn.init.calculate_gain("relu")) + + self.dropout = nn.Dropout(feat_drop) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g, direction): + """Message function.""" + linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + ln = linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + feat_forward, feat_backward = feat + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat_forward + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, 
cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_forward(h) + if self.bias: + h = h + self.h_bias_forward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h_forward = h + + g = g.reverse() + with g.local_scope(): + g.srcdata["h"] = feat_backward + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_backward(h) + if self.bias: + h = h + self.h_bias_backward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h_backward = h + + if self.activation: + h_forward = self.activation(h_forward) + h_backward = self.activation(h_backward) + h_forward = self.dropout(h_forward) + h_backward = self.dropout(h_backward) + return [h_forward, h_backward] \ No newline at end of file diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index f3438efe..77ff6e1d 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -117,10 +117,14 @@ def main(config): input_size=config["hidden_size"], hidden_size=config["hidden_size"], output_size=num_classes, + direction_option=config["direction_option"], + regularizer="basis", + bias=True, + activation=F.relu, num_rels=num_rels, num_bases=config["num_bases"], - use_self_loop=config["use_self_loop"], - dropout=config["dropout"], + self_loop=config["self_loop"], + feat_drop=config["feat_drop"], ) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 6240bc85..8042b72a 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -2,8 +2,9 @@ num_hidden_layers: 1 hidden_size: 16 num_bases: -1 dataset: 'aifb' -use_self_loop: False -dropout: 0.0 +direction_option: "bi_fuse" +self_loop: False +feat_drop: 0.0 lr: 0.01 wd: 0.0005 -num_epochs: 50 +num_epochs: 150 From af5fc60e4d696c822bbae1e5db349c7b5d012215 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:46:54 -0500 Subject: [PATCH 08/23] format --- .../modules/graph_embedding_learning/rgcn.py | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index f98217fd..22cea2c3 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -2,7 +2,6 @@ import dgl.function as fn import torch import torch.nn as nn -import torch.nn.functional as F from .base import GNNBase, GNNLayerBase @@ -137,7 +136,7 @@ def forward(self, graph): h = [feat, feat] else: h = feat - + # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph # edge_type = g.edata[dgl.ETYPE].long() 
@@ -214,7 +213,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) elif direction_option == "bi_sep": self.model = BiSepRGCNLayer( @@ -227,7 +226,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) elif direction_option == "bi_fuse": self.model = BiFuseRGCNLayer( @@ -240,7 +239,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) else: raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) @@ -299,6 +298,7 @@ class UndirectedRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. Default: ``False`` """ + def __init__( self, input_size, @@ -407,6 +407,7 @@ class BiFuseRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. Default: ``False`` """ + def __init__( self, input_size, @@ -460,7 +461,9 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + linear_dict = ( + self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + ) ln = linear_dict[g.canonical_etypes.index(edges._etype)] m = ln(edges.src["h"]) if "norm" in edges.data: @@ -477,7 +480,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="forward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -490,7 +494,7 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h - + g = g.reverse() with g.local_scope(): g.srcdata["h"] = feat @@ -501,7 +505,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="backward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -514,11 +519,13 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h - - fuse_vector = torch.cat([h_forward, h_backward, h_forward*h_backward, h_forward-h_backward], dim=-1) + + fuse_vector = torch.cat( + [h_forward, h_backward, h_forward * h_backward, h_forward - h_backward], dim=-1 + ) fuse_gate_vector = torch.sigmoid(self.fuse_linear(fuse_vector)) h = fuse_gate_vector * h_forward + (1 - fuse_gate_vector) * h_backward - + if self.activation: h = self.activation(h) h = self.dropout(h) @@ -557,6 +564,7 @@ class BiSepRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. 
Default: ``False`` """ + def __init__( self, input_size, @@ -609,12 +617,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + linear_dict = ( + self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + ) ln = linear_dict[g.canonical_etypes.index(edges._etype)] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} + feat_forward, feat_backward = feat # self.presorted = presorted with g.local_scope(): @@ -626,7 +637,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="forward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -639,7 +651,7 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h - + g = g.reverse() with g.local_scope(): g.srcdata["h"] = feat_backward @@ -650,7 +662,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="backward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -663,10 +676,10 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h - + if self.activation: h_forward = self.activation(h_forward) h_backward = self.activation(h_backward) h_forward = self.dropout(h_forward) h_backward = self.dropout(h_backward) - return [h_forward, h_backward] \ No newline at end of file + return [h_forward, h_backward] From e1fa01d551fe3b73e892d7e94374b298c5904566 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:49:28 -0500 Subject: [PATCH 09/23] remove unused parameters --- .../modules/graph_embedding_learning/rgcn.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 22cea2c3..f61c59df 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -43,21 +43,17 @@ def __init__( output_size, num_rels, direction_option=None, - regularizer="basis", bias=True, activation=None, - num_bases=None, self_loop=True, feat_drop=0.0, ): super(RGCN, self).__init__() self.num_layers = num_layers self.num_rels = num_rels - self.num_bases = num_bases self.self_loop = self_loop self.feat_drop = feat_drop self.direction_option = direction_option - self.regularizer = regularizer self.activation = activation self.bias = bias self.RGCN_layers = nn.ModuleList() @@ -74,8 +70,6 @@ def __init__( hidden_size[0], num_rels=self.num_rels, direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -91,8 +85,6 @@ def __init__( hidden_size[l], num_rels=self.num_rels, 
direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -106,8 +98,6 @@ def __init__( output_size, num_rels=self.num_rels, direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -193,8 +183,6 @@ def __init__( output_size, num_rels, direction_option=None, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -207,8 +195,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -220,8 +206,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -233,8 +217,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -304,8 +286,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -413,8 +393,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -570,8 +548,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, From 279e0b020ee5418e4ce039ac6888692c57a4a1e0 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 11:08:54 -0500 Subject: [PATCH 10/23] isort fix --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 77ff6e1d..3e2ce321 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,4 +1,5 @@ import argparse + import dgl import torch import torch.nn.functional as F From 124c20580a9a5930871d3d9418281eb6fb859a45 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 11:13:40 -0500 Subject: [PATCH 11/23] isort fix again --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 3e2ce321..dc4d97a1 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,5 +1,4 @@ import argparse - import dgl import torch import torch.nn.functional as F @@ -10,18 +9,10 @@ from ...modules.graph_embedding_learning.rgcn import RGCN from ...modules.utils.generic_utils import get_config -# Fix random seed -# torch.manual_seed(1024) -# import random -# random.seed(1024) -# import numpy as np -# np.random.seed(1024) # Load dataset # Reference: dgl/examples/pytorch/rgcn/entity_utils.py # (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) - - def load_data(data_name="aifb", get_norm=False, inv_target=False): if data_name == "aifb": dataset = AIFBDataset() From a6c1d4579181908cec6b8bad0fee82f0210b2b2e Mon Sep 17 00:00:00 2001 From: AlanSwift Date: Sat, 19 Nov 2022 16:39:07 +0000 Subject: [PATCH 12/23] fix ci --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 1 + 1 file 
changed, 1 insertion(+) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index dc4d97a1..2df2b32a 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -3,6 +3,7 @@ import torch import torch.nn.functional as F from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset + from torchmetrics.functional import accuracy from ...data.data import from_dgl From e5d53a6defaede2a4e29f5b086c9d3ca4bed6f89 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 14:18:54 -0500 Subject: [PATCH 13/23] update rgcn & test case --- examples/pytorch/rgcn/rgcn.py | 4 ++-- .../semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 10 ++++++---- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 4 +--- graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index 0779e904..95b518a0 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,7 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", + device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -185,7 +185,7 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", + device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 917264c8..38ca29a9 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -1,7 +1,7 @@ from graph4nlp.pytorch.models.graph2seq import Graph2Seq -from examples.pytorch.rgcn.rgcn import RGCN - +# from examples.pytorch.rgcn.rgcn import RGCN +from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN class RGCNGraph2Seq(Graph2Seq): def __init__( @@ -89,6 +89,8 @@ def _build_gnn_encoder( hidden_size, output_size, num_rels=gnn_num_rels, - num_bases=gnn_num_bases, - dropout=feats_dropout, + direction_option="undirected", + # num_bases=gnn_num_bases, + # dropout=feats_dropout, + feat_drop=feats_dropout ) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 2df2b32a..cdb6c5f7 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -111,11 +111,9 @@ def main(config): hidden_size=config["hidden_size"], output_size=num_classes, direction_option=config["direction_option"], - regularizer="basis", - bias=True, + bias=config['bias'], activation=F.relu, num_rels=num_rels, - num_bases=config["num_bases"], self_loop=config["self_loop"], feat_drop=config["feat_drop"], ) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 8042b72a..86544beb 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,10 +1,10 @@ -num_hidden_layers: 1 -hidden_size: 16 -num_bases: -1 +num_hidden_layers: 3 +hidden_size: 32 dataset: 'aifb' direction_option: "bi_fuse" self_loop: False +bias: True feat_drop: 0.0 lr: 0.01 wd: 0.0005 -num_epochs: 150 +num_epochs: 200 From 570dc89110ce1e6e0e17e64e7b1842ceed8c9460 Mon Sep 17 00:00:00 
2001 From: AlanSwift Date: Thu, 24 Nov 2022 02:10:46 +0000 Subject: [PATCH 14/23] fix --- .../config/train_dep_rgcn_bi_sep.json | 5 ++++ .../config/train_dep_rgcn_undirected.json | 5 ++++ .../semantic_parsing/graph2seq/main_rgcn.py | 2 +- .../graph2seq/rgcn_lib/graph2seq.py | 4 +++- .../modules/graph_embedding_learning/rgcn.py | 23 ++++++++++--------- 5 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json create mode 100644 examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json diff --git a/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json new file mode 100644 index 00000000..b19c6336 --- /dev/null +++ b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json @@ -0,0 +1,5 @@ +{ + "config_path": "examples/pytorch/semantic_parsing/graph2seq/config/dependency_rgcn_undirected.yaml", + "model_args.graph_embedding_args.graph_embedding_share.direction_option": "bi_sep", + "training_args.log_file": "examples/pytorch/semantic_parsing/graph2seq/log/dependency_rgcn_bi_sep.txt" +} diff --git a/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json new file mode 100644 index 00000000..77deec0e --- /dev/null +++ b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json @@ -0,0 +1,5 @@ +{ + "config_path": "examples/pytorch/semantic_parsing/graph2seq/config/dependency_rgcn_undirected.yaml", + "model_args.graph_embedding_args.graph_embedding_share.direction_option": "undirected", + "training_args.log_file": "examples/pytorch/semantic_parsing/graph2seq/log/dependency_rgcn_undirected.txt" +} diff --git a/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py b/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py index 5b3b7a24..eb683ebc 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py +++ b/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py @@ -52,7 +52,7 @@ def _build_logger(self, log_file): import os log_folder = os.path.split(log_file)[0] - if not os.path.exists(log_file): + if not os.path.exists(log_folder): os.makedirs(log_folder) self.logger = get_log(log_file) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 38ca29a9..07f85048 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -74,10 +74,12 @@ def __init__( def _build_gnn_encoder( self, + gnn, num_layers, input_size, hidden_size, output_size, + direction_option, feats_dropout, gnn_num_rels=80, gnn_num_bases=4, @@ -89,7 +91,7 @@ def _build_gnn_encoder( hidden_size, output_size, num_rels=gnn_num_rels, - direction_option="undirected", + direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, feat_drop=feats_dropout diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index f61c59df..53eace14 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -293,9 +293,9 @@ def __init__( layer_norm=False, ): super(UndirectedRGCNLayer, self).__init__() - 
self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + self.linear_dict = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation @@ -321,7 +321,7 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): """Message function.""" - ln = self.linear_dict[g.canonical_etypes.index(edges._etype)] + ln = self.linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] @@ -555,12 +555,13 @@ def __init__( layer_norm=False, ): super(BiSepRGCNLayer, self).__init__() - self.linear_dict_forward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } - self.linear_dict_backward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + + self.linear_dict_forward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) + self.linear_dict_backward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -596,7 +597,7 @@ def message(edges, g, direction): linear_dict = ( self.linear_dict_forward if direction == "forward" else self.linear_dict_backward ) - ln = linear_dict[g.canonical_etypes.index(edges._etype)] + ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] From 06703a4a136ab23530c3b6baf1646fced9440894 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Wed, 23 Nov 2022 21:29:17 -0500 Subject: [PATCH 15/23] bug fix on bi sep --- graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py | 4 ++-- graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 53eace14..cc681cd6 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -626,7 +626,7 @@ def message(edges, g, direction): if self.bias: h = h + self.h_bias_forward if self.self_loop: - h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h = h + feat_forward[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h g = g.reverse() @@ -651,7 +651,7 @@ def message(edges, g, direction): if self.bias: h = h + self.h_bias_backward if self.self_loop: - h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h = h + feat_backward[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h if self.activation: diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 86544beb..38c59cf6 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,7 +1,7 @@ -num_hidden_layers: 3 -hidden_size: 32 +num_hidden_layers: 1 +hidden_size: 16 dataset: 'aifb' -direction_option: "bi_fuse" +direction_option: "undirected" self_loop: False bias: True feat_drop: 0.0 From 5b5f77a9cfc7ea13b48b9b0c0a851f2c011eb096 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: 
Wed, 23 Nov 2022 21:37:23 -0500 Subject: [PATCH 16/23] bug fix on bi_fuse --- .../modules/graph_embedding_learning/rgcn.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cc681cd6..b1e024b8 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -400,12 +400,12 @@ def __init__( layer_norm=False, ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } - self.linear_dict_backward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + self.linear_dict_forward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) + self.linear_dict_backward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -442,7 +442,7 @@ def message(edges, g, direction): linear_dict = ( self.linear_dict_forward if direction == "forward" else self.linear_dict_backward ) - ln = linear_dict[g.canonical_etypes.index(edges._etype)] + ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] From 6b0bdc8ed1c188bb58b23e45aa13eb03e5c5f263 Mon Sep 17 00:00:00 2001 From: wsz Date: Thu, 24 Nov 2022 16:46:17 +0800 Subject: [PATCH 17/23] Change implementation of RGCN linear layer to DGL impl --- .../modules/graph_embedding_learning/rgcn.py | 55 +++++++++++++------ .../pytorch/test/graph_embedding/run_rgcn.py | 10 +++- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index b1e024b8..b0a10df9 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -1,5 +1,6 @@ import dgl import dgl.function as fn +from dgl.nn.pytorch.linear import TypedLinear import torch import torch.nn as nn @@ -47,6 +48,8 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, + regularizer=None, + num_basis=None, ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -291,11 +294,20 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None, ): super(UndirectedRGCNLayer, self).__init__() - self.linear_dict = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + # self.linear_dict = nn.ModuleDict({ + # str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + # }) + self.linear = TypedLinear( + in_size=input_size, + out_size=output_size, + num_types=num_rels, + regularizer=regularizer, + num_bases=num_bases, + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation @@ -321,8 +333,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): """Message function.""" - ln = self.linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # ln = self.linear(edges.src['h'], edges.data['type']) + # ln = 
self.linear_dict[str(g.canonical_etypes.index(edges._etype))] + # m = ln(edges.src["h"]) + + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = self.linear(edges.src["h"], etypes) + if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} @@ -400,12 +419,12 @@ def __init__( layer_norm=False, ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) - self.linear_dict_backward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + self.linear_dict_forward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) + self.linear_dict_backward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -555,13 +574,13 @@ def __init__( layer_norm=False, ): super(BiSepRGCNLayer, self).__init__() - - self.linear_dict_forward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) - self.linear_dict_backward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + + self.linear_dict_forward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) + self.linear_dict_backward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index cdb6c5f7..77ea611f 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -99,9 +99,11 @@ def main(config): ) # graph = from_dgl(g, is_hetero=False) - graph = from_dgl(g) + device = 'cuda:0' + graph = from_dgl(g).to(device) + labels = labels.to(device) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config["hidden_size"]) + emb = torch.nn.Embedding(num_nodes, config["hidden_size"]).to(device) # emb.requires_grad = True graph.node_features["node_feat"] = emb.weight @@ -116,7 +118,9 @@ def main(config): num_rels=num_rels, self_loop=config["self_loop"], feat_drop=config["feat_drop"], - ) + regularizer='basis', + num_basis=10 + ).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") model.train() From 1327a9796f825f216f8c083a242eaec30e1152aa Mon Sep 17 00:00:00 2001 From: wsz Date: Wed, 7 Dec 2022 00:49:02 +0800 Subject: [PATCH 18/23] Implemented regularizer in RGCN --- .../modules/graph_embedding_learning/rgcn.py | 95 ++++++++++++++----- .../pytorch/test/graph_embedding/run_rgcn.py | 2 +- .../{run_rgcn.yaml => run_rgcn_aifb.yaml} | 0 .../test/graph_embedding/run_rgcn_am.yaml | 10 ++ .../test/graph_embedding/run_rgcn_bgs.yaml | 10 ++ .../test/graph_embedding/run_rgcn_mutag.yaml | 10 ++ .../test/graph_embedding/test_rgcn_perf.sh | 9 ++ 7 files changed, 110 insertions(+), 26 deletions(-) rename graph4nlp/pytorch/test/graph_embedding/{run_rgcn.yaml => run_rgcn_aifb.yaml} (100%) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml 
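The switch to TypedLinear in the commits above enables basis regularization: rather than one independent weight matrix per relation, each relation weight is a learned combination of a small set of shared bases, W_r = sum_b a_rb * V_b, so the parameter count no longer grows linearly with the number of relations. A minimal sketch of the call pattern, with illustrative sizes and tensor names:

    import torch
    from dgl.nn.pytorch.linear import TypedLinear

    in_size, out_size, num_rels, num_bases = 16, 16, 91, 10
    # per-relation weights are assembled from num_bases shared basis matrices
    typed_linear = TypedLinear(in_size, out_size, num_rels, regularizer="basis", num_bases=num_bases)

    x = torch.randn(200, in_size)                 # one row per edge (source-node feature)
    etypes = torch.randint(0, num_rels, (200,))   # relation id of each edge
    messages = typed_linear(x, etypes)            # shape: (200, out_size)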
create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml create mode 100644 graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index b0a10df9..cd9eeb2f 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -48,8 +48,8 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, - regularizer=None, - num_basis=None, + regularizer='basis', + num_bases=4, ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -60,6 +60,8 @@ def __init__( self.activation = activation self.bias = bias self.RGCN_layers = nn.ModuleList() + self.regularizer = regularizer + self.num_basis = num_bases # transform the hidden size format if self.num_layers > 1 and type(hidden_size) is int: @@ -77,6 +79,8 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, + regularizer=regularizer, + num_bases=num_bases, ) ) # hidden layers @@ -92,7 +96,9 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, - ) + regularizer=regularizer, + num_bases=num_bases, + ) ) # output projection self.RGCN_layers.append( @@ -105,6 +111,8 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, + regularizer=regularizer, + num_bases=num_bases, ) ) @@ -191,6 +199,8 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None ): super(RGCNLayer, self).__init__() if direction_option == "undirected": @@ -203,6 +213,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) elif direction_option == "bi_sep": self.model = BiSepRGCNLayer( @@ -214,6 +226,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) elif direction_option == "bi_fuse": self.model = BiFuseRGCNLayer( @@ -225,6 +239,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) else: raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) @@ -405,6 +421,11 @@ class BiFuseRGCNLayer(GNNLayerBase): Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` + regularizer: str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. 
""" def __init__( @@ -417,14 +438,19 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) - self.linear_dict_backward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) + self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + + # self.linear_dict_forward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) + # self.linear_dict_backward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -458,11 +484,17 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = ( - self.linear_dict_forward if direction == "forward" else self.linear_dict_backward - ) - ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # linear_dict = ( + # self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + # ) + # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] + # m = ln(edges.src["h"]) + + ln = self.ln_fwd if direction == "forward" else self.ln_bwd + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} @@ -560,6 +592,11 @@ class BiSepRGCNLayer(GNNLayerBase): Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` + regularizer: str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. 
""" def __init__( @@ -572,15 +609,19 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None, ): super(BiSepRGCNLayer, self).__init__() + self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) - self.linear_dict_forward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) - self.linear_dict_backward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) + # self.linear_dict_forward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) + # self.linear_dict_backward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -613,11 +654,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = ( - self.linear_dict_forward if direction == "forward" else self.linear_dict_backward - ) - ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # linear_dict = ( + # self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + # ) + # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] + ln = self.ln_fwd if direction == "forward" else self.ln_bwd + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 77ea611f..e3d425c0 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -119,7 +119,7 @@ def main(config): self_loop=config["self_loop"], feat_drop=config["feat_drop"], regularizer='basis', - num_basis=10 + num_bases=10 ).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml rename to graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml new file mode 100644 index 00000000..a2a2a0bc --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'am' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml new file mode 100644 index 00000000..d599e1b3 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'bgs' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git 
a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml new file mode 100644 index 00000000..814fe76c --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'mutag' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh new file mode 100644 index 00000000..c8312570 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh @@ -0,0 +1,9 @@ +#!/bin/bash +for i in {1..5} +do + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_am_$i.log 2>&1 & + wait +done \ No newline at end of file From 9e39bb20c08faa85ece7139a7d47291d7a00769b Mon Sep 17 00:00:00 2001 From: wsz Date: Fri, 9 Dec 2022 16:42:00 +0800 Subject: [PATCH 19/23] Sync RGCNLayer implementation --- examples/pytorch/rgcn/rgcn.py | 4 +- .../graph2seq/rgcn_lib/graph2seq.py | 3 +- .../modules/graph_embedding_learning/rgcn.py | 17 ++-- .../pytorch/test/graph_embedding/run_rgcn.py | 85 +++++++++++++++---- .../test/graph_embedding/test_rgcn_perf.sh | 0 5 files changed, 82 insertions(+), 27 deletions(-) mode change 100644 => 100755 graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index f7ab4d70..cb124c1b 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -205,9 +205,7 @@ def __init__( # the module only about graph convolution. 
# layer norm if self.layer_norm: - self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True - ) + self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) # weight for self loop if self.self_loop: diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 07f85048..ec904835 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -3,6 +3,7 @@ # from examples.pytorch.rgcn.rgcn import RGCN from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN + class RGCNGraph2Seq(Graph2Seq): def __init__( self, @@ -94,5 +95,5 @@ def _build_gnn_encoder( direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, - feat_drop=feats_dropout + feat_drop=feats_dropout, ) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cd9eeb2f..cb435247 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -48,7 +48,7 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, - regularizer='basis', + regularizer="none", num_bases=4, ): super(RGCN, self).__init__() @@ -98,7 +98,7 @@ def __init__( feat_drop=self.feat_drop, regularizer=regularizer, num_bases=num_bases, - ) + ) ) # output projection self.RGCN_layers.append( @@ -115,6 +115,9 @@ def __init__( num_bases=num_bases, ) ) + # Print named parameters + # for k, v in self.named_parameters(): + # print(f'{k}: {v}') def forward(self, graph): r"""Compute RGCN layer. @@ -200,7 +203,7 @@ def __init__( feat_drop=0.0, layer_norm=False, regularizer=None, - num_bases=None + num_bases=None, ): super(RGCNLayer, self).__init__() if direction_option == "undirected": @@ -439,12 +442,12 @@ def __init__( feat_drop=0.0, layer_norm=False, regularizer=None, - num_bases=None + num_bases=None, ): super(BiFuseRGCNLayer, self).__init__() self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) - + # self.linear_dict_forward = nn.ModuleDict( # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} # ) @@ -489,12 +492,12 @@ def message(edges, g, direction): # ) # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] # m = ln(edges.src["h"]) - + ln = self.ln_fwd if direction == "forward" else self.ln_bwd etypes = torch.tensor( [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] ).to(edges.src["h"].device) - m = ln(edges.src["h"], etypes) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index e3d425c0..27c654ea 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -7,8 +7,9 @@ from torchmetrics.functional import accuracy from ...data.data import from_dgl -from ...modules.graph_embedding_learning.rgcn import RGCN +from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config +import torch.nn as nn # Load dataset @@ -93,39 +94,91 @@ def load_data(data_name="aifb", get_norm=False, inv_target=False): return 
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx +class MyModel(nn.Module): + def __init__( + self, + num_layers, + input_size, + hidden_size, + output_size, + num_rels, + direction_option=None, + bias=True, + activation=None, + self_loop=True, + feat_drop=0.0, + regularizer="none", + num_bases=4, + num_nodes=100, + ): + super(MyModel, self).__init__() + self.emb = nn.Embedding(num_nodes, config["hidden_size"]) + self.layer_1 = RGCNLayer( + input_size, + hidden_size, + num_rels=num_rels, + direction_option=direction_option, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + regularizer=regularizer, + num_bases=num_bases, + ) + self.layer_2 = RGCNLayer( + hidden_size, + output_size, + num_rels=num_rels, + direction_option=direction_option, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + regularizer=regularizer, + num_bases=num_bases, + ) + + def forward(self, g): + node_features = 0 + x1 = F.relu(self.) + g.node_features["node_feat"] = self.emb(torch.eye(g.num_nodes())) + return self.RGCN(g).node_features["node_emb"] + + def main(config): g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data( data_name=config["dataset"], get_norm=True ) # graph = from_dgl(g, is_hetero=False) - device = 'cuda:0' + device = "cuda:0" graph = from_dgl(g).to(device) labels = labels.to(device) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config["hidden_size"]).to(device) - # emb.requires_grad = True - graph.node_features["node_feat"] = emb.weight - - model = RGCN( - num_layers=config["num_hidden_layers"], + my_model = MyModel( + num_layers=config["num_hidden_layers"] + 1, input_size=config["hidden_size"], hidden_size=config["hidden_size"], output_size=num_classes, direction_option=config["direction_option"], - bias=config['bias'], + bias=config["bias"], activation=F.relu, num_rels=num_rels, self_loop=config["self_loop"], feat_drop=config["feat_drop"], - regularizer='basis', - num_bases=10 + regularizer="basis", + num_bases=num_rels, + num_nodes=num_nodes, ).to(device) - optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) + optimizer = torch.optim.Adam( + my_model.parameters(), + lr=config["lr"], + weight_decay=config["wd"], + ) print("start training...") - model.train() + my_model.train() for epoch in range(config["num_epochs"]): - logits = model(graph).node_features["node_emb"] + logits = my_model(graph) logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) @@ -143,9 +196,9 @@ def main(config): # Save Model # torch.save(model.state_dict(), "./rgcn_model.pt") print("start evaluating...") - model.eval() + my_model.eval() with torch.no_grad(): - logits = model(graph).node_features["node_emb"] + logits = my_model(graph) logits = logits[target_idx] test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() print("Test Accuracy: {:.4f}".format(test_acc)) diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh old mode 100644 new mode 100755 From 1e0c597a908d9f7911607252fc6566a7ade2ac2d Mon Sep 17 00:00:00 2001 From: wsz Date: Fri, 9 Dec 2022 17:24:16 +0800 Subject: [PATCH 20/23] modified DGL benchmark test code for rgcn --- .../pytorch/test/graph_embedding/run_rgcn.py | 18 +++++++++++------- .../test/graph_embedding/test_rgcn_perf.sh | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git 
a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 27c654ea..e4b63d8d 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -10,6 +10,7 @@ from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config import torch.nn as nn +from ...data.data import GraphData # Load dataset @@ -112,7 +113,7 @@ def __init__( num_nodes=100, ): super(MyModel, self).__init__() - self.emb = nn.Embedding(num_nodes, config["hidden_size"]) + self.emb = nn.Embedding(num_nodes, hidden_size) self.layer_1 = RGCNLayer( input_size, hidden_size, @@ -137,12 +138,15 @@ def __init__( regularizer=regularizer, num_bases=num_bases, ) - - def forward(self, g): - node_features = 0 - x1 = F.relu(self.) - g.node_features["node_feat"] = self.emb(torch.eye(g.num_nodes())) - return self.RGCN(g).node_features["node_emb"] + for k, v in self.named_parameters(): + print(f'{k} => {v}') + + def forward(self, g: GraphData): + node_features = self.emb(torch.IntTensor(list(range(g.get_node_num()))).to('cuda:0')) + dgl_g = g.to_dgl() + x1 = self.layer_1(dgl_g, node_features) + x2 = self.layer_2(dgl_g, x1) + return x2 def main(config): diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh index c8312570..3d91d4bd 100755 --- a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh +++ b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh @@ -5,5 +5,5 @@ do python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag_$i.log 2>&1 & python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs_$i.log 2>&1 & python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_am_$i.log 2>&1 & - wait + # wait done \ No newline at end of file From 473df491bb25a22fa695c654654d4727bab74ca5 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sat, 10 Dec 2022 01:28:44 -0800 Subject: [PATCH 21/23] linter --- examples/pytorch/math_word_problem/mawps/src/evaluation.py | 4 ++-- examples/pytorch/question_generation/main.py | 3 ++- graph4nlp/pytorch/test/data_structure/test_graphdata.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/pytorch/math_word_problem/mawps/src/evaluation.py b/examples/pytorch/math_word_problem/mawps/src/evaluation.py index c26280cc..c4f0fb87 100644 --- a/examples/pytorch/math_word_problem/mawps/src/evaluation.py +++ b/examples/pytorch/math_word_problem/mawps/src/evaluation.py @@ -1,8 +1,8 @@ -from graph4nlp.pytorch.modules.evaluation.base import EvaluationMetricBase - import sympy from sympy.parsing.sympy_parser import parse_expr +from graph4nlp.pytorch.modules.evaluation.base import EvaluationMetricBase + class SolutionMatch(EvaluationMetricBase): def __init__(self): diff --git a/examples/pytorch/question_generation/main.py b/examples/pytorch/question_generation/main.py index 450c1ded..e35cf87d 100644 --- a/examples/pytorch/question_generation/main.py +++ b/examples/pytorch/question_generation/main.py @@ -26,9 +26,10 @@ from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping, to_cuda from 
graph4nlp.pytorch.modules.utils.logger import Logger -from .fused_embedding_construction import FusedEmbeddingConstruction from examples.pytorch.semantic_parsing.graph2seq.rgcn_lib.graph2seq import RGCNGraph2Seq +from .fused_embedding_construction import FusedEmbeddingConstruction + class QGModel(nn.Module): def __init__(self, vocab, config): diff --git a/graph4nlp/pytorch/test/data_structure/test_graphdata.py b/graph4nlp/pytorch/test/data_structure/test_graphdata.py index 7d438715..0cf6d8dd 100644 --- a/graph4nlp/pytorch/test/data_structure/test_graphdata.py +++ b/graph4nlp/pytorch/test/data_structure/test_graphdata.py @@ -1,14 +1,13 @@ import gc import time import matplotlib.pyplot as plt +import pytest import torch import torch.nn as nn from graph4nlp.pytorch.data import GraphData, from_batch, from_dgl, to_batch from graph4nlp.pytorch.data.utils import EdgeNotFoundException, SizeMismatchException -import pytest - def fail_here(): raise Exception("The above line of code shouldn't be executed normally") From dfc4e89bcafd7c4c1bafb21969ed803ad313391e Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sat, 10 Dec 2022 01:36:05 -0800 Subject: [PATCH 22/23] update --- examples/pytorch/rgcn/rgcn.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index cb124c1b..7f738a54 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -184,7 +183,6 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { From a4f8537448811465e3081d519980bbf70ef6c2a5 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sat, 10 Dec 2022 01:37:54 -0800 Subject: [PATCH 23/23] linter --- .../pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 1 - graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py | 2 +- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 5 ++--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index ec904835..0a43a59a 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -1,5 +1,4 @@ from graph4nlp.pytorch.models.graph2seq import Graph2Seq - # from examples.pytorch.rgcn.rgcn import RGCN from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cb435247..5f43bcca 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -1,8 +1,8 @@ import dgl import dgl.function as fn -from dgl.nn.pytorch.linear import TypedLinear import torch import torch.nn as nn +from dgl.nn.pytorch.linear import TypedLinear from .base import GNNBase, GNNLayerBase diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index e4b63d8d..88f27631 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,16 +1,15 @@ import argparse import dgl import torch +import torch.nn as nn import torch.nn.functional as F from 
dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from torchmetrics.functional import accuracy -from ...data.data import from_dgl +from ...data.data import GraphData, from_dgl from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config -import torch.nn as nn -from ...data.data import GraphData # Load dataset
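For reference, the bi_fuse direction option earlier in the series combines the forward and backward node states with a learned sigmoid gate computed from the concatenation [h_fwd; h_bwd; h_fwd * h_bwd; h_fwd - h_bwd]. A minimal sketch of that fusion, assuming both states share one hidden size and the fuse layer maps the 4x-wide concatenation back to that size:

    import torch
    import torch.nn as nn

    hidden = 16
    fuse_linear = nn.Linear(4 * hidden, hidden, bias=True)

    h_fwd = torch.randn(32, hidden)   # forward-direction node states
    h_bwd = torch.randn(32, hidden)   # backward-direction node states

    fuse_vector = torch.cat([h_fwd, h_bwd, h_fwd * h_bwd, h_fwd - h_bwd], dim=-1)
    gate = torch.sigmoid(fuse_linear(fuse_vector))
    h = gate * h_fwd + (1 - gate) * h_bwd   # element-wise gated mixture of the two directions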
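A recurring fix in patches 14 through 17 is moving the per-relation linear layers out of plain Python dicts, first into nn.ModuleDict and then into TypedLinear. Submodules stored in an ordinary dict are not registered with nn.Module, so they are skipped by .parameters(), .to(device), and state_dict(). A small illustration of the difference (class names are illustrative):

    import torch.nn as nn

    class PlainDictLayer(nn.Module):
        def __init__(self, num_rels, size):
            super().__init__()
            # not registered: these weights never reach the optimizer or the target device
            self.linear_dict = {i: nn.Linear(size, size) for i in range(num_rels)}

    class ModuleDictLayer(nn.Module):
        def __init__(self, num_rels, size):
            super().__init__()
            # registered: nn.ModuleDict requires string keys, hence str(i)
            self.linear_dict = nn.ModuleDict(
                {str(i): nn.Linear(size, size) for i in range(num_rels)}
            )

    print(len(list(PlainDictLayer(3, 4).parameters())))    # 0
    print(len(list(ModuleDictLayer(3, 4).parameters())))   # 6: weight and bias per relation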