From 5e28dfb721ae89fcf83401581c145cda78a962bd Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Tue, 15 Nov 2022 20:33:32 -0500 Subject: [PATCH 01/28] initial merge, awaiting test --- .../modules/graph_embedding_learning/rgcn.py | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py new file mode 100644 index 00000000..49324004 --- /dev/null +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -0,0 +1,255 @@ +import dgl +import dgl.function as fn +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base import GNNBase, GNNLayerBase + + +class RGCN(GNNBase): + r"""Multi-layered `RGCN Network `__ + + .. math:: + TODO:Add Calculation. + + Parameters + ---------- + num_layers: int + Number of RGCN layers. + input_size : int, or pair of ints + Input feature size. + hidden_size: int list of int + Hidden layer size. + If a scalar is given, the sizes of all the hidden layers are the same. + If a list of scalar is given, each element in the list is the size of each hidden layer. + Example: [100,50] + output_size : int + Output feature size. + num_rels : int + Number of relations. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + use_self_loop : bool, optional + True to include self loop message. Default: ``True``. + dropout : float, optional + Dropout rate. Default: ``0.0`` + """ + + def __init__( + self, + num_layers, + input_size, + hidden_size, + output_size, + num_rels, + num_bases=None, + use_self_loop=True, + dropout=0.0, + device="cuda", + ): + super(RGCN, self).__init__() + self.num_layers = num_layers + self.num_rels = num_rels + self.num_bases = num_bases + self.use_self_loop = use_self_loop + self.dropout = dropout + self.device = device + + self.RGCN_layers = nn.ModuleList() + + # transform the hidden size format + if self.num_layers > 1 and type(hidden_size) is int: + hidden_size = [hidden_size for i in range(self.num_layers - 1)] + + if self.num_layers > 1: + # input projection + self.RGCN_layers.append( + RGCNLayer( + input_size, + hidden_size[0], + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + # hidden layers + for l in range(1, self.num_layers - 1): + # due to multi-head, the input_size = hidden_size * num_heads + self.RGCN_layers.append( + RGCNLayer( + hidden_size[l - 1], + hidden_size[l], + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + # output projection + self.RGCN_layers.append( + RGCNLayer( + hidden_size[-1] if self.num_layers > 1 else input_size, + output_size, + num_rels=self.num_rels, + regularizer="basis", + num_bases=self.num_bases, + bias=True, + activation=F.relu, + self_loop=self.use_self_loop, + dropout=self.dropout, + ) + ) + + def forward(self, graph): + r"""Compute RGCN layer. + + Parameters + ---------- + graph : GraphData + The graph with node feature stored in the feature field named as + "node_feat". + The node features are used for message passing. + + Returns + ------- + graph : GraphData + The graph with generated node embedding stored in the feature field + named as "node_emb". 
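+
+        Example
+        -------
+        A minimal sketch; assumes an ``RGCN`` instance ``rgcn`` and a
+        ``GraphData`` object ``graph`` whose input features have been set:
+
+        >>> graph.node_features["node_feat"] = torch.randn(graph.get_node_num(), 16)
+        >>> graph = rgcn(graph)
+        >>> node_emb = graph.node_features["node_emb"]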
+ """ + + h = graph.node_features["node_feat"] + # get the node feature tensor from graph + g = graph.to_dgl() # transfer the current NLPgraph to DGL graph + # edge_type = g.edata[dgl.ETYPE].long() + # output projection + if self.num_layers > 1: + for l in range(0, self.num_layers - 1): + h = self.RGCN_layers[l](g, h) + + logits = self.RGCN_layers[-1](g, h) + + graph.node_features["node_emb"] = logits # put the results into the NLPGraph + return graph + + +class RGCNLayer(GNNLayerBase): + r"""A wrapper for RelGraphConv in DGL. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + dropout : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. Default: ``False`` + """ + + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + dropout=0.0, + layer_norm=False, + device="cuda", + ): + super(RGCNLayer, self).__init__() + self.linear_dict = { + i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + } + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + self.device = device + + # bias + if self.bias: + self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + nn.init.zeros_(self.h_bias) + + # TODO(minjie): consider remove those options in the future to make + # the module only about graph convolution. 
+ # layer norm + if self.layer_norm: + self.layer_norm_weight = nn.LayerNorm( + output_size, elementwise_affine=True, device=device + ) + + # weight for self loop + if self.self_loop: + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) + + self.dropout = nn.Dropout(dropout) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g): + """Message function.""" + ln = self.linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight(h) + if self.bias: + h = h + self.h_bias + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight + if self.activation: + h = self.activation(h) + h = self.dropout(h) + return h From 1d794a9950a085f12f13559e88a3bd1900f0e1b5 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Thu, 17 Nov 2022 16:23:12 -0500 Subject: [PATCH 02/28] add library code and test --- .../modules/graph_embedding_learning/rgcn.py | 4 +- .../pytorch/test/graph_embedding/run_rgcn.py | 191 ++++++++++++++++++ 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn.py diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 49324004..de9e2714 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -46,7 +46,7 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", + device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -185,7 +185,7 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", + device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py new file mode 100644 index 00000000..7ae1117e --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -0,0 +1,191 @@ +import argparse +import torch +import dgl +import time +import torch.nn.functional as F +from torchmetrics.functional import accuracy +from ...modules.graph_embedding_learning.rgcn import RGCN +from ...data.data import from_dgl +from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset + + +# Fix random seed +# torch.manual_seed(1024) +# import random +# random.seed(1024) +# import numpy as np +# np.random.seed(1024) + +# Load dataset +# Reference: dgl/examples/pytorch/rgcn/entity_utils.py (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) +def load_data(data_name='aifb', get_norm=False, inv_target=False): + if data_name == 'aifb': + dataset = AIFBDataset() + # Test Accuracy: + # 0.9444, 0.8889, 0.9722, 0.9167, 0.9444 without enorm. 
+ # 0.8611, 0.8889, 0.8889, 0.8889, 0.8333 + # avg: 0.93332 (without enorm) + # avg: 0.87222 + # DGL: 0.8889, 0.8889, 0.8056, 0.8889, 0.8611 + # DGL avg: 0.86668 + # paper: 0.9583 + # note: Could stuck at Local minimum of train loss between 0.2-0.35. + elif data_name == 'mutag': + dataset = MUTAGDataset() + # Test Accuracy: + # 0.6912, 0.7500, 0.7353, 0.6324, 0.7353 + # avg: 0.68884 + # DGL: 0.6765, 0.7059, 0.7353, 0.6765, 0.6912 + # DGL avg: 0.69724 + # paper: 0.7323 + # note: Could stuck at local minimum of train acc: 0.3897 & loss 0.6931 + elif data_name == 'bgs': + dataset = BGSDataset() + # Test Accuracy: + # 0.8966, 0.9310, 0.8966, 0.7931, 0.8621 + # avg: 0.87588 + # DGL: 0.7931, 0.9310, 0.8966, 0.8276, 0.8966 + # DGL avg: 0.86898 + # paper: 0.8310 + # note: Could stuck at local minimum of train acc: 0.6325 & loss: 0.6931 + else: + dataset = AMDataset() + # Test Accuracy: + # 0.7525, 0.7374, 0.7424, 0.7424, 0.7424 + # avg: 0.74342 + # DGL: 0.7677, 0.7677, 0.7323, 0.7879, 0.7677 + # DGL avg: 0.76466 + # paper: 0.8929 + # note: args.hidden_size is 10. + # Could stuck at local minimum of train loss: 0.3-0.5 + + # Load hetero-graph + hg = dataset[0] + + num_rels = len(hg.canonical_etypes) + category = dataset.predict_category + num_classes = dataset.num_classes + labels = hg.nodes[category].data.pop('labels') + train_mask = hg.nodes[category].data.pop('train_mask') + test_mask = hg.nodes[category].data.pop('test_mask') + train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() + test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() + + if get_norm: + # Calculate normalization weight for each edge, + # 1. / d, d is the degree of the destination node + for cetype in hg.canonical_etypes: + hg.edges[cetype].data['norm'] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) + edata = ['norm'] + else: + edata = None + category_id = hg.ntypes.index(category) + g = dgl.to_homogeneous(hg, edata=edata) + node_ids = torch.arange(g.num_nodes()) + + # find out the target node ids in g + loc = (g.ndata['_TYPE'] == category_id) + target_idx = node_ids[loc] + + if inv_target: + # Map global node IDs to type-specific node IDs. 
This is required for + # looking up type-specific labels in a minibatch + inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64) + inv_target[target_idx] = torch.arange(0, target_idx.shape[0], + dtype=inv_target.dtype) + return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target + else: + return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx + + +def main(args): + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=args.dataset, get_norm=True) + + # graph = from_dgl(g, is_hetero=False) + graph = from_dgl(g) + num_nodes = graph.get_node_num() + emb = torch.nn.Embedding(num_nodes, args.hidden_size) + # emb.requires_grad = True + graph.node_features['node_feat'] = emb.weight + + model = RGCN(num_layers=args.num_hidden_layers, + input_size=args.hidden_size, + hidden_size=args.hidden_size, + output_size=num_classes, + num_rels=num_rels, + num_bases=args.num_bases, + use_self_loop=args.use_self_loop, + # gpu=args.gpu, + dropout = args.dropout, + device='cpu') + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + print("start training...") + model.train() + for epoch in range(args.num_epochs): + logits = model(graph).node_features["node_emb"] + logits = logits[target_idx] + loss = F.cross_entropy(logits[train_idx], labels[train_idx]) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + train_acc = accuracy(logits[train_idx].argmax(dim=1), labels[train_idx]).item() + print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(epoch, train_acc, loss.item())) + print() + # Save Model + # torch.save(model.state_dict(), "./rgcn_model.pt") + print("start evaluating...") + model.eval() + with torch.no_grad(): + logits = model(graph).node_features["node_emb"] + logits = logits[target_idx] + test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() + print("Test Accuracy: {:.4f}".format(test_acc)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='RGCN for entity classification') + parser.add_argument("--num-hidden-layers", type=int, default=1, + help="number of hidden layers beside input/output layer") + parser.add_argument("--hidden-size", type=int, default=16, + help="dimension of hidden layer") + parser.add_argument("--gpu", type=int, default=-1, + help="GPU device number, -1 for cpu") + parser.add_argument("--num-bases", type=int, default=-1, + help="number of filter weight matrices, default: -1 [use all]") + parser.add_argument("-d", "--dataset", type=str, required=True, + choices=['aifb', 'mutag', 'bgs', 'am'], + help="dataset to use") + parser.add_argument("--use-self-loop", type=bool, default=False, + help="Consider self-loop edges or not") + parser.add_argument("--dropout", type=float, default=0.0, + help="Dropout rate") + parser.add_argument("--lr", type=float, default=1e-2, + help="Start learning rate") + parser.add_argument("--wd", type=float, default=5e-4, + help="weight decay") + parser.add_argument("--num-epochs", type=int, default=50, + help="Number of training epochs") + + args = parser.parse_args() + print(args) + main(args) + + + + + +"""Deprecated RGCN code on Heterogeneous graph due to +the lack of support from data structure. The following supports +are needed (but not limit to): +- Redefine the feature data structure of node/edge + - Index node/edge ids by their type. + - Enable type indexed features. +- Make corresponding changes on views. 
+- Make corresponding changes on set/get features functions. + +This example bypasses it by storing the features in the model +itself. It is a code trick and therefore not recommended to +the user. +""" \ No newline at end of file From 5d57dabfee27922752c78e81604fac3e37223660 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Fri, 18 Nov 2022 18:12:57 -0800 Subject: [PATCH 03/28] add rgcn for QG --- .../config/squad_split2/rgcn_dependency.json | 12 ++++++++++++ examples/pytorch/question_generation/main.py | 6 +++++- examples/pytorch/rgcn/rgcn.py | 12 ++++-------- .../semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 2 +- graph4nlp/pytorch/models/graph2seq.py | 2 +- 5 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json diff --git a/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json b/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json new file mode 100644 index 00000000..3536da4e --- /dev/null +++ b/examples/pytorch/question_generation/config/squad_split2/rgcn_dependency.json @@ -0,0 +1,12 @@ +{ +"config_path": "examples/pytorch/question_generation/config/squad_split2/qg.yaml", +"model_args.graph_construction_args.graph_construction_share.topology_subdir": "DependencyGraphForRGCN", +"model_args.graph_construction_args.graph_construction_private.edge_strategy": "heterogeneous", +"model_args.graph_construction_args.graph_construction_private.merge_strategy": "tailhead", +"model_args.graph_construction_args.graph_construction_private.sequential_link": true, +"model_args.graph_construction_args.graph_construction_private.as_node": false, +"model_args.graph_embedding_name": "rgcn", +"model_args.graph_embedding_args.graph_embedding_private.num_rels": 80, +"model_args.graph_embedding_args.graph_embedding_private.num_bases": 4, +"checkpoint_args.out_dir": "out/squad_split2/rgcn_dependency_ckpt" +} diff --git a/examples/pytorch/question_generation/main.py b/examples/pytorch/question_generation/main.py index 326c9a80..450c1ded 100644 --- a/examples/pytorch/question_generation/main.py +++ b/examples/pytorch/question_generation/main.py @@ -27,6 +27,7 @@ from graph4nlp.pytorch.modules.utils.logger import Logger from .fused_embedding_construction import FusedEmbeddingConstruction +from examples.pytorch.semantic_parsing.graph2seq.rgcn_lib.graph2seq import RGCNGraph2Seq class QGModel(nn.Module): @@ -39,7 +40,10 @@ def __init__(self, vocab, config): ] # build Graph2Seq model - self.g2s = Graph2Seq.from_args(config, self.vocab) + if config["model_args"]["graph_embedding_name"] == "rgcn": + self.g2s = RGCNGraph2Seq.from_args(config, self.vocab) + else: + self.g2s = Graph2Seq.from_args(config, self.vocab) if "w2v" in self.g2s.graph_initializer.embedding_layer.word_emb_layers: self.word_emb = self.g2s.graph_initializer.embedding_layer.word_emb_layers[ diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index 0779e904..50f0a00b 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -54,7 +53,6 @@ def __init__( self.num_bases = num_bases self.use_self_loop = use_self_loop self.dropout = dropout - self.device = device self.RGCN_layers = nn.ModuleList() @@ -185,22 +183,20 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", ): 
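+        # Parameters are created without an explicit device; move the module
+        # as a whole (e.g. ``model.to(device)``) to place them.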
super(RGCNLayer, self).__init__() self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation self.self_loop = self_loop self.layer_norm = layer_norm - self.device = device # bias if self.bias: - self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) # TODO(minjie): consider remove those options in the future to make @@ -208,12 +204,12 @@ def __init__( # layer norm if self.layer_norm: self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True, device=device + output_size, elementwise_affine=True ) # weight for self loop if self.self_loop: - self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) self.dropout = nn.Dropout(dropout) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 917264c8..f333071e 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -90,5 +90,5 @@ def _build_gnn_encoder( output_size, num_rels=gnn_num_rels, num_bases=gnn_num_bases, - dropout=feats_dropout, + dropout=feats_dropout ) diff --git a/graph4nlp/pytorch/models/graph2seq.py b/graph4nlp/pytorch/models/graph2seq.py index 682da3e5..5b2ea919 100644 --- a/graph4nlp/pytorch/models/graph2seq.py +++ b/graph4nlp/pytorch/models/graph2seq.py @@ -26,7 +26,7 @@ class Graph2Seq(Graph2XBase): >>> "It is just a how-to-use example." 
>>> from graph4nlp.pytorch.modules.config import get_basic_args >>> opt = get_basic_args(graph_construction_name="node_emb", graph_embedding_name="gat", decoder_name="stdrnn") - >>> graph2seq = Graph2Seq.from_args(opt=opt, vocab_model=vocab_model, device=torch.device("cuda:0")) + >>> graph2seq = Graph2Seq.from_args(opt=opt, vocab_model=vocab_model) >>> batch_graph = [GraphData() for _ in range(2)] >>> tgt_seq = torch.Tensor([[1, 2, 3], [4, 5, 6]]) >>> seq_out, _, _ = graph2seq(batch_graph=batch_graph, tgt_seq=tgt_seq) From c4437c1af163b5b109c89ed6ae02aacc5cd2e1f4 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 21:41:34 -0500 Subject: [PATCH 04/28] update config --- .../pytorch/test/graph_embedding/run_rgcn.py | 104 ++++++++---------- .../test/graph_embedding/run_rgcn.yaml | 10 ++ 2 files changed, 57 insertions(+), 57 deletions(-) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 7ae1117e..40efd7de 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -6,6 +6,7 @@ from torchmetrics.functional import accuracy from ...modules.graph_embedding_learning.rgcn import RGCN from ...data.data import from_dgl +from ...modules.utils.generic_utils import get_config from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset @@ -98,30 +99,30 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx -def main(args): - g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=args.dataset, get_norm=True) +def main(config): + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=config['dataset'], get_norm=True) # graph = from_dgl(g, is_hetero=False) graph = from_dgl(g) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, args.hidden_size) + emb = torch.nn.Embedding(num_nodes, config['hidden_size']) # emb.requires_grad = True graph.node_features['node_feat'] = emb.weight - model = RGCN(num_layers=args.num_hidden_layers, - input_size=args.hidden_size, - hidden_size=args.hidden_size, + model = RGCN(num_layers=config['num_hidden_layers'], + input_size=config['hidden_size'], + hidden_size=config['hidden_size'], output_size=num_classes, num_rels=num_rels, - num_bases=args.num_bases, - use_self_loop=args.use_self_loop, - # gpu=args.gpu, - dropout = args.dropout, - device='cpu') - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + num_bases=config['num_bases'], + use_self_loop=config['use_self_loop'], + # gpu=config.gpu, + dropout = config['dropout'], + device='cpu' if config['gpu'] == -1 else 'cuda:'+str(config['gpu'])) + optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['wd']) print("start training...") model.train() - for epoch in range(args.num_epochs): + for epoch in range(config['num_epochs']): logits = model(graph).node_features["node_emb"] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) @@ -145,47 +146,36 @@ def main(args): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='RGCN for entity classification') - parser.add_argument("--num-hidden-layers", type=int, default=1, - help="number of hidden layers beside input/output layer") - parser.add_argument("--hidden-size", 
type=int, default=16, - help="dimension of hidden layer") - parser.add_argument("--gpu", type=int, default=-1, - help="GPU device number, -1 for cpu") - parser.add_argument("--num-bases", type=int, default=-1, - help="number of filter weight matrices, default: -1 [use all]") - parser.add_argument("-d", "--dataset", type=str, required=True, - choices=['aifb', 'mutag', 'bgs', 'am'], - help="dataset to use") - parser.add_argument("--use-self-loop", type=bool, default=False, - help="Consider self-loop edges or not") - parser.add_argument("--dropout", type=float, default=0.0, - help="Dropout rate") - parser.add_argument("--lr", type=float, default=1e-2, - help="Start learning rate") - parser.add_argument("--wd", type=float, default=5e-4, - help="weight decay") - parser.add_argument("--num-epochs", type=int, default=50, - help="Number of training epochs") - - args = parser.parse_args() - print(args) - main(args) - - - - - -"""Deprecated RGCN code on Heterogeneous graph due to -the lack of support from data structure. The following supports -are needed (but not limit to): -- Redefine the feature data structure of node/edge - - Index node/edge ids by their type. - - Enable type indexed features. -- Make corresponding changes on views. -- Make corresponding changes on set/get features functions. - -This example bypasses it by storing the features in the model -itself. It is a code trick and therefore not recommended to -the user. -""" \ No newline at end of file + parser = argparse.ArgumentParser() + parser.add_argument("-config", type=str, help="path to the config file") + parser.add_argument("--grid_search", action="store_true", help="flag: grid search") + cfg = vars(parser.parse_args()) + config = get_config(cfg["config"]) + + # parser = argparse.ArgumentParser(description='RGCN for entity classification') + # parser.add_argument("--num-hidden-layers", type=int, default=1, + # help="number of hidden layers beside input/output layer") + # parser.add_argument("--hidden-size", type=int, default=16, + # help="dimension of hidden layer") + # parser.add_argument("--gpu", type=int, default=-1, + # help="GPU device number, -1 for cpu") + # parser.add_argument("--num-bases", type=int, default=-1, + # help="number of filter weight matrices, default: -1 [use all]") + # parser.add_argument("-d", "--dataset", type=str, required=True, + # choices=['aifb', 'mutag', 'bgs', 'am'], + # help="dataset to use") + # parser.add_argument("--use-self-loop", type=bool, default=False, + # help="Consider self-loop edges or not") + # parser.add_argument("--dropout", type=float, default=0.0, + # help="Dropout rate") + # parser.add_argument("--lr", type=float, default=1e-2, + # help="Start learning rate") + # parser.add_argument("--wd", type=float, default=5e-4, + # help="weight decay") + # parser.add_argument("--num-epochs", type=int, default=50, + # help="Number of training epochs") + + # args = parser.parse_args() + # print(args) + print(config) + main(config) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml new file mode 100644 index 00000000..448fa495 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +gpu: -1 +num_bases: -1 +dataset: 'aifb' +use_self_loop: False +dropout: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 50 From ef70aa37d1f2a4132a382be025511a8916bfe643 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 21:54:36 -0500 Subject: [PATCH 05/28] format the 
script --- .../pytorch/test/graph_embedding/run_rgcn.py | 87 ++++++++++--------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 40efd7de..441ef737 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,14 +1,13 @@ import argparse -import torch import dgl -import time +import torch import torch.nn.functional as F +from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from torchmetrics.functional import accuracy -from ...modules.graph_embedding_learning.rgcn import RGCN + from ...data.data import from_dgl +from ...modules.graph_embedding_learning.rgcn import RGCN from ...modules.utils.generic_utils import get_config -from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset - # Fix random seed # torch.manual_seed(1024) @@ -17,10 +16,13 @@ # import numpy as np # np.random.seed(1024) -# Load dataset -# Reference: dgl/examples/pytorch/rgcn/entity_utils.py (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) -def load_data(data_name='aifb', get_norm=False, inv_target=False): - if data_name == 'aifb': +# Load dataset +# Reference: dgl/examples/pytorch/rgcn/entity_utils.py +# (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) + + +def load_data(data_name="aifb", get_norm=False, inv_target=False): + if data_name == "aifb": dataset = AIFBDataset() # Test Accuracy: # 0.9444, 0.8889, 0.9722, 0.9167, 0.9444 without enorm. @@ -31,7 +33,7 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL avg: 0.86668 # paper: 0.9583 # note: Could stuck at Local minimum of train loss between 0.2-0.35. - elif data_name == 'mutag': + elif data_name == "mutag": dataset = MUTAGDataset() # Test Accuracy: # 0.6912, 0.7500, 0.7353, 0.6324, 0.7353 @@ -40,13 +42,13 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL avg: 0.69724 # paper: 0.7323 # note: Could stuck at local minimum of train acc: 0.3897 & loss 0.6931 - elif data_name == 'bgs': + elif data_name == "bgs": dataset = BGSDataset() # Test Accuracy: # 0.8966, 0.9310, 0.8966, 0.7931, 0.8621 # avg: 0.87588 # DGL: 0.7931, 0.9310, 0.8966, 0.8276, 0.8966 - # DGL avg: 0.86898 + # DGL avg: 0.86898 # paper: 0.8310 # note: Could stuck at local minimum of train acc: 0.6325 & loss: 0.6931 else: @@ -57,7 +59,7 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # DGL: 0.7677, 0.7677, 0.7323, 0.7879, 0.7677 # DGL avg: 0.76466 # paper: 0.8929 - # note: args.hidden_size is 10. + # note: args.hidden_size is 10. 
# Could stuck at local minimum of train loss: 0.3-0.5 # Load hetero-graph @@ -66,9 +68,9 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): num_rels = len(hg.canonical_etypes) category = dataset.predict_category num_classes = dataset.num_classes - labels = hg.nodes[category].data.pop('labels') - train_mask = hg.nodes[category].data.pop('train_mask') - test_mask = hg.nodes[category].data.pop('test_mask') + labels = hg.nodes[category].data.pop("labels") + train_mask = hg.nodes[category].data.pop("train_mask") + test_mask = hg.nodes[category].data.pop("test_mask") train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() @@ -76,8 +78,8 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): # Calculate normalization weight for each edge, # 1. / d, d is the degree of the destination node for cetype in hg.canonical_etypes: - hg.edges[cetype].data['norm'] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) - edata = ['norm'] + hg.edges[cetype].data["norm"] = dgl.norm_by_dst(hg, cetype).unsqueeze(1) + edata = ["norm"] else: edata = None category_id = hg.ntypes.index(category) @@ -85,54 +87,61 @@ def load_data(data_name='aifb', get_norm=False, inv_target=False): node_ids = torch.arange(g.num_nodes()) # find out the target node ids in g - loc = (g.ndata['_TYPE'] == category_id) + loc = g.ndata["_TYPE"] == category_id target_idx = node_ids[loc] if inv_target: # Map global node IDs to type-specific node IDs. This is required for # looking up type-specific labels in a minibatch inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64) - inv_target[target_idx] = torch.arange(0, target_idx.shape[0], - dtype=inv_target.dtype) + inv_target[target_idx] = torch.arange(0, target_idx.shape[0], dtype=inv_target.dtype) return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target else: return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx def main(config): - g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(data_name=config['dataset'], get_norm=True) + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data( + data_name=config["dataset"], get_norm=True + ) # graph = from_dgl(g, is_hetero=False) graph = from_dgl(g) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config['hidden_size']) + emb = torch.nn.Embedding(num_nodes, config["hidden_size"]) # emb.requires_grad = True - graph.node_features['node_feat'] = emb.weight - - model = RGCN(num_layers=config['num_hidden_layers'], - input_size=config['hidden_size'], - hidden_size=config['hidden_size'], - output_size=num_classes, - num_rels=num_rels, - num_bases=config['num_bases'], - use_self_loop=config['use_self_loop'], - # gpu=config.gpu, - dropout = config['dropout'], - device='cpu' if config['gpu'] == -1 else 'cuda:'+str(config['gpu'])) - optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['wd']) + graph.node_features["node_feat"] = emb.weight + + model = RGCN( + num_layers=config["num_hidden_layers"], + input_size=config["hidden_size"], + hidden_size=config["hidden_size"], + output_size=num_classes, + num_rels=num_rels, + num_bases=config["num_bases"], + use_self_loop=config["use_self_loop"], + # gpu=config.gpu, + dropout=config["dropout"], + device="cpu" if config["gpu"] == -1 else "cuda:" + str(config["gpu"]), + ) + optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start 
training...") model.train() - for epoch in range(config['num_epochs']): + for epoch in range(config["num_epochs"]): logits = model(graph).node_features["node_emb"] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) - + optimizer.zero_grad() loss.backward() optimizer.step() train_acc = accuracy(logits[train_idx].argmax(dim=1), labels[train_idx]).item() - print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(epoch, train_acc, loss.item())) + print( + "Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format( + epoch, train_acc, loss.item() + ) + ) print() # Save Model # torch.save(model.state_dict(), "./rgcn_model.pt") From c20b9c227b2253c8e316d7794091f8e469ff10e1 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Fri, 18 Nov 2022 22:56:40 -0500 Subject: [PATCH 06/28] remove device options from model --- .../modules/graph_embedding_learning/rgcn.py | 14 +++------- .../pytorch/test/graph_embedding/run_rgcn.py | 28 ------------------- .../test/graph_embedding/run_rgcn.yaml | 1 - 3 files changed, 4 insertions(+), 39 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index de9e2714..5ff10129 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -54,7 +53,6 @@ def __init__( self.num_bases = num_bases self.use_self_loop = use_self_loop self.dropout = dropout - self.device = device self.RGCN_layers = nn.ModuleList() @@ -185,35 +183,31 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias, device=device) for i in range(num_rels) + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation self.self_loop = self_loop self.layer_norm = layer_norm - self.device = device # bias if self.bias: - self.h_bias = nn.Parameter(torch.Tensor(output_size)).to(device) + self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) # TODO(minjie): consider remove those options in the future to make # the module only about graph convolution. 
# layer norm if self.layer_norm: - self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True, device=device - ) + self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) # weight for self loop if self.self_loop: - self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)).to(device) + self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) self.dropout = nn.Dropout(dropout) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 441ef737..f3438efe 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -120,9 +120,7 @@ def main(config): num_rels=num_rels, num_bases=config["num_bases"], use_self_loop=config["use_self_loop"], - # gpu=config.gpu, dropout=config["dropout"], - device="cpu" if config["gpu"] == -1 else "cuda:" + str(config["gpu"]), ) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") @@ -160,31 +158,5 @@ def main(config): parser.add_argument("--grid_search", action="store_true", help="flag: grid search") cfg = vars(parser.parse_args()) config = get_config(cfg["config"]) - - # parser = argparse.ArgumentParser(description='RGCN for entity classification') - # parser.add_argument("--num-hidden-layers", type=int, default=1, - # help="number of hidden layers beside input/output layer") - # parser.add_argument("--hidden-size", type=int, default=16, - # help="dimension of hidden layer") - # parser.add_argument("--gpu", type=int, default=-1, - # help="GPU device number, -1 for cpu") - # parser.add_argument("--num-bases", type=int, default=-1, - # help="number of filter weight matrices, default: -1 [use all]") - # parser.add_argument("-d", "--dataset", type=str, required=True, - # choices=['aifb', 'mutag', 'bgs', 'am'], - # help="dataset to use") - # parser.add_argument("--use-self-loop", type=bool, default=False, - # help="Consider self-loop edges or not") - # parser.add_argument("--dropout", type=float, default=0.0, - # help="Dropout rate") - # parser.add_argument("--lr", type=float, default=1e-2, - # help="Start learning rate") - # parser.add_argument("--wd", type=float, default=5e-4, - # help="weight decay") - # parser.add_argument("--num-epochs", type=int, default=50, - # help="Number of training epochs") - - # args = parser.parse_args() - # print(args) print(config) main(config) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 448fa495..6240bc85 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,6 +1,5 @@ num_hidden_layers: 1 hidden_size: 16 -gpu: -1 num_bases: -1 dataset: 'aifb' use_self_loop: False From ca03f95a7086f9c1167e8bdc844f8970ba367f59 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:42:33 -0500 Subject: [PATCH 07/28] add direction_options for rgcn --- .../modules/graph_embedding_learning/rgcn.py | 485 ++++++++++++++++-- .../pytorch/test/graph_embedding/run_rgcn.py | 8 +- .../test/graph_embedding/run_rgcn.yaml | 7 +- 3 files changed, 464 insertions(+), 36 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 5ff10129..f98217fd 100644 --- 
a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -30,10 +30,10 @@ class RGCN(GNNBase): Number of relations. num_bases : int, optional Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. - use_self_loop : bool, optional + self_loop : bool, optional True to include self loop message. Default: ``True``. - dropout : float, optional - Dropout rate. Default: ``0.0`` + feat_drop : float, optional + dropout rate. Default: ``0.0`` """ def __init__( @@ -43,17 +43,24 @@ def __init__( hidden_size, output_size, num_rels, + direction_option=None, + regularizer="basis", + bias=True, + activation=None, num_bases=None, - use_self_loop=True, - dropout=0.0, + self_loop=True, + feat_drop=0.0, ): super(RGCN, self).__init__() self.num_layers = num_layers self.num_rels = num_rels self.num_bases = num_bases - self.use_self_loop = use_self_loop - self.dropout = dropout - + self.self_loop = self_loop + self.feat_drop = feat_drop + self.direction_option = direction_option + self.regularizer = regularizer + self.activation = activation + self.bias = bias self.RGCN_layers = nn.ModuleList() # transform the hidden size format @@ -67,12 +74,13 @@ def __init__( input_size, hidden_size[0], num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) # hidden layers @@ -83,12 +91,13 @@ def __init__( hidden_size[l - 1], hidden_size[l], num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) # output projection @@ -97,12 +106,13 @@ def __init__( hidden_size[-1] if self.num_layers > 1 else input_size, output_size, num_rels=self.num_rels, - regularizer="basis", + direction_option=self.direction_option, + regularizer=self.regularizer, num_bases=self.num_bases, - bias=True, - activation=F.relu, - self_loop=self.use_self_loop, - dropout=self.dropout, + bias=self.bias, + activation=self.activation, + self_loop=self.self_loop, + feat_drop=self.feat_drop, ) ) @@ -122,8 +132,12 @@ def forward(self, graph): The graph with generated node embedding stored in the feature field named as "node_emb". """ - - h = graph.node_features["node_feat"] + feat = graph.node_features["node_feat"] + if self.direction_option == "bi_sep": + h = [feat, feat] + else: + h = feat + # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph # edge_type = g.edata[dgl.ETYPE].long() @@ -134,12 +148,15 @@ def forward(self, graph): logits = self.RGCN_layers[-1](g, h) + if self.direction_option == "bi_sep": + logits = torch.cat(logits, -1) + graph.node_features["node_emb"] = logits # put the results into the NLPGraph return graph class RGCNLayer(GNNLayerBase): - r"""A wrapper for RelGraphConv in DGL. + r"""A wrapper for RGCNLayer. .. math:: TODO @@ -165,7 +182,7 @@ class RGCNLayer(GNNLayerBase): Activation function. Default: ``None``. self_loop : bool, optional True to include self loop message. Default: ``True``. 
- dropout : float, optional + feat_drop : float, optional Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` @@ -176,15 +193,126 @@ def __init__( input_size, output_size, num_rels, + direction_option=None, regularizer=None, num_bases=None, bias=True, activation=None, self_loop=False, - dropout=0.0, + feat_drop=0.0, layer_norm=False, ): super(RGCNLayer, self).__init__() + if direction_option == "undirected": + self.model = UndirectedRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + elif direction_option == "bi_sep": + self.model = BiSepRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + elif direction_option == "bi_fuse": + self.model = BiFuseRGCNLayer( + input_size, + output_size, + num_rels=num_rels, + regularizer=regularizer, + num_bases=num_bases, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + layer_norm=layer_norm + ) + else: + raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) + + def forward(self, graph, feat): + r"""Compute graph attention network layer. + + Parameters + ---------- + graph : DGLGraph + The graph. + feat : torch.Tensor or pair of torch.Tensor + If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where + :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. + If a pair of torch.Tensor is given, the pair must contain two tensors of shape + :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. + + Returns + ------- + torch.Tensor + The output feature of shape :math:`(N, H, D_{out})` where :math:`H` + is the number of heads, and :math:`D_{out}` is size of output feature. + """ + return self.model(graph, feat) + + +class UndirectedRGCNLayer(GNNLayerBase): + r"""An undirected RGCN layer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. 
Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(UndirectedRGCNLayer, self).__init__() self.linear_dict = { i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) } @@ -199,8 +327,6 @@ def __init__( self.h_bias = nn.Parameter(torch.Tensor(output_size)) nn.init.zeros_(self.h_bias) - # TODO(minjie): consider remove those options in the future to make - # the module only about graph convolution. # layer norm if self.layer_norm: self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) @@ -210,7 +336,7 @@ def __init__( self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) - self.dropout = nn.Dropout(dropout) + self.dropout = nn.Dropout(feat_drop) def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): @@ -247,3 +373,300 @@ def message(edges, g): h = self.activation(h) h = self.dropout(h) return h + + +class BiFuseRGCNLayer(GNNLayerBase): + r"""A Bidirectional version for RGCNLayer, with an additional fuse layer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. 
Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(BiFuseRGCNLayer, self).__init__() + self.linear_dict_forward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + self.linear_dict_backward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + + # bias + if self.bias: + self.h_bias_forward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_forward) + self.h_bias_backward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_backward) + + # layer norm + if self.layer_norm: + self.layer_norm_weight_forward = nn.LayerNorm(output_size, elementwise_affine=True) + self.layer_norm_weight_backward = nn.LayerNorm(output_size, elementwise_affine=True) + + # weight for self loop + if self.self_loop: + self.loop_weight_forward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_forward, gain=nn.init.calculate_gain("relu")) + + self.loop_weight_backward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_backward, gain=nn.init.calculate_gain("relu")) + + self.fuse_linear = nn.Linear(4 * output_size, output_size, bias=True) + self.dropout = nn.Dropout(feat_drop) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g, direction): + """Message function.""" + linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + ln = linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_forward(h) + if self.bias: + h = h + self.h_bias_forward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h_forward = h + + g = g.reverse() + with g.local_scope(): + g.srcdata["h"] = feat + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_backward(h) + if self.bias: + h = h + self.h_bias_backward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h_backward = h + + fuse_vector = torch.cat([h_forward, h_backward, h_forward*h_backward, h_forward-h_backward], dim=-1) 
+ fuse_gate_vector = torch.sigmoid(self.fuse_linear(fuse_vector)) + h = fuse_gate_vector * h_forward + (1 - fuse_gate_vector) * h_backward + + if self.activation: + h = self.activation(h) + h = self.dropout(h) + return h + + +class BiSepRGCNLayer(GNNLayerBase): + r"""A Bidirectional version for RGCNLayer. + + .. math:: + TODO + + Parameters + ---------- + input_size : int, or pair of ints + Input feature size. + output_size : int + Output feature size. + num_rels: int + number of relations + regularizer : str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + - "bdd" is short for block-diagonal-decomposition. + Default applies no regularization. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. + bias : bool, optional + True if bias is added. Default: ``True``. + activation : callable, optional + Activation function. Default: ``None``. + self_loop : bool, optional + True to include self loop message. Default: ``True``. + feat_drop : float, optional + Dropout rate. Default: ``0.0`` + layer_norm: float, optional + Add layer norm. Default: ``False`` + """ + def __init__( + self, + input_size, + output_size, + num_rels, + regularizer=None, + num_bases=None, + bias=True, + activation=None, + self_loop=False, + feat_drop=0.0, + layer_norm=False, + ): + super(BiSepRGCNLayer, self).__init__() + self.linear_dict_forward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + self.linear_dict_backward = { + i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + } + + # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.bias = bias + self.activation = activation + self.self_loop = self_loop + self.layer_norm = layer_norm + + # bias + if self.bias: + self.h_bias_forward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_forward) + self.h_bias_backward = nn.Parameter(torch.Tensor(output_size)) + nn.init.zeros_(self.h_bias_backward) + + # layer norm + if self.layer_norm: + self.layer_norm_weight_forward = nn.LayerNorm(output_size, elementwise_affine=True) + self.layer_norm_weight_backward = nn.LayerNorm(output_size, elementwise_affine=True) + + # weight for self loop + if self.self_loop: + self.loop_weight_forward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_forward, gain=nn.init.calculate_gain("relu")) + + self.loop_weight_backward = nn.Parameter(torch.Tensor(input_size, output_size)) + nn.init.xavier_uniform_(self.loop_weight_backward, gain=nn.init.calculate_gain("relu")) + + self.dropout = nn.Dropout(feat_drop) + + def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): + def message(edges, g, direction): + """Message function.""" + linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + ln = linear_dict[g.canonical_etypes.index(edges._etype)] + m = ln(edges.src["h"]) + if "norm" in edges.data: + m = m * edges.data["norm"] + return {"m": m} + feat_forward, feat_backward = feat + # self.presorted = presorted + with g.local_scope(): + g.srcdata["h"] = feat_forward + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, 
cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_forward(h) + if self.bias: + h = h + self.h_bias_forward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h_forward = h + + g = g.reverse() + with g.local_scope(): + g.srcdata["h"] = feat_backward + if norm is not None: + g.edata["norm"] = norm + # g.edata['etype'] = etypes + # message passing + from functools import partial + + update_dict = { + etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + } + g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") + # g.update_all(self.message, fn.sum('m', 'h')) + # apply bias and activation + h = g.dstdata["h"] + if self.layer_norm: + h = self.layer_norm_weight_backward(h) + if self.bias: + h = h + self.h_bias_backward + if self.self_loop: + h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h_backward = h + + if self.activation: + h_forward = self.activation(h_forward) + h_backward = self.activation(h_backward) + h_forward = self.dropout(h_forward) + h_backward = self.dropout(h_backward) + return [h_forward, h_backward] \ No newline at end of file diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index f3438efe..77ff6e1d 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -117,10 +117,14 @@ def main(config): input_size=config["hidden_size"], hidden_size=config["hidden_size"], output_size=num_classes, + direction_option=config["direction_option"], + regularizer="basis", + bias=True, + activation=F.relu, num_rels=num_rels, num_bases=config["num_bases"], - use_self_loop=config["use_self_loop"], - dropout=config["dropout"], + self_loop=config["self_loop"], + feat_drop=config["feat_drop"], ) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 6240bc85..8042b72a 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -2,8 +2,9 @@ num_hidden_layers: 1 hidden_size: 16 num_bases: -1 dataset: 'aifb' -use_self_loop: False -dropout: 0.0 +direction_option: "bi_fuse" +self_loop: False +feat_drop: 0.0 lr: 0.01 wd: 0.0005 -num_epochs: 50 +num_epochs: 150 From af5fc60e4d696c822bbae1e5db349c7b5d012215 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:46:54 -0500 Subject: [PATCH 08/28] format --- .../modules/graph_embedding_learning/rgcn.py | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index f98217fd..22cea2c3 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -2,7 +2,6 @@ import dgl.function as fn import torch import torch.nn as nn -import torch.nn.functional as F from .base import GNNBase, GNNLayerBase @@ -137,7 +136,7 @@ def forward(self, graph): h = [feat, feat] else: h = feat - + # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph # edge_type = g.edata[dgl.ETYPE].long() 
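+        # NOTE: with direction_option == "bi_sep", h is a pair of tensors
+        # (forward and backward streams) that is concatenated only after the
+        # last layer.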
@@ -214,7 +213,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) elif direction_option == "bi_sep": self.model = BiSepRGCNLayer( @@ -227,7 +226,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) elif direction_option == "bi_fuse": self.model = BiFuseRGCNLayer( @@ -240,7 +239,7 @@ def __init__( activation=activation, self_loop=self_loop, feat_drop=feat_drop, - layer_norm=layer_norm + layer_norm=layer_norm, ) else: raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) @@ -299,6 +298,7 @@ class UndirectedRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. Default: ``False`` """ + def __init__( self, input_size, @@ -407,6 +407,7 @@ class BiFuseRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. Default: ``False`` """ + def __init__( self, input_size, @@ -460,7 +461,9 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + linear_dict = ( + self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + ) ln = linear_dict[g.canonical_etypes.index(edges._etype)] m = ln(edges.src["h"]) if "norm" in edges.data: @@ -477,7 +480,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="forward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -490,7 +494,7 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h - + g = g.reverse() with g.local_scope(): g.srcdata["h"] = feat @@ -501,7 +505,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="backward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -514,11 +519,13 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h - - fuse_vector = torch.cat([h_forward, h_backward, h_forward*h_backward, h_forward-h_backward], dim=-1) + + fuse_vector = torch.cat( + [h_forward, h_backward, h_forward * h_backward, h_forward - h_backward], dim=-1 + ) fuse_gate_vector = torch.sigmoid(self.fuse_linear(fuse_vector)) h = fuse_gate_vector * h_forward + (1 - fuse_gate_vector) * h_backward - + if self.activation: h = self.activation(h) h = self.dropout(h) @@ -557,6 +564,7 @@ class BiSepRGCNLayer(GNNLayerBase): layer_norm: float, optional Add layer norm. 
Default: ``False`` """ + def __init__( self, input_size, @@ -609,12 +617,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = self.linear_dict_forward if direction=='forward' else self.linear_dict_backward + linear_dict = ( + self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + ) ln = linear_dict[g.canonical_etypes.index(edges._etype)] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} + feat_forward, feat_backward = feat # self.presorted = presorted with g.local_scope(): @@ -626,7 +637,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='forward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="forward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -639,7 +651,7 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h - + g = g.reverse() with g.local_scope(): g.srcdata["h"] = feat_backward @@ -650,7 +662,8 @@ def message(edges, g, direction): from functools import partial update_dict = { - etype: (partial(message, g=g, direction='backward'), fn.sum("m", "h")) for etype in g.canonical_etypes + etype: (partial(message, g=g, direction="backward"), fn.sum("m", "h")) + for etype in g.canonical_etypes } g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") # g.update_all(self.message, fn.sum('m', 'h')) @@ -663,10 +676,10 @@ def message(edges, g, direction): if self.self_loop: h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h - + if self.activation: h_forward = self.activation(h_forward) h_backward = self.activation(h_backward) h_forward = self.dropout(h_forward) h_backward = self.dropout(h_backward) - return [h_forward, h_backward] \ No newline at end of file + return [h_forward, h_backward] From e1fa01d551fe3b73e892d7e94374b298c5904566 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 10:49:28 -0500 Subject: [PATCH 09/28] remove unused parameters --- .../modules/graph_embedding_learning/rgcn.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 22cea2c3..f61c59df 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -43,21 +43,17 @@ def __init__( output_size, num_rels, direction_option=None, - regularizer="basis", bias=True, activation=None, - num_bases=None, self_loop=True, feat_drop=0.0, ): super(RGCN, self).__init__() self.num_layers = num_layers self.num_rels = num_rels - self.num_bases = num_bases self.self_loop = self_loop self.feat_drop = feat_drop self.direction_option = direction_option - self.regularizer = regularizer self.activation = activation self.bias = bias self.RGCN_layers = nn.ModuleList() @@ -74,8 +70,6 @@ def __init__( hidden_size[0], num_rels=self.num_rels, direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -91,8 +85,6 @@ def __init__( hidden_size[l], num_rels=self.num_rels, 
direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -106,8 +98,6 @@ def __init__( output_size, num_rels=self.num_rels, direction_option=self.direction_option, - regularizer=self.regularizer, - num_bases=self.num_bases, bias=self.bias, activation=self.activation, self_loop=self.self_loop, @@ -193,8 +183,6 @@ def __init__( output_size, num_rels, direction_option=None, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -207,8 +195,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -220,8 +206,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -233,8 +217,6 @@ def __init__( input_size, output_size, num_rels=num_rels, - regularizer=regularizer, - num_bases=num_bases, bias=bias, activation=activation, self_loop=self_loop, @@ -304,8 +286,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -413,8 +393,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, @@ -570,8 +548,6 @@ def __init__( input_size, output_size, num_rels, - regularizer=None, - num_bases=None, bias=True, activation=None, self_loop=False, From 279e0b020ee5418e4ce039ac6888692c57a4a1e0 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 11:08:54 -0500 Subject: [PATCH 10/28] isort fix --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 77ff6e1d..3e2ce321 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,4 +1,5 @@ import argparse + import dgl import torch import torch.nn.functional as F From 124c20580a9a5930871d3d9418281eb6fb859a45 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 11:13:40 -0500 Subject: [PATCH 11/28] isort fix again --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 3e2ce321..dc4d97a1 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,5 +1,4 @@ import argparse - import dgl import torch import torch.nn.functional as F @@ -10,18 +9,10 @@ from ...modules.graph_embedding_learning.rgcn import RGCN from ...modules.utils.generic_utils import get_config -# Fix random seed -# torch.manual_seed(1024) -# import random -# random.seed(1024) -# import numpy as np -# np.random.seed(1024) # Load dataset # Reference: dgl/examples/pytorch/rgcn/entity_utils.py # (https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/entity_utils.py) - - def load_data(data_name="aifb", get_norm=False, inv_target=False): if data_name == "aifb": dataset = AIFBDataset() From a6c1d4579181908cec6b8bad0fee82f0210b2b2e Mon Sep 17 00:00:00 2001 From: AlanSwift Date: Sat, 19 Nov 2022 16:39:07 +0000 Subject: [PATCH 12/28] fix ci --- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 1 + 1 file 
changed, 1 insertion(+) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index dc4d97a1..2df2b32a 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -3,6 +3,7 @@ import torch import torch.nn.functional as F from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset + from torchmetrics.functional import accuracy from ...data.data import from_dgl From e5d53a6defaede2a4e29f5b086c9d3ca4bed6f89 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Sat, 19 Nov 2022 14:18:54 -0500 Subject: [PATCH 13/28] update rgcn & test case --- examples/pytorch/rgcn/rgcn.py | 4 ++-- .../semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 10 ++++++---- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 4 +--- graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index 0779e904..95b518a0 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,7 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cuda", + device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -185,7 +185,7 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cuda", + device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 917264c8..38ca29a9 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -1,7 +1,7 @@ from graph4nlp.pytorch.models.graph2seq import Graph2Seq -from examples.pytorch.rgcn.rgcn import RGCN - +# from examples.pytorch.rgcn.rgcn import RGCN +from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN class RGCNGraph2Seq(Graph2Seq): def __init__( @@ -89,6 +89,8 @@ def _build_gnn_encoder( hidden_size, output_size, num_rels=gnn_num_rels, - num_bases=gnn_num_bases, - dropout=feats_dropout, + direction_option="undirected", + # num_bases=gnn_num_bases, + # dropout=feats_dropout, + feat_drop=feats_dropout ) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 2df2b32a..cdb6c5f7 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -111,11 +111,9 @@ def main(config): hidden_size=config["hidden_size"], output_size=num_classes, direction_option=config["direction_option"], - regularizer="basis", - bias=True, + bias=config['bias'], activation=F.relu, num_rels=num_rels, - num_bases=config["num_bases"], self_loop=config["self_loop"], feat_drop=config["feat_drop"], ) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 8042b72a..86544beb 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,10 +1,10 @@ -num_hidden_layers: 1 -hidden_size: 16 -num_bases: -1 +num_hidden_layers: 3 +hidden_size: 32 dataset: 'aifb' direction_option: "bi_fuse" self_loop: False +bias: True feat_drop: 0.0 lr: 0.01 wd: 0.0005 -num_epochs: 150 +num_epochs: 200 From 570dc89110ce1e6e0e17e64e7b1842ceed8c9460 Mon Sep 17 00:00:00 
2001 From: AlanSwift Date: Thu, 24 Nov 2022 02:10:46 +0000 Subject: [PATCH 14/28] fix --- .../config/train_dep_rgcn_bi_sep.json | 5 ++++ .../config/train_dep_rgcn_undirected.json | 5 ++++ .../semantic_parsing/graph2seq/main_rgcn.py | 2 +- .../graph2seq/rgcn_lib/graph2seq.py | 4 +++- .../modules/graph_embedding_learning/rgcn.py | 23 ++++++++++--------- 5 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json create mode 100644 examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json diff --git a/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json new file mode 100644 index 00000000..b19c6336 --- /dev/null +++ b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_bi_sep.json @@ -0,0 +1,5 @@ +{ + "config_path": "examples/pytorch/semantic_parsing/graph2seq/config/dependency_rgcn_undirected.yaml", + "model_args.graph_embedding_args.graph_embedding_share.direction_option": "bi_sep", + "training_args.log_file": "examples/pytorch/semantic_parsing/graph2seq/log/dependency_rgcn_bi_sep.txt" +} diff --git a/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json new file mode 100644 index 00000000..77deec0e --- /dev/null +++ b/examples/pytorch/semantic_parsing/graph2seq/config/train_dep_rgcn_undirected.json @@ -0,0 +1,5 @@ +{ + "config_path": "examples/pytorch/semantic_parsing/graph2seq/config/dependency_rgcn_undirected.yaml", + "model_args.graph_embedding_args.graph_embedding_share.direction_option": "undirected", + "training_args.log_file": "examples/pytorch/semantic_parsing/graph2seq/log/dependency_rgcn_undirected.txt" +} diff --git a/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py b/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py index 5b3b7a24..eb683ebc 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py +++ b/examples/pytorch/semantic_parsing/graph2seq/main_rgcn.py @@ -52,7 +52,7 @@ def _build_logger(self, log_file): import os log_folder = os.path.split(log_file)[0] - if not os.path.exists(log_file): + if not os.path.exists(log_folder): os.makedirs(log_folder) self.logger = get_log(log_file) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 38ca29a9..07f85048 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -74,10 +74,12 @@ def __init__( def _build_gnn_encoder( self, + gnn, num_layers, input_size, hidden_size, output_size, + direction_option, feats_dropout, gnn_num_rels=80, gnn_num_bases=4, @@ -89,7 +91,7 @@ def _build_gnn_encoder( hidden_size, output_size, num_rels=gnn_num_rels, - direction_option="undirected", + direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, feat_drop=feats_dropout diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index f61c59df..53eace14 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -293,9 +293,9 @@ def __init__( layer_norm=False, ): super(UndirectedRGCNLayer, self).__init__() - 
self.linear_dict = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + self.linear_dict = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation @@ -321,7 +321,7 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): """Message function.""" - ln = self.linear_dict[g.canonical_etypes.index(edges._etype)] + ln = self.linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] @@ -555,12 +555,13 @@ def __init__( layer_norm=False, ): super(BiSepRGCNLayer, self).__init__() - self.linear_dict_forward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } - self.linear_dict_backward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + + self.linear_dict_forward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) + self.linear_dict_backward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -596,7 +597,7 @@ def message(edges, g, direction): linear_dict = ( self.linear_dict_forward if direction == "forward" else self.linear_dict_backward ) - ln = linear_dict[g.canonical_etypes.index(edges._etype)] + ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] From 06703a4a136ab23530c3b6baf1646fced9440894 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: Wed, 23 Nov 2022 21:29:17 -0500 Subject: [PATCH 15/28] bug fix on bi sep --- graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py | 4 ++-- graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 53eace14..cc681cd6 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -626,7 +626,7 @@ def message(edges, g, direction): if self.bias: h = h + self.h_bias_forward if self.self_loop: - h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_forward + h = h + feat_forward[: g.num_dst_nodes()] @ self.loop_weight_forward h_forward = h g = g.reverse() @@ -651,7 +651,7 @@ def message(edges, g, direction): if self.bias: h = h + self.h_bias_backward if self.self_loop: - h = h + feat[: g.num_dst_nodes()] @ self.loop_weight_backward + h = h + feat_backward[: g.num_dst_nodes()] @ self.loop_weight_backward h_backward = h if self.activation: diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml index 86544beb..38c59cf6 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml @@ -1,7 +1,7 @@ -num_hidden_layers: 3 -hidden_size: 32 +num_hidden_layers: 1 +hidden_size: 16 dataset: 'aifb' -direction_option: "bi_fuse" +direction_option: "undirected" self_loop: False bias: True feat_drop: 0.0 From 5b5f77a9cfc7ea13b48b9b0c0a851f2c011eb096 Mon Sep 17 00:00:00 2001 From: Shaw Liu Date: 
Wed, 23 Nov 2022 21:37:23 -0500 Subject: [PATCH 16/28] bug fix on bi_fuse --- .../modules/graph_embedding_learning/rgcn.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cc681cd6..b1e024b8 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -400,12 +400,12 @@ def __init__( layer_norm=False, ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } - self.linear_dict_backward = { - i: nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - } + self.linear_dict_forward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) + self.linear_dict_backward = nn.ModuleDict({ + str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + }) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -442,7 +442,7 @@ def message(edges, g, direction): linear_dict = ( self.linear_dict_forward if direction == "forward" else self.linear_dict_backward ) - ln = linear_dict[g.canonical_etypes.index(edges._etype)] + ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] m = ln(edges.src["h"]) if "norm" in edges.data: m = m * edges.data["norm"] From 6b0bdc8ed1c188bb58b23e45aa13eb03e5c5f263 Mon Sep 17 00:00:00 2001 From: wsz Date: Thu, 24 Nov 2022 16:46:17 +0800 Subject: [PATCH 17/28] Change implementation of RGCN linear layer to DGL impl --- .../modules/graph_embedding_learning/rgcn.py | 55 +++++++++++++------ .../pytorch/test/graph_embedding/run_rgcn.py | 10 +++- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index b1e024b8..b0a10df9 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -1,5 +1,6 @@ import dgl import dgl.function as fn +from dgl.nn.pytorch.linear import TypedLinear import torch import torch.nn as nn @@ -47,6 +48,8 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, + regularizer=None, + num_basis=None, ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -291,11 +294,20 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None, ): super(UndirectedRGCNLayer, self).__init__() - self.linear_dict = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + # self.linear_dict = nn.ModuleDict({ + # str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) + # }) + self.linear = TypedLinear( + in_size=input_size, + out_size=output_size, + num_types=num_rels, + regularizer=regularizer, + num_bases=num_bases, + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias self.activation = activation @@ -321,8 +333,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g): """Message function.""" - ln = self.linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # ln = self.linear(edges.src['h'], edges.data['type']) + # ln = 
self.linear_dict[str(g.canonical_etypes.index(edges._etype))] + # m = ln(edges.src["h"]) + + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = self.linear(edges.src["h"], etypes) + if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} @@ -400,12 +419,12 @@ def __init__( layer_norm=False, ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) - self.linear_dict_backward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + self.linear_dict_forward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) + self.linear_dict_backward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -555,13 +574,13 @@ def __init__( layer_norm=False, ): super(BiSepRGCNLayer, self).__init__() - - self.linear_dict_forward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) - self.linear_dict_backward = nn.ModuleDict({ - str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - }) + + self.linear_dict_forward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) + self.linear_dict_backward = nn.ModuleDict( + {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index cdb6c5f7..77ea611f 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -99,9 +99,11 @@ def main(config): ) # graph = from_dgl(g, is_hetero=False) - graph = from_dgl(g) + device = 'cuda:0' + graph = from_dgl(g).to(device) + labels = labels.to(device) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config["hidden_size"]) + emb = torch.nn.Embedding(num_nodes, config["hidden_size"]).to(device) # emb.requires_grad = True graph.node_features["node_feat"] = emb.weight @@ -116,7 +118,9 @@ def main(config): num_rels=num_rels, self_loop=config["self_loop"], feat_drop=config["feat_drop"], - ) + regularizer='basis', + num_basis=10 + ).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") model.train() From 1327a9796f825f216f8c083a242eaec30e1152aa Mon Sep 17 00:00:00 2001 From: wsz Date: Wed, 7 Dec 2022 00:49:02 +0800 Subject: [PATCH 18/28] Implemented regularizer in RGCN --- .../modules/graph_embedding_learning/rgcn.py | 95 ++++++++++++++----- .../pytorch/test/graph_embedding/run_rgcn.py | 2 +- .../{run_rgcn.yaml => run_rgcn_aifb.yaml} | 0 .../test/graph_embedding/run_rgcn_am.yaml | 10 ++ .../test/graph_embedding/run_rgcn_bgs.yaml | 10 ++ .../test/graph_embedding/run_rgcn_mutag.yaml | 10 ++ .../test/graph_embedding/test_rgcn_perf.sh | 9 ++ 7 files changed, 110 insertions(+), 26 deletions(-) rename graph4nlp/pytorch/test/graph_embedding/{run_rgcn.yaml => run_rgcn_aifb.yaml} (100%) create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml 
create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml create mode 100644 graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml create mode 100644 graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index b0a10df9..cd9eeb2f 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -48,8 +48,8 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, - regularizer=None, - num_basis=None, + regularizer='basis', + num_bases=4, ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -60,6 +60,8 @@ def __init__( self.activation = activation self.bias = bias self.RGCN_layers = nn.ModuleList() + self.regularizer = regularizer + self.num_basis = num_bases # transform the hidden size format if self.num_layers > 1 and type(hidden_size) is int: @@ -77,6 +79,8 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, + regularizer=regularizer, + num_bases=num_bases, ) ) # hidden layers @@ -92,7 +96,9 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, - ) + regularizer=regularizer, + num_bases=num_bases, + ) ) # output projection self.RGCN_layers.append( @@ -105,6 +111,8 @@ def __init__( activation=self.activation, self_loop=self.self_loop, feat_drop=self.feat_drop, + regularizer=regularizer, + num_bases=num_bases, ) ) @@ -191,6 +199,8 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None ): super(RGCNLayer, self).__init__() if direction_option == "undirected": @@ -203,6 +213,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) elif direction_option == "bi_sep": self.model = BiSepRGCNLayer( @@ -214,6 +226,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) elif direction_option == "bi_fuse": self.model = BiFuseRGCNLayer( @@ -225,6 +239,8 @@ def __init__( self_loop=self_loop, feat_drop=feat_drop, layer_norm=layer_norm, + regularizer=regularizer, + num_bases=num_bases, ) else: raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) @@ -405,6 +421,11 @@ class BiFuseRGCNLayer(GNNLayerBase): Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` + regularizer: str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. 
""" def __init__( @@ -417,14 +438,19 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None ): super(BiFuseRGCNLayer, self).__init__() - self.linear_dict_forward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) - self.linear_dict_backward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) + self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + + # self.linear_dict_forward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) + # self.linear_dict_backward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -458,11 +484,17 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = ( - self.linear_dict_forward if direction == "forward" else self.linear_dict_backward - ) - ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # linear_dict = ( + # self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + # ) + # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] + # m = ln(edges.src["h"]) + + ln = self.ln_fwd if direction == "forward" else self.ln_bwd + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} @@ -560,6 +592,11 @@ class BiSepRGCNLayer(GNNLayerBase): Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` + regularizer: str, optional + Which weight regularizer to use "basis" or "bdd": + - "basis" is short for basis-decomposition. + num_bases : int, optional + Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. 
""" def __init__( @@ -572,15 +609,19 @@ def __init__( self_loop=False, feat_drop=0.0, layer_norm=False, + regularizer=None, + num_bases=None, ): super(BiSepRGCNLayer, self).__init__() + self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) + self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) - self.linear_dict_forward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) - self.linear_dict_backward = nn.ModuleDict( - {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} - ) + # self.linear_dict_forward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) + # self.linear_dict_backward = nn.ModuleDict( + # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} + # ) # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.bias = bias @@ -613,11 +654,15 @@ def __init__( def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): def message(edges, g, direction): """Message function.""" - linear_dict = ( - self.linear_dict_forward if direction == "forward" else self.linear_dict_backward - ) - ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] - m = ln(edges.src["h"]) + # linear_dict = ( + # self.linear_dict_forward if direction == "forward" else self.linear_dict_backward + # ) + # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] + ln = self.ln_fwd if direction == "forward" else self.ln_bwd + etypes = torch.tensor( + [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] + ).to(edges.src["h"].device) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 77ea611f..e3d425c0 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -119,7 +119,7 @@ def main(config): self_loop=config["self_loop"], feat_drop=config["feat_drop"], regularizer='basis', - num_basis=10 + num_bases=10 ).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) print("start training...") diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn.yaml rename to graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml new file mode 100644 index 00000000..a2a2a0bc --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'am' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml new file mode 100644 index 00000000..d599e1b3 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'bgs' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git 
a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml new file mode 100644 index 00000000..814fe76c --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml @@ -0,0 +1,10 @@ +num_hidden_layers: 1 +hidden_size: 16 +dataset: 'mutag' +direction_option: "undirected" +self_loop: False +bias: True +feat_drop: 0.0 +lr: 0.01 +wd: 0.0005 +num_epochs: 200 diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh new file mode 100644 index 00000000..c8312570 --- /dev/null +++ b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh @@ -0,0 +1,9 @@ +#!/bin/bash +for i in {1..5} +do + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs_$i.log 2>&1 & + python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_am_$i.log 2>&1 & + wait +done \ No newline at end of file From 9e39bb20c08faa85ece7139a7d47291d7a00769b Mon Sep 17 00:00:00 2001 From: wsz Date: Fri, 9 Dec 2022 16:42:00 +0800 Subject: [PATCH 19/28] Sync RGCNLayer implementation --- examples/pytorch/rgcn/rgcn.py | 4 +- .../graph2seq/rgcn_lib/graph2seq.py | 3 +- .../modules/graph_embedding_learning/rgcn.py | 17 ++-- .../pytorch/test/graph_embedding/run_rgcn.py | 85 +++++++++++++++---- .../test/graph_embedding/test_rgcn_perf.sh | 0 5 files changed, 82 insertions(+), 27 deletions(-) mode change 100644 => 100755 graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index f7ab4d70..cb124c1b 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -205,9 +205,7 @@ def __init__( # the module only about graph convolution. 
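# Aside (not part of the patch): the update these layers share after neighbor
# aggregation is layer norm -> bias -> self-loop, with the self-loop applying
# a separate learned weight to each destination node's own features. A minimal
# sketch with illustrative sizes (input_size == output_size == 16 is assumed):
import torch
import torch.nn as nn

layer_norm_weight = nn.LayerNorm(16, elementwise_affine=True)
h_bias = nn.Parameter(torch.zeros(16))
loop_weight = nn.Parameter(torch.empty(16, 16))
nn.init.xavier_uniform_(loop_weight, gain=nn.init.calculate_gain("relu"))

def finalize(h_agg, feat_dst):
    h = layer_norm_weight(h_agg)       # optional layer norm on aggregated messages
    h = h + h_bias                     # shared output bias
    return h + feat_dst @ loop_weight  # self-loop message from the node's own features

out = finalize(torch.randn(3, 16), torch.randn(3, 16))  # shape (3, 16)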
# layer norm if self.layer_norm: - self.layer_norm_weight = nn.LayerNorm( - output_size, elementwise_affine=True - ) + self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) # weight for self loop if self.self_loop: diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 07f85048..ec904835 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -3,6 +3,7 @@ # from examples.pytorch.rgcn.rgcn import RGCN from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN + class RGCNGraph2Seq(Graph2Seq): def __init__( self, @@ -94,5 +95,5 @@ def _build_gnn_encoder( direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, - feat_drop=feats_dropout + feat_drop=feats_dropout, ) diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cd9eeb2f..cb435247 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -48,7 +48,7 @@ def __init__( activation=None, self_loop=True, feat_drop=0.0, - regularizer='basis', + regularizer="none", num_bases=4, ): super(RGCN, self).__init__() @@ -98,7 +98,7 @@ def __init__( feat_drop=self.feat_drop, regularizer=regularizer, num_bases=num_bases, - ) + ) ) # output projection self.RGCN_layers.append( @@ -115,6 +115,9 @@ def __init__( num_bases=num_bases, ) ) + # Print named parameters + # for k, v in self.named_parameters(): + # print(f'{k}: {v}') def forward(self, graph): r"""Compute RGCN layer. @@ -200,7 +203,7 @@ def __init__( feat_drop=0.0, layer_norm=False, regularizer=None, - num_bases=None + num_bases=None, ): super(RGCNLayer, self).__init__() if direction_option == "undirected": @@ -439,12 +442,12 @@ def __init__( feat_drop=0.0, layer_norm=False, regularizer=None, - num_bases=None + num_bases=None, ): super(BiFuseRGCNLayer, self).__init__() self.ln_fwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) self.ln_bwd = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) - + # self.linear_dict_forward = nn.ModuleDict( # {str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels)} # ) @@ -489,12 +492,12 @@ def message(edges, g, direction): # ) # ln = linear_dict[str(g.canonical_etypes.index(edges._etype))] # m = ln(edges.src["h"]) - + ln = self.ln_fwd if direction == "forward" else self.ln_bwd etypes = torch.tensor( [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] ).to(edges.src["h"].device) - m = ln(edges.src["h"], etypes) + m = ln(edges.src["h"], etypes) if "norm" in edges.data: m = m * edges.data["norm"] return {"m": m} diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index e3d425c0..27c654ea 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -7,8 +7,9 @@ from torchmetrics.functional import accuracy from ...data.data import from_dgl -from ...modules.graph_embedding_learning.rgcn import RGCN +from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config +import torch.nn as nn # Load dataset @@ -93,39 +94,91 @@ def load_data(data_name="aifb", get_norm=False, inv_target=False): return 
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx +class MyModel(nn.Module): + def __init__( + self, + num_layers, + input_size, + hidden_size, + output_size, + num_rels, + direction_option=None, + bias=True, + activation=None, + self_loop=True, + feat_drop=0.0, + regularizer="none", + num_bases=4, + num_nodes=100, + ): + super(MyModel, self).__init__() + self.emb = nn.Embedding(num_nodes, config["hidden_size"]) + self.layer_1 = RGCNLayer( + input_size, + hidden_size, + num_rels=num_rels, + direction_option=direction_option, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + regularizer=regularizer, + num_bases=num_bases, + ) + self.layer_2 = RGCNLayer( + hidden_size, + output_size, + num_rels=num_rels, + direction_option=direction_option, + bias=bias, + activation=activation, + self_loop=self_loop, + feat_drop=feat_drop, + regularizer=regularizer, + num_bases=num_bases, + ) + + def forward(self, g): + node_features = 0 + x1 = F.relu(self.) + g.node_features["node_feat"] = self.emb(torch.eye(g.num_nodes())) + return self.RGCN(g).node_features["node_emb"] + + def main(config): g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data( data_name=config["dataset"], get_norm=True ) # graph = from_dgl(g, is_hetero=False) - device = 'cuda:0' + device = "cuda:0" graph = from_dgl(g).to(device) labels = labels.to(device) num_nodes = graph.get_node_num() - emb = torch.nn.Embedding(num_nodes, config["hidden_size"]).to(device) - # emb.requires_grad = True - graph.node_features["node_feat"] = emb.weight - - model = RGCN( - num_layers=config["num_hidden_layers"], + my_model = MyModel( + num_layers=config["num_hidden_layers"] + 1, input_size=config["hidden_size"], hidden_size=config["hidden_size"], output_size=num_classes, direction_option=config["direction_option"], - bias=config['bias'], + bias=config["bias"], activation=F.relu, num_rels=num_rels, self_loop=config["self_loop"], feat_drop=config["feat_drop"], - regularizer='basis', - num_bases=10 + regularizer="basis", + num_bases=num_rels, + num_nodes=num_nodes, ).to(device) - optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"]) + optimizer = torch.optim.Adam( + my_model.parameters(), + lr=config["lr"], + weight_decay=config["wd"], + ) print("start training...") - model.train() + my_model.train() for epoch in range(config["num_epochs"]): - logits = model(graph).node_features["node_emb"] + logits = my_model(graph) logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) @@ -143,9 +196,9 @@ def main(config): # Save Model # torch.save(model.state_dict(), "./rgcn_model.pt") print("start evaluating...") - model.eval() + my_model.eval() with torch.no_grad(): - logits = model(graph).node_features["node_emb"] + logits = my_model(graph) logits = logits[target_idx] test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() print("Test Accuracy: {:.4f}".format(test_acc)) diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh old mode 100644 new mode 100755 From 1e0c597a908d9f7911607252fc6566a7ade2ac2d Mon Sep 17 00:00:00 2001 From: wsz Date: Fri, 9 Dec 2022 17:24:16 +0800 Subject: [PATCH 20/28] modified DGL benchmark test code for rgcn --- .../pytorch/test/graph_embedding/run_rgcn.py | 18 +++++++++++------- .../test/graph_embedding/test_rgcn_perf.sh | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git 
a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 27c654ea..e4b63d8d 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -10,6 +10,7 @@ from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config import torch.nn as nn +from ...data.data import GraphData # Load dataset @@ -112,7 +113,7 @@ def __init__( num_nodes=100, ): super(MyModel, self).__init__() - self.emb = nn.Embedding(num_nodes, config["hidden_size"]) + self.emb = nn.Embedding(num_nodes, hidden_size) self.layer_1 = RGCNLayer( input_size, hidden_size, @@ -137,12 +138,15 @@ def __init__( regularizer=regularizer, num_bases=num_bases, ) - - def forward(self, g): - node_features = 0 - x1 = F.relu(self.) - g.node_features["node_feat"] = self.emb(torch.eye(g.num_nodes())) - return self.RGCN(g).node_features["node_emb"] + for k, v in self.named_parameters(): + print(f'{k} => {v}') + + def forward(self, g: GraphData): + node_features = self.emb(torch.IntTensor(list(range(g.get_node_num()))).to('cuda:0')) + dgl_g = g.to_dgl() + x1 = self.layer_1(dgl_g, node_features) + x2 = self.layer_2(dgl_g, x1) + return x2 def main(config): diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh index c8312570..3d91d4bd 100755 --- a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh +++ b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh @@ -5,5 +5,5 @@ do python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag_$i.log 2>&1 & python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs_$i.log 2>&1 & python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_am_$i.log 2>&1 & - wait + # wait done \ No newline at end of file From 9c82c72103522326b02803ef0a4d999deafa632a Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Fri, 18 Nov 2022 18:30:57 -0800 Subject: [PATCH 21/28] add rgcn for text classification --- examples/pytorch/rgcn/rgcn.py | 2 -- .../CAirline/gat_bi_sep_dependency.json | 2 +- .../CAirline/ggnn_bi_sep_constituency.json | 2 +- .../config/CAirline/ggnn_bi_sep_node_emb.json | 2 +- ...nn_bi_sep_node_emb_refined_dependency.json | 2 +- .../config/CAirline/rgcn_dependency.json | 13 ++++++++++++ .../config/trec/rgcn_dependency.json | 14 +++++++++++++ .../run_text_classifier.py | 21 +++++++++++++++++++ 8 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 examples/pytorch/text_classification/config/CAirline/rgcn_dependency.json create mode 100644 examples/pytorch/text_classification/config/trec/rgcn_dependency.json diff --git a/examples/pytorch/rgcn/rgcn.py b/examples/pytorch/rgcn/rgcn.py index cb124c1b..7f738a54 100644 --- a/examples/pytorch/rgcn/rgcn.py +++ b/examples/pytorch/rgcn/rgcn.py @@ -46,7 +46,6 @@ def __init__( num_bases=None, use_self_loop=True, dropout=0.0, - device="cpu", ): super(RGCN, self).__init__() self.num_layers = num_layers @@ -184,7 +183,6 @@ def __init__( self_loop=False, dropout=0.0, layer_norm=False, - device="cpu", ): super(RGCNLayer, self).__init__() self.linear_dict = { diff --git 
a/examples/pytorch/text_classification/config/CAirline/gat_bi_sep_dependency.json b/examples/pytorch/text_classification/config/CAirline/gat_bi_sep_dependency.json index 9fa30f7b..20c14193 100644 --- a/examples/pytorch/text_classification/config/CAirline/gat_bi_sep_dependency.json +++ b/examples/pytorch/text_classification/config/CAirline/gat_bi_sep_dependency.json @@ -7,5 +7,5 @@ "model_args.graph_embedding_args.graph_embedding_private.negative_slope": "0.2", "model_args.graph_embedding_args.graph_embedding_private.residual": "false", "model_args.graph_embedding_args.graph_embedding_private.allow_zero_in_degree": "true", -"checkpoint_args.out_dir": "out/trec/gat_bi_sep_dependency_ckpt" +"checkpoint_args.out_dir": "out/CAirline/gat_bi_sep_dependency_ckpt" } diff --git a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_constituency.json b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_constituency.json index 4ad1b6ff..ffc9be49 100644 --- a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_constituency.json +++ b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_constituency.json @@ -2,5 +2,5 @@ "config_path": "examples/pytorch/text_classification/config/CAirline/text_clf.yaml", "model_args.graph_construction_name": "constituency", "model_args.graph_construction_args.graph_construction_share.topology_subdir": "constituency_graph", -"checkpoint_args.out_dir": "out/trec/ggnn_bi_sep_constituency_ckpt" +"checkpoint_args.out_dir": "out/CAirline/ggnn_bi_sep_constituency_ckpt" } diff --git a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb.json b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb.json index 12b10e7b..b5470993 100644 --- a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb.json +++ b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb.json @@ -13,5 +13,5 @@ "model_args.graph_embedding_args.graph_embedding_share.hidden_size": "300", "model_args.graph_embedding_args.graph_embedding_share.output_size": "300", "model_args.graph_embedding_args.graph_embedding_private.use_edge_weight": "true", -"checkpoint_args.out_dir": "out/trec/ggnn_bi_sep_node_emb_ckpt" +"checkpoint_args.out_dir": "out/CAirline/ggnn_bi_sep_node_emb_ckpt" } diff --git a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb_refined_dependency.json b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb_refined_dependency.json index 946f50a4..53b77370 100644 --- a/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb_refined_dependency.json +++ b/examples/pytorch/text_classification/config/CAirline/ggnn_bi_sep_node_emb_refined_dependency.json @@ -15,5 +15,5 @@ "model_args.graph_embedding_args.graph_embedding_share.hidden_size": "300", "model_args.graph_embedding_args.graph_embedding_share.output_size": "300", "model_args.graph_embedding_args.graph_embedding_private.use_edge_weight": "true", -"checkpoint_args.out_dir": "out/trec/ggnn_bi_sep_node_emb_refined_dependency_ckpt" +"checkpoint_args.out_dir": "out/CAirline/ggnn_bi_sep_node_emb_refined_dependency_ckpt" } diff --git a/examples/pytorch/text_classification/config/CAirline/rgcn_dependency.json b/examples/pytorch/text_classification/config/CAirline/rgcn_dependency.json new file mode 100644 index 00000000..14f8f181 --- /dev/null +++ b/examples/pytorch/text_classification/config/CAirline/rgcn_dependency.json @@ -0,0 +1,13 @@ +{ +"config_path": 
"examples/pytorch/text_classification/config/CAirline/text_clf.yaml", +"model_args.graph_construction_args.graph_construction_share.topology_subdir": "dependency_graph_for_rgcn", +"model_args.graph_construction_args.graph_construction_private.edge_strategy": "heterogeneous", +"model_args.graph_construction_args.graph_construction_private.merge_strategy": "tailhead", +"model_args.graph_construction_args.graph_construction_private.sequential_link": true, +"model_args.graph_construction_args.graph_construction_private.as_node": false, +"model_args.graph_embedding_name": "rgcn", +"model_args.graph_embedding_args.graph_embedding_share.direction_option": "undirected", +"model_args.graph_embedding_args.graph_embedding_private.num_rels": 80, +"model_args.graph_embedding_args.graph_embedding_private.num_bases": 4, +"checkpoint_args.out_dir": "out/CAirline/rgcn_dependency_ckpt" +} diff --git a/examples/pytorch/text_classification/config/trec/rgcn_dependency.json b/examples/pytorch/text_classification/config/trec/rgcn_dependency.json new file mode 100644 index 00000000..7253078a --- /dev/null +++ b/examples/pytorch/text_classification/config/trec/rgcn_dependency.json @@ -0,0 +1,14 @@ +{ +"config_path": "examples/pytorch/text_classification/config/trec/text_clf.yaml", +"model_args.graph_construction_args.graph_construction_share.topology_subdir": "dependency_graph_for_rgcn", +"model_args.graph_construction_args.graph_construction_private.edge_strategy": "heterogeneous", +"model_args.graph_construction_args.graph_construction_private.merge_strategy": "tailhead", +"model_args.graph_construction_args.graph_construction_private.sequential_link": true, +"model_args.graph_construction_args.graph_construction_private.as_node": false, +"model_args.graph_embedding_name": "rgcn", +"model_args.graph_embedding_args.graph_embedding_share.direction_option": "undirected", +"model_args.graph_embedding_args.graph_embedding_private.num_rels": 80, +"model_args.graph_embedding_args.graph_embedding_private.num_bases": 4, +"training_args.lr": "0.002", +"checkpoint_args.out_dir": "out/trec/rgcn_dependency_ckpt" +} diff --git a/examples/pytorch/text_classification/run_text_classifier.py b/examples/pytorch/text_classification/run_text_classifier.py index 037189e6..33a3b9a2 100644 --- a/examples/pytorch/text_classification/run_text_classifier.py +++ b/examples/pytorch/text_classification/run_text_classifier.py @@ -31,6 +31,7 @@ from graph4nlp.pytorch.modules.utils.config_utils import load_json_config from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping, to_cuda from graph4nlp.pytorch.modules.utils.logger import Logger +from examples.pytorch.rgcn.rgcn import RGCN torch.multiprocessing.set_sharing_strategy("file_system") @@ -217,6 +218,26 @@ def __init__(self, vocab, label_model, config): "graph_embedding_private" ]["use_edge_weight"], ) + elif config["model_args"]["graph_embedding_name"] == "rgcn": + self.gnn = RGCN( + config["model_args"]["graph_embedding_args"]["graph_embedding_share"]["num_layers"], + config["model_args"]["graph_embedding_args"]["graph_embedding_share"]["input_size"], + config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ + "hidden_size" + ], + config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ + "output_size" + ], + num_rels=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ + "num_rels" + ], + num_bases=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ + "num_bases" + ], + 
dropout=config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ + "feat_drop" + ], + ) else: raise RuntimeError( "Unknown gnn type: {}".format(config["model_args"]["graph_embedding_name"]) From 82fbb9ce77713e1281447aa7aefaa9984db92adf Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sat, 19 Nov 2022 22:46:51 -0800 Subject: [PATCH 22/28] update readme --- .../pytorch/text_classification/readme.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/pytorch/text_classification/readme.md b/examples/pytorch/text_classification/readme.md index fbf30db5..ee083916 100644 --- a/examples/pytorch/text_classification/readme.md +++ b/examples/pytorch/text_classification/readme.md @@ -29,12 +29,12 @@ TREC Results ------- -| GraphType\GNN | GAT-BiSep | GAT-BiFuse | GraphSAGE-BiSep | GraphSAGE-BiFuse | GGNN-BiSep | GGNN-BiFuse | -| ------------- | ------------- | --------------| ------------------- | ----------------- |-------------- | ------------- | -| Dependency | 0.9480 | 0.9460 | 0.942 | 0.958 | 0.954 | 0.9440 | -| Constituency | 0.9420 | 0.9300 | 0.952 | 0.950 | 0.952 | 0.9400 | -| NodeEmb | N/A | N/A | 0.930 | 0.908 | | | -| NodeEmbRefined | N/A | N/A | 0.940 | 0.926 | | | +| GraphType\GNN | GAT-BiSep | GAT-BiFuse | GraphSAGE-BiSep | GraphSAGE-BiFuse | GGNN-BiSep | GGNN-BiFuse | RGCN | +| ------------- | ------------- | --------------| ------------------- | ----------------- |-------------- | ------------- | ----- | +| Dependency | 0.9480 | 0.9460 | 0.942 | 0.958 | 0.954 | 0.9440 | 0.946 | +| Constituency | 0.9420 | 0.9300 | 0.952 | 0.950 | 0.952 | 0.9400 | | +| NodeEmb | N/A | N/A | 0.930 | 0.908 | | | | +| NodeEmbRefined | N/A | N/A | 0.940 | 0.926 | | | | @@ -42,10 +42,10 @@ CAirline Results ------- -| GraphType\GNN | GAT-BiSep | GGNN-BiSep |GraphSage-BiSep| -| -------------- | ------------ | ------------- |---------------| -| Dependency | 0.7496 | 0.8020 | 0.7977 | -| Constituency | 0.7846 | 0.7933 | 0.7948 | -| NodeEmb | N/A | 0.8108 | 0.8108 | -| NodeEmbRefined | N/A | 0.7991 | 0.8020 | +| GraphType\GNN | GAT-BiSep | GGNN-BiSep |GraphSage-BiSep| RGCN | +| -------------- | ------------ | ------------- |---------------|---------------| +| Dependency | 0.7496 | 0.8020 | 0.7977 | 0.7525 | +| Constituency | 0.7846 | 0.7933 | 0.7948 | N/A | +| NodeEmb | N/A | 0.8108 | 0.8108 | N/A | +| NodeEmbRefined | N/A | 0.7991 | 0.8020 | N/A | From 887f2d14d91602105a294ba86a85dc3bad2cf248 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sat, 10 Dec 2022 01:12:53 -0800 Subject: [PATCH 23/28] linter & update readme --- .../pytorch/math_word_problem/mawps/src/evaluation.py | 4 ++-- examples/pytorch/question_generation/main.py | 3 ++- .../semantic_parsing/graph2seq/rgcn_lib/graph2seq.py | 1 - examples/pytorch/text_classification/readme.md | 8 ++++---- .../pytorch/text_classification/run_text_classifier.py | 1 + .../pytorch/modules/graph_embedding_learning/rgcn.py | 2 +- graph4nlp/pytorch/test/data_structure/test_graphdata.py | 3 +-- graph4nlp/pytorch/test/graph_embedding/run_rgcn.py | 5 ++--- 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/examples/pytorch/math_word_problem/mawps/src/evaluation.py b/examples/pytorch/math_word_problem/mawps/src/evaluation.py index c26280cc..c4f0fb87 100644 --- a/examples/pytorch/math_word_problem/mawps/src/evaluation.py +++ b/examples/pytorch/math_word_problem/mawps/src/evaluation.py @@ -1,8 +1,8 @@ -from graph4nlp.pytorch.modules.evaluation.base import EvaluationMetricBase - import sympy from 
sympy.parsing.sympy_parser import parse_expr +from graph4nlp.pytorch.modules.evaluation.base import EvaluationMetricBase + class SolutionMatch(EvaluationMetricBase): def __init__(self): diff --git a/examples/pytorch/question_generation/main.py b/examples/pytorch/question_generation/main.py index 450c1ded..e35cf87d 100644 --- a/examples/pytorch/question_generation/main.py +++ b/examples/pytorch/question_generation/main.py @@ -26,9 +26,10 @@ from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping, to_cuda from graph4nlp.pytorch.modules.utils.logger import Logger -from .fused_embedding_construction import FusedEmbeddingConstruction from examples.pytorch.semantic_parsing.graph2seq.rgcn_lib.graph2seq import RGCNGraph2Seq +from .fused_embedding_construction import FusedEmbeddingConstruction + class QGModel(nn.Module): def __init__(self, vocab, config): diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index ec904835..0a43a59a 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -1,5 +1,4 @@ from graph4nlp.pytorch.models.graph2seq import Graph2Seq - # from examples.pytorch.rgcn.rgcn import RGCN from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN diff --git a/examples/pytorch/text_classification/readme.md b/examples/pytorch/text_classification/readme.md index ee083916..d8039858 100644 --- a/examples/pytorch/text_classification/readme.md +++ b/examples/pytorch/text_classification/readme.md @@ -31,10 +31,10 @@ TREC Results | GraphType\GNN | GAT-BiSep | GAT-BiFuse | GraphSAGE-BiSep | GraphSAGE-BiFuse | GGNN-BiSep | GGNN-BiFuse | RGCN | | ------------- | ------------- | --------------| ------------------- | ----------------- |-------------- | ------------- | ----- | -| Dependency | 0.9480 | 0.9460 | 0.942 | 0.958 | 0.954 | 0.9440 | 0.946 | -| Constituency | 0.9420 | 0.9300 | 0.952 | 0.950 | 0.952 | 0.9400 | | -| NodeEmb | N/A | N/A | 0.930 | 0.908 | | | | -| NodeEmbRefined | N/A | N/A | 0.940 | 0.926 | | | | +| Dependency | 0.9480 | 0.9460 | 0.942 | 0.958 | 0.954 | 0.944 | 0.946 | +| Constituency | 0.9420 | 0.9300 | 0.952 | 0.950 | 0.952 | 0.94 | N/A | +| NodeEmb | N/A | N/A | 0.930 | 0.908 | N/A | N/A | N/A | +| NodeEmbRefined | N/A | N/A | 0.940 | 0.926 | N/A | N/A | N/A | diff --git a/examples/pytorch/text_classification/run_text_classifier.py b/examples/pytorch/text_classification/run_text_classifier.py index 33a3b9a2..6a243b09 100644 --- a/examples/pytorch/text_classification/run_text_classifier.py +++ b/examples/pytorch/text_classification/run_text_classifier.py @@ -31,6 +31,7 @@ from graph4nlp.pytorch.modules.utils.config_utils import load_json_config from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping, to_cuda from graph4nlp.pytorch.modules.utils.logger import Logger + from examples.pytorch.rgcn.rgcn import RGCN torch.multiprocessing.set_sharing_strategy("file_system") diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index cb435247..5f43bcca 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -1,8 +1,8 @@ import dgl import dgl.function as fn -from dgl.nn.pytorch.linear import TypedLinear import torch import torch.nn as nn +from dgl.nn.pytorch.linear import TypedLinear from 
.base import GNNBase, GNNLayerBase diff --git a/graph4nlp/pytorch/test/data_structure/test_graphdata.py b/graph4nlp/pytorch/test/data_structure/test_graphdata.py index 7d438715..0cf6d8dd 100644 --- a/graph4nlp/pytorch/test/data_structure/test_graphdata.py +++ b/graph4nlp/pytorch/test/data_structure/test_graphdata.py @@ -1,14 +1,13 @@ import gc import time import matplotlib.pyplot as plt +import pytest import torch import torch.nn as nn from graph4nlp.pytorch.data import GraphData, from_batch, from_dgl, to_batch from graph4nlp.pytorch.data.utils import EdgeNotFoundException, SizeMismatchException -import pytest - def fail_here(): raise Exception("The above line of code shouldn't be executed normally") diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index e4b63d8d..88f27631 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -1,16 +1,15 @@ import argparse import dgl import torch +import torch.nn as nn import torch.nn.functional as F from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from torchmetrics.functional import accuracy -from ...data.data import from_dgl +from ...data.data import GraphData, from_dgl from ...modules.graph_embedding_learning.rgcn import RGCNLayer from ...modules.utils.generic_utils import get_config -import torch.nn as nn -from ...data.data import GraphData # Load dataset From 1321d1e90610009629d080449081decd9dbbb2ea Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sun, 11 Dec 2022 11:46:04 -0800 Subject: [PATCH 24/28] switch to RGCNLayer implemented in https://github.com/graph4ai/graph4nlp/pull/584 --- .../text_classification/run_text_classifier.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/pytorch/text_classification/run_text_classifier.py b/examples/pytorch/text_classification/run_text_classifier.py index 6a243b09..c316d772 100644 --- a/examples/pytorch/text_classification/run_text_classifier.py +++ b/examples/pytorch/text_classification/run_text_classifier.py @@ -32,7 +32,7 @@ from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping, to_cuda from graph4nlp.pytorch.modules.utils.logger import Logger -from examples.pytorch.rgcn.rgcn import RGCN +from graph4nlp.pytorch.modules.graph_embedding_learning.rgcn import RGCN torch.multiprocessing.set_sharing_strategy("file_system") @@ -232,12 +232,16 @@ def __init__(self, vocab, label_model, config): num_rels=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ "num_rels" ], + direction_option=config["model_args"]["graph_embedding_args"][ + "graph_embedding_share" + ]["direction_option"], + feat_drop=config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ + "feat_drop" + ], + regularizer="basis", num_bases=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ "num_bases" ], - dropout=config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ - "feat_drop" - ], ) else: raise RuntimeError( From a6d0b98c8ca6ec47e5c4361ae1dc441d64ddf669 Mon Sep 17 00:00:00 2001 From: wsz Date: Sat, 17 Dec 2022 17:43:58 +0800 Subject: [PATCH 25/28] Migrated RGCN layer to hetero version in DGL --- .../graph2seq/rgcn_lib/graph2seq.py | 2 +- .../run_text_classifier.py | 2 +- graph4nlp/pytorch/data/data.py | 12 +- .../modules/graph_embedding_learning/rgcn.py | 254 +++++++++++------- .../pytorch/test/graph_embedding/run_rgcn.py | 4 +- 5 files changed, 171 insertions(+), 
103 deletions(-) diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 0a43a59a..264826dc 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -90,7 +90,7 @@ def _build_gnn_encoder( input_size, hidden_size, output_size, - num_rels=gnn_num_rels, + rel_names=gnn_num_rels, direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, diff --git a/examples/pytorch/text_classification/run_text_classifier.py b/examples/pytorch/text_classification/run_text_classifier.py index c316d772..510af97e 100644 --- a/examples/pytorch/text_classification/run_text_classifier.py +++ b/examples/pytorch/text_classification/run_text_classifier.py @@ -229,7 +229,7 @@ def __init__(self, vocab, label_model, config): config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ "output_size" ], - num_rels=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ + rel_names=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ "num_rels" ], direction_option=config["model_args"]["graph_embedding_args"][ diff --git a/graph4nlp/pytorch/data/data.py b/graph4nlp/pytorch/data/data.py index 67ccafa0..daa7366d 100644 --- a/graph4nlp/pytorch/data/data.py +++ b/graph4nlp/pytorch/data/data.py @@ -787,7 +787,7 @@ def _data_dict(self) -> Dict[Tuple[str, str, str], Tuple[torch.Tensor, torch.Ten ) return data_dict - def to_dgl(self) -> dgl.DGLGraph: + def to_dgl(self) -> dgl.DGLHeteroGraph: """ Convert to dgl.DGLGraph Note that there will be some information loss when calling this function, @@ -796,8 +796,8 @@ def to_dgl(self) -> dgl.DGLGraph: Returns ------- - g : dgl.DGLGraph - The converted dgl.DGLGraph + g : dgl.DGLHeteroGraph + The converted dgl.DGLHeteroGraph """ u, v = self._edge_indices.src, self._edge_indices.tgt num_nodes = self.get_node_num() @@ -903,13 +903,13 @@ def make_num_nodes_dict( return dgl_g - def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False): + def from_dgl(self, dgl_g: dgl.DGLHeteroGraph, is_hetero=False): """ - Build the graph from dgl.DGLGraph + Build the graph from dgl.DGLHeteroGraph Parameters ---------- - dgl_g : dgl.DGLGraph + dgl_g : dgl.DGLHeteroGraph The source graph """ if not (self.get_edge_num() == 0 and self.get_node_num() == 0): diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 5f43bcca..523dcd51 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -3,8 +3,131 @@ import torch import torch.nn as nn from dgl.nn.pytorch.linear import TypedLinear +import dgl.nn as dglnn +import typing as tp from .base import GNNBase, GNNLayerBase +from ...data import GraphData, from_dgl + +# The implementation of RGCN is copied from DGL +class RelGraphConvLayer(nn.Module): + r"""Relational graph convolution layer. + + Parameters + ---------- + in_feat : int + Input feature size. + out_feat : int + Output feature size. + rel_names : list[str] + Relation names. + num_bases : int, optional + Number of bases. If is none, use number of relations. Default: None. + weight : bool, optional + True if a linear layer is applied after message passing. Default: True + bias : bool, optional + True if bias is added. Default: True + activation : callable, optional + Activation function. 
Default: None + self_loop : bool, optional + True to include self loop message. Default: False + dropout : float, optional + Dropout rate. Default: 0.0 + """ + + def __init__( + self, + in_feat, + out_feat, + rel_names, + num_bases, + *, + weight=True, + bias=True, + activation=None, + self_loop=False, + dropout=0.0, + ): + super(RelGraphConvLayer, self).__init__() + self.in_feat = in_feat + self.out_feat = out_feat + self.rel_names = rel_names + self.num_bases = num_bases + self.bias = bias + self.activation = activation + self.self_loop = self_loop + + self.conv = dglnn.HeteroGraphConv( + { + rel: dglnn.GraphConv(in_feat, out_feat, norm="right", weight=False, bias=False) + for rel in rel_names + } + ) + + self.use_weight = weight + self.use_basis = num_bases < len(self.rel_names) and weight + if self.use_weight: + if self.use_basis: + self.basis = dglnn.WeightBasis((in_feat, out_feat), num_bases, len(self.rel_names)) + else: + self.weight = nn.Parameter(torch.Tensor(len(self.rel_names), in_feat, out_feat)) + nn.init.xavier_uniform_(self.weight, gain=nn.init.calculate_gain("relu")) + + # bias + if bias: + self.h_bias = nn.Parameter(torch.Tensor(out_feat)) + nn.init.zeros_(self.h_bias) + + # weight for self loop + if self.self_loop: + self.loop_weight = nn.Parameter(torch.Tensor(in_feat, out_feat)) + nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) + + self.dropout = nn.Dropout(dropout) + + def forward(self, g, inputs): + """Forward computation + + Parameters + ---------- + g : DGLHeteroGraph + Input graph. + inputs : dict[str, torch.Tensor] + Node feature for each node type. + + Returns + ------- + dict[str, torch.Tensor] + New node features for each node type. + """ + g = g.local_var() + if self.use_weight: + weight = self.basis() if self.use_basis else self.weight + wdict = { + self.rel_names[i]: {"weight": w.squeeze(0)} + for i, w in enumerate(torch.split(weight, 1, dim=0)) + } + else: + wdict = {} + + if g.is_block: + inputs_src = inputs + inputs_dst = {k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items()} + else: + inputs_src = inputs_dst = inputs + + hs = self.conv(g, inputs, mod_kwargs=wdict) + + def _apply(ntype, h): + if self.self_loop: + h = h + torch.matmul(inputs_dst[ntype], self.loop_weight) + if self.bias: + h = h + self.h_bias + if self.activation: + h = self.activation(h) + return self.dropout(h) + + return {ntype: _apply(ntype, h) for ntype, h in hs.items()} class RGCN(GNNBase): @@ -26,8 +149,8 @@ class RGCN(GNNBase): Example: [100,50] output_size : int Output feature size. - num_rels : int - Number of relations. + rel_names : List[str] + List of relation names. num_bases : int, optional Number of bases. Needed when ``regularizer`` is specified. Default: ``None``. 
self_loop : bool, optional @@ -42,7 +165,7 @@ def __init__( input_size, hidden_size, output_size, - num_rels, + rel_names=None, direction_option=None, bias=True, activation=None, @@ -53,7 +176,7 @@ def __init__( ): super(RGCN, self).__init__() self.num_layers = num_layers - self.num_rels = num_rels + self.rel_names = rel_names self.self_loop = self_loop self.feat_drop = feat_drop self.direction_option = direction_option @@ -62,6 +185,9 @@ def __init__( self.RGCN_layers = nn.ModuleList() self.regularizer = regularizer self.num_basis = num_bases + + if isinstance(self.rel_names, int): + self.rel_names = [str(i) for i in range(self.rel_names)] # transform the hidden size format if self.num_layers > 1 and type(hidden_size) is int: @@ -73,7 +199,7 @@ def __init__( RGCNLayer( input_size, hidden_size[0], - num_rels=self.num_rels, + rel_names=self.rel_names, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -90,7 +216,7 @@ def __init__( RGCNLayer( hidden_size[l - 1], hidden_size[l], - num_rels=self.num_rels, + rel_names=self.rel_names, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -105,7 +231,7 @@ def __init__( RGCNLayer( hidden_size[-1] if self.num_layers > 1 else input_size, output_size, - num_rels=self.num_rels, + rel_names=self.rel_names, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -119,7 +245,7 @@ def __init__( # for k, v in self.named_parameters(): # print(f'{k}: {v}') - def forward(self, graph): + def forward(self, graph: GraphData): r"""Compute RGCN layer. Parameters @@ -135,27 +261,29 @@ def forward(self, graph): The graph with generated node embedding stored in the feature field named as "node_emb". """ - feat = graph.node_features["node_feat"] - if self.direction_option == "bi_sep": - h = [feat, feat] - else: - h = feat + # feat = graph.node_features["node_feat"] + # if self.direction_option == "bi_sep": + # h = [feat, feat] + # else: + # h = feat # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph + h: tp.Dict[str, torch.Tensor] = g.ndata["node_feat"] # edge_type = g.edata[dgl.ETYPE].long() + # output projection if self.num_layers > 1: for l in range(0, self.num_layers - 1): h = self.RGCN_layers[l](g, h) - logits = self.RGCN_layers[-1](g, h) + h = self.RGCN_layers[-1](g, h) if self.direction_option == "bi_sep": logits = torch.cat(logits, -1) - graph.node_features["node_emb"] = logits # put the results into the NLPGraph - return graph + g.ndata["node_emb"] = h # put the results into the NLPGraph + return from_dgl(g=g) class RGCNLayer(GNNLayerBase): @@ -195,7 +323,7 @@ def __init__( self, input_size, output_size, - num_rels, + rel_names, direction_option=None, bias=True, activation=None, @@ -210,7 +338,7 @@ def __init__( self.model = UndirectedRGCNLayer( input_size, output_size, - num_rels=num_rels, + rel_names=rel_names, bias=bias, activation=activation, self_loop=self_loop, @@ -223,7 +351,7 @@ def __init__( self.model = BiSepRGCNLayer( input_size, output_size, - num_rels=num_rels, + num_rels=rel_names, bias=bias, activation=activation, self_loop=self_loop, @@ -236,7 +364,7 @@ def __init__( self.model = BiFuseRGCNLayer( input_size, output_size, - num_rels=num_rels, + num_rels=rel_names, bias=bias, activation=activation, self_loop=self_loop, @@ -248,7 +376,7 @@ def __init__( else: raise RuntimeError("Unknown `direction_option` value: {}".format(direction_option)) - def forward(self, graph, feat): + def forward(self, 
graph: dgl.DGLHeteroGraph, feat: tp.Dict[str, torch.Tensor]): r"""Compute graph attention network layer. Parameters @@ -307,7 +435,7 @@ def __init__( self, input_size, output_size, - num_rels, + rel_names, bias=True, activation=None, self_loop=False, @@ -315,82 +443,22 @@ def __init__( layer_norm=False, regularizer=None, num_bases=None, + dropout=0.0, + **kwargs, ): super(UndirectedRGCNLayer, self).__init__() - # self.linear_dict = nn.ModuleDict({ - # str(i): nn.Linear(input_size, output_size, bias=bias) for i in range(num_rels) - # }) - self.linear = TypedLinear( - in_size=input_size, - out_size=output_size, - num_types=num_rels, - regularizer=regularizer, + self.layer = RelGraphConvLayer( + in_feat=input_size, + out_feat=output_size, + rel_names=rel_names, num_bases=num_bases, + activation=activation, + self_loop=self_loop, + dropout=dropout, ) - # self.linear_r = TypedLinear(input_size, output_size, num_rels, regularizer, num_bases) - self.bias = bias - self.activation = activation - self.self_loop = self_loop - self.layer_norm = layer_norm - - # bias - if self.bias: - self.h_bias = nn.Parameter(torch.Tensor(output_size)) - nn.init.zeros_(self.h_bias) - - # layer norm - if self.layer_norm: - self.layer_norm_weight = nn.LayerNorm(output_size, elementwise_affine=True) - - # weight for self loop - if self.self_loop: - self.loop_weight = nn.Parameter(torch.Tensor(input_size, output_size)) - nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) - - self.dropout = nn.Dropout(feat_drop) - def forward(self, g: dgl.DGLHeteroGraph, feat: torch.Tensor, norm=None): - def message(edges, g): - """Message function.""" - # ln = self.linear(edges.src['h'], edges.data['type']) - # ln = self.linear_dict[str(g.canonical_etypes.index(edges._etype))] - # m = ln(edges.src["h"]) - - etypes = torch.tensor( - [g.canonical_etypes.index(edges._etype)] * edges.src["h"].shape[0] - ).to(edges.src["h"].device) - m = self.linear(edges.src["h"], etypes) - - if "norm" in edges.data: - m = m * edges.data["norm"] - return {"m": m} - - # self.presorted = presorted - with g.local_scope(): - g.srcdata["h"] = feat - if norm is not None: - g.edata["norm"] = norm - # g.edata['etype'] = etypes - # message passing - from functools import partial - - update_dict = { - etype: (partial(message, g=g), fn.sum("m", "h")) for etype in g.canonical_etypes - } - g.multi_update_all(etype_dict=update_dict, cross_reducer="sum") - # g.update_all(self.message, fn.sum('m', 'h')) - # apply bias and activation - h = g.dstdata["h"] - if self.layer_norm: - h = self.layer_norm_weight(h) - if self.bias: - h = h + self.h_bias - if self.self_loop: - h = h + feat[: g.num_dst_nodes()] @ self.loop_weight - if self.activation: - h = self.activation(h) - h = self.dropout(h) - return h + def forward(self, g: dgl.DGLHeteroGraph, feat: tp.Dict[str, torch.Tensor]): + return self.layer(g, feat) class BiFuseRGCNLayer(GNNLayerBase): diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 88f27631..53b21ea9 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -116,7 +116,7 @@ def __init__( self.layer_1 = RGCNLayer( input_size, hidden_size, - num_rels=num_rels, + rel_names=num_rels, direction_option=direction_option, bias=bias, activation=activation, @@ -128,7 +128,7 @@ def __init__( self.layer_2 = RGCNLayer( hidden_size, output_size, - num_rels=num_rels, + rel_names=num_rels, 
direction_option=direction_option, bias=bias, activation=activation, From 96b4539c120947b542a6c7548686736284187cc0 Mon Sep 17 00:00:00 2001 From: wsz Date: Wed, 4 Jan 2023 15:43:35 +0800 Subject: [PATCH 26/28] fixed rgcn interface with dgl issue with tricks --- .gitignore | 2 + .../graph2seq/rgcn_lib/graph2seq.py | 2 +- .../run_text_classifier.py | 2 +- graph4nlp/pytorch/data/data.py | 5 +- .../modules/graph_embedding_learning/rgcn.py | 83 +++++++++++++------ .../pytorch/test/graph_embedding/run_rgcn.py | 4 +- 6 files changed, 66 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 1dd7612f..3f9d9a44 100644 --- a/.gitignore +++ b/.gitignore @@ -149,3 +149,5 @@ cscope.* # config file /config local_scripts/ + +profiler/ \ No newline at end of file diff --git a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py index 264826dc..0a43a59a 100644 --- a/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py +++ b/examples/pytorch/semantic_parsing/graph2seq/rgcn_lib/graph2seq.py @@ -90,7 +90,7 @@ def _build_gnn_encoder( input_size, hidden_size, output_size, - rel_names=gnn_num_rels, + num_rels=gnn_num_rels, direction_option=direction_option, # num_bases=gnn_num_bases, # dropout=feats_dropout, diff --git a/examples/pytorch/text_classification/run_text_classifier.py b/examples/pytorch/text_classification/run_text_classifier.py index 510af97e..c316d772 100644 --- a/examples/pytorch/text_classification/run_text_classifier.py +++ b/examples/pytorch/text_classification/run_text_classifier.py @@ -229,7 +229,7 @@ def __init__(self, vocab, label_model, config): config["model_args"]["graph_embedding_args"]["graph_embedding_share"][ "output_size" ], - rel_names=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ + num_rels=config["model_args"]["graph_embedding_args"]["graph_embedding_private"][ "num_rels" ], direction_option=config["model_args"]["graph_embedding_args"][ diff --git a/graph4nlp/pytorch/data/data.py b/graph4nlp/pytorch/data/data.py index daa7366d..f7a4cb0a 100644 --- a/graph4nlp/pytorch/data/data.py +++ b/graph4nlp/pytorch/data/data.py @@ -950,6 +950,8 @@ def from_dgl(self, dgl_g: dgl.DGLHeteroGraph, is_hetero=False): processed_node_types = False node_feat_dict = {} for feature_name, data_dict in node_data.items(): + if not isinstance(data_dict, Dict): # DGL will return tensor if ntype is single + data_dict = {0: data_dict} if not processed_node_types: for node_type, node_feature in data_dict.items(): ntypes += [node_type] * len(node_feature) @@ -967,7 +969,8 @@ def from_dgl(self, dgl_g: dgl.DGLHeteroGraph, is_hetero=False): num_edges = dgl_g.num_edges(etype) src_type, r_type, dst_type = etype srcs, dsts = dgl_g.find_edges( - torch.tensor(list(range(num_edges)), dtype=torch.long), etype + torch.tensor(list(range(num_edges)), dtype=torch.long, device=dgl_g.device), + etype, ) srcs, dsts = ( srcs.detach().cpu().numpy().tolist(), diff --git a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py index 523dcd51..8d568519 100644 --- a/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py +++ b/graph4nlp/pytorch/modules/graph_embedding_learning/rgcn.py @@ -39,7 +39,7 @@ def __init__( self, in_feat, out_feat, - rel_names, + num_rels, num_bases, *, weight=True, @@ -51,7 +51,7 @@ def __init__( super(RelGraphConvLayer, self).__init__() self.in_feat = in_feat self.out_feat = out_feat - self.rel_names = 
rel_names + self.num_rels = num_rels self.num_bases = num_bases self.bias = bias self.activation = activation @@ -59,18 +59,18 @@ def __init__( self.conv = dglnn.HeteroGraphConv( { - rel: dglnn.GraphConv(in_feat, out_feat, norm="right", weight=False, bias=False) - for rel in rel_names + f"rel_{rel}": dglnn.GraphConv(in_feat, out_feat, norm="right", weight=False, bias=False) + for rel in range(num_rels) } ) self.use_weight = weight - self.use_basis = num_bases < len(self.rel_names) and weight + self.use_basis = num_bases < self.num_rels and weight if self.use_weight: if self.use_basis: - self.basis = dglnn.WeightBasis((in_feat, out_feat), num_bases, len(self.rel_names)) + self.basis = dglnn.WeightBasis((in_feat, out_feat), num_bases, self.num_rels) else: - self.weight = nn.Parameter(torch.Tensor(len(self.rel_names), in_feat, out_feat)) + self.weight = nn.Parameter(torch.Tensor(self.num_rels, in_feat, out_feat)) nn.init.xavier_uniform_(self.weight, gain=nn.init.calculate_gain("relu")) # bias @@ -84,8 +84,9 @@ def __init__( nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain("relu")) self.dropout = nn.Dropout(dropout) + self.etype_map = {} - def forward(self, g, inputs): + def forward(self, g: dgl.DGLHeteroGraph, inputs: tp.Dict[str, torch.Tensor]): """Forward computation Parameters @@ -101,10 +102,25 @@ def forward(self, g, inputs): New node features for each node type. """ g = g.local_var() + + # def create_new_graph(): + + + new_canonical_etypes = [] + new_etypes = [] + for src_type, edge_type, dst_type in g.canonical_etypes: + new_edge_type = self.etype_map.setdefault(edge_type, f"rel_{len(self.etype_map)}") + new_canonical_etypes.append((src_type, new_edge_type, dst_type)) + new_etypes.append(new_edge_type) + g._etypes = new_etypes + g._canonical_etypes = new_canonical_etypes + g._etype2canonical = {etype: canonical_etype for etype, canonical_etype in zip(new_etypes, new_canonical_etypes)} + g._etypes_invmap = {canonical_etype: i for i, canonical_etype in enumerate(new_canonical_etypes)} + if self.use_weight: weight = self.basis() if self.use_basis else self.weight wdict = { - self.rel_names[i]: {"weight": w.squeeze(0)} + f"rel_{i}": {"weight": w.squeeze(0)} for i, w in enumerate(torch.split(weight, 1, dim=0)) } else: @@ -165,7 +181,7 @@ def __init__( input_size, hidden_size, output_size, - rel_names=None, + num_rels=None, direction_option=None, bias=True, activation=None, @@ -176,7 +192,7 @@ def __init__( ): super(RGCN, self).__init__() self.num_layers = num_layers - self.rel_names = rel_names + self.num_rels = num_rels self.self_loop = self_loop self.feat_drop = feat_drop self.direction_option = direction_option @@ -186,8 +202,8 @@ def __init__( self.regularizer = regularizer self.num_basis = num_bases - if isinstance(self.rel_names, int): - self.rel_names = [str(i) for i in range(self.rel_names)] + # if isinstance(self.num_rels, int): + # self.num_rels = [str(i) for i in range(self.num_rels)] # transform the hidden size format if self.num_layers > 1 and type(hidden_size) is int: @@ -199,7 +215,7 @@ def __init__( RGCNLayer( input_size, hidden_size[0], - rel_names=self.rel_names, + num_rels=self.num_rels, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -216,7 +232,7 @@ def __init__( RGCNLayer( hidden_size[l - 1], hidden_size[l], - rel_names=self.rel_names, + num_rels=self.num_rels, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -231,7 +247,7 @@ def __init__( RGCNLayer( hidden_size[-1] if 
self.num_layers > 1 else input_size, output_size, - rel_names=self.rel_names, + num_rels=self.num_rels, direction_option=self.direction_option, bias=self.bias, activation=self.activation, @@ -269,21 +285,34 @@ def forward(self, graph: GraphData): # get the node feature tensor from graph g = graph.to_dgl() # transfer the current NLPgraph to DGL graph - h: tp.Dict[str, torch.Tensor] = g.ndata["node_feat"] - # edge_type = g.edata[dgl.ETYPE].long() - + h: torch.Tensor = g.ndata["node_feat"] + + # Make node feature dictionary + feat_dict: tp.Dict[str, torch.Tensor] = {} + import numpy as np + node_types = np.array(graph.ntypes,) + for i in range(max(node_types) + 1): + index = torch.tensor(np.where(node_types == i)[0], device=graph.device) + feat_dict[i] = torch.index_select(h, 0, index) + # output projection if self.num_layers > 1: for l in range(0, self.num_layers - 1): - h = self.RGCN_layers[l](g, h) + h = self.RGCN_layers[l](g, feat_dict) h = self.RGCN_layers[-1](g, h) if self.direction_option == "bi_sep": logits = torch.cat(logits, -1) + # Unpack node feature dictionary + if len(g.ntypes) == 1: + h = h[0] g.ndata["node_emb"] = h # put the results into the NLPGraph - return from_dgl(g=g) + graph_data = from_dgl(g=g) + if graph.batch is not None: + graph_data.copy_batch_info(graph) + return graph_data class RGCNLayer(GNNLayerBase): @@ -323,7 +352,7 @@ def __init__( self, input_size, output_size, - rel_names, + num_rels, direction_option=None, bias=True, activation=None, @@ -338,7 +367,7 @@ def __init__( self.model = UndirectedRGCNLayer( input_size, output_size, - rel_names=rel_names, + num_rels=num_rels, bias=bias, activation=activation, self_loop=self_loop, @@ -351,7 +380,7 @@ def __init__( self.model = BiSepRGCNLayer( input_size, output_size, - num_rels=rel_names, + num_rels=num_rels, bias=bias, activation=activation, self_loop=self_loop, @@ -364,7 +393,7 @@ def __init__( self.model = BiFuseRGCNLayer( input_size, output_size, - num_rels=rel_names, + num_rels=num_rels, bias=bias, activation=activation, self_loop=self_loop, @@ -435,7 +464,7 @@ def __init__( self, input_size, output_size, - rel_names, + num_rels, bias=True, activation=None, self_loop=False, @@ -450,7 +479,7 @@ def __init__( self.layer = RelGraphConvLayer( in_feat=input_size, out_feat=output_size, - rel_names=rel_names, + num_rels=num_rels, num_bases=num_bases, activation=activation, self_loop=self_loop, diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 53b21ea9..88f27631 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -116,7 +116,7 @@ def __init__( self.layer_1 = RGCNLayer( input_size, hidden_size, - rel_names=num_rels, + num_rels=num_rels, direction_option=direction_option, bias=bias, activation=activation, @@ -128,7 +128,7 @@ def __init__( self.layer_2 = RGCNLayer( hidden_size, output_size, - rel_names=num_rels, + num_rels=num_rels, direction_option=direction_option, bias=bias, activation=activation, From 7121e020daa93ba76a39889375c630b1af0bf931 Mon Sep 17 00:00:00 2001 From: wsz Date: Wed, 11 Jan 2023 01:16:18 +0800 Subject: [PATCH 27/28] Bugfix in GraphData and RGCN testing - Disabled Valuerror for single node type for homograph node adding - Fixed DGL benchmark test for RGCN --- graph4nlp/pytorch/data/data.py | 13 ++++++++----- .../{ => rgcn_scripts}/run_rgcn_aifb.yaml | 0 .../{ => rgcn_scripts}/run_rgcn_am.yaml | 0 .../{ => rgcn_scripts}/run_rgcn_bgs.yaml | 0 .../{ => 
rgcn_scripts}/run_rgcn_mutag.yaml | 0 .../pytorch/test/graph_embedding/run_rgcn.py | 16 +++++++++++++--- 6 files changed, 21 insertions(+), 8 deletions(-) rename graph4nlp/pytorch/test/graph_embedding/{ => rgcn_scripts}/run_rgcn_aifb.yaml (100%) rename graph4nlp/pytorch/test/graph_embedding/{ => rgcn_scripts}/run_rgcn_am.yaml (100%) rename graph4nlp/pytorch/test/graph_embedding/{ => rgcn_scripts}/run_rgcn_bgs.yaml (100%) rename graph4nlp/pytorch/test/graph_embedding/{ => rgcn_scripts}/run_rgcn_mutag.yaml (100%) diff --git a/graph4nlp/pytorch/data/data.py b/graph4nlp/pytorch/data/data.py index f7a4cb0a..383c6647 100644 --- a/graph4nlp/pytorch/data/data.py +++ b/graph4nlp/pytorch/data/data.py @@ -176,7 +176,7 @@ def add_nodes(self, node_num: int, ntypes: List[str] = None): ) if not self.is_hetero: - if ntypes is not None: + if ntypes is not None and len(set(ntypes)) > 1: raise ValueError( "The graph is homogeneous, ntypes should be None. Got {}".format(ntypes) ) @@ -950,8 +950,10 @@ def from_dgl(self, dgl_g: dgl.DGLHeteroGraph, is_hetero=False): processed_node_types = False node_feat_dict = {} for feature_name, data_dict in node_data.items(): - if not isinstance(data_dict, Dict): # DGL will return tensor if ntype is single - data_dict = {0: data_dict} + if not isinstance(data_dict, Dict): + # DGL will return tensor if ntype is single + # This can happen when graph is a multigraph + data_dict = {dgl_g.ntypes[0]: data_dict} if not processed_node_types: for node_type, node_feature in data_dict.items(): ntypes += [node_type] * len(node_feature) @@ -1389,8 +1391,9 @@ def from_dgl(g: dgl.DGLGraph) -> GraphData: GraphData The converted graph in GraphData format. """ - graph = GraphData(is_hetero=not g.is_homogeneous) - graph.from_dgl(g, is_hetero=not g.is_homogeneous) + dgl_g_is_hetero = (not g.is_homogeneous) or g.is_multigraph + graph = GraphData(is_hetero=dgl_g_is_hetero) + graph.from_dgl(g, is_hetero=dgl_g_is_hetero) return graph diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml b/graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_aifb.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml rename to graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_aifb.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml b/graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_am.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml rename to graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_am.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml b/graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_bgs.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml rename to graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_bgs.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml b/graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_mutag.yaml similarity index 100% rename from graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml rename to graph4nlp/pytorch/test/graph_embedding/rgcn_scripts/run_rgcn_mutag.yaml diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index 88f27631..d9d3c4cd 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -143,7 +143,17 @@ def __init__( def forward(self, g: GraphData): 
node_features = self.emb(torch.IntTensor(list(range(g.get_node_num()))).to('cuda:0')) dgl_g = g.to_dgl() - x1 = self.layer_1(dgl_g, node_features) + + # Make node feature dictionary + import typing as tp + feat_dict: tp.Dict[str, torch.Tensor] = {} + import numpy as np + node_types = np.array(g.ntypes,) + for i in set(node_types): + index = torch.tensor(np.where(node_types == i)[0], device=g.device) + feat_dict[i] = torch.index_select(node_features, 0, index) + + x1 = self.layer_1(dgl_g, feat_dict) x2 = self.layer_2(dgl_g, x1) return x2 @@ -181,7 +191,7 @@ def main(config): print("start training...") my_model.train() for epoch in range(config["num_epochs"]): - logits = my_model(graph) + logits = my_model(graph)['_N'] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) @@ -201,7 +211,7 @@ def main(config): print("start evaluating...") my_model.eval() with torch.no_grad(): - logits = my_model(graph) + logits = my_model(graph)['_N'] logits = logits[target_idx] test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() print("Test Accuracy: {:.4f}".format(test_acc)) From 8eb46ad5e3ab459d775bd7dd82d8ffb7f2ab83b2 Mon Sep 17 00:00:00 2001 From: wsz Date: Sat, 14 Jan 2023 11:41:29 +0800 Subject: [PATCH 28/28] mlflow integration and test script --- .../pytorch/test/graph_embedding/run_rgcn.py | 14 ++++++++- .../test/graph_embedding/test_rgcn_perf.sh | 30 ++++++++++++++----- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py index d9d3c4cd..75762a03 100644 --- a/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py +++ b/graph4nlp/pytorch/test/graph_embedding/run_rgcn.py @@ -159,6 +159,11 @@ def forward(self, g: GraphData): def main(config): + import mlflow + mlflow.set_tracking_uri("http://192.168.190.202:45250") + mlflow.set_experiment("rgcn_debug") + mlflow.start_run(run_name=f"rgcn_debug_{config['dataset']}") + g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data( data_name=config["dataset"], get_norm=True ) @@ -194,7 +199,7 @@ def main(config): logits = my_model(graph)['_N'] logits = logits[target_idx] loss = F.cross_entropy(logits[train_idx], labels[train_idx]) - + optimizer.zero_grad() loss.backward() optimizer.step() @@ -205,6 +210,10 @@ def main(config): epoch, train_acc, loss.item() ) ) + mlflow.log_metric("loss", loss.item(), step=epoch) + mlflow.log_metric("train_acc", train_acc, step=epoch) + + print() # Save Model # torch.save(model.state_dict(), "./rgcn_model.pt") @@ -215,6 +224,9 @@ def main(config): logits = logits[target_idx] test_acc = accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item() print("Test Accuracy: {:.4f}".format(test_acc)) + + mlflow.log_metric("test_acc", test_acc) + mlflow.end_run() if __name__ == "__main__": diff --git a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh index 3d91d4bd..a278a813 100755 --- a/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh +++ b/graph4nlp/pytorch/test/graph_embedding/test_rgcn_perf.sh @@ -1,9 +1,23 @@ #!/bin/bash -for i in {1..5} -do - python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_aifb_$i.log 2>&1 & - python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag.yaml > 
graph4nlp/pytorch/test/graph_embedding/run_rgcn_mutag_$i.log 2>&1 &
-    python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_bgs_$i.log 2>&1 &
-    python -m graph4nlp.pytorch.test.graph_embedding.run_rgcn -config graph4nlp/pytorch/test/graph_embedding/run_rgcn_am.yaml > graph4nlp/pytorch/test/graph_embedding/run_rgcn_am_$i.log 2>&1 &
-    # wait
-done
\ No newline at end of file
+#!/bin/bash
+
+export test_module=graph4nlp.pytorch.test.graph_embedding.run_rgcn
+export python_command="python -m"
+export config_root=/student/wangsaizhuo/Codes/graph4nlp/graph4nlp/pytorch/test/graph_embedding/rgcn_scripts
+
+test_routine()
+{
+    for dataset in {aifb,am,bgs,mutag}
+    do
+        ${python_command} ${test_module} -config ${config_root}/run_rgcn_${dataset}.yaml &
+    done
+    wait
+}
+
+
+# Test RGCN-Hetero implementation on DGL benchmarks
+git checkout rgcn-integration
+test_routine
+
+# Test RGCN-Homo implementation on DGL benchmarks
+git checkout debug-orig-rgcn
+test_routine
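
A recurring pattern in patches 25-28 above: RGCN.forward packs the flat "node_feat" tensor into a per-node-type dictionary before calling the hetero RelGraphConvLayer, then unpacks the single-type result afterwards (the `h = h[0]` and `['_N']` lines in run_rgcn.py). Below is a minimal standalone sketch of that pack/unpack round trip; the helper names and toy sizes are illustrative assumptions, not part of the graph4nlp library.

import numpy as np
import torch

def pack_by_ntype(feat, ntypes):
    # Group a flat [num_nodes, dim] feature tensor into a dict keyed by node
    # type, mirroring the feat_dict construction added to RGCN.forward.
    ntypes = np.array(ntypes)
    feat_dict = {}
    for t in set(ntypes.tolist()):
        index = torch.tensor(np.where(ntypes == t)[0], device=feat.device)
        feat_dict[t] = torch.index_select(feat, 0, index)
    return feat_dict

def unpack_single_ntype(feat_dict):
    # A homogeneous graph viewed as hetero has one node type, so the hetero
    # layers return a one-entry dict; take the lone tensor, as the patches
    # do with h[0] / logits['_N'].
    assert len(feat_dict) == 1
    return next(iter(feat_dict.values()))

# Toy example: 5 nodes of two types, 4-dim features.
feat = torch.randn(5, 4)
feat_dict = pack_by_ntype(feat, [0, 1, 0, 1, 0])
assert feat_dict[0].shape == (3, 4) and feat_dict[1].shape == (2, 4)
assert unpack_single_ntype({"_N": feat}).shape == (5, 4)

The same grouping could be written with a boolean mask (feat[ntypes == t]); the sketch keeps numpy.where plus torch.index_select to match the code the patches actually add.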