From 71e07f5ba1606571781b4d3153aef421fc5443bd Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Mon, 19 Sep 2022 15:36:10 +0800
Subject: [PATCH 01/12] Enable

---
 examples/lsc/mag240m/gnn.py | 43 +++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index e0ae7708..ca50a1f8 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -13,7 +13,7 @@
 from torch.nn import ModuleList, Sequential, Linear, BatchNorm1d, ReLU, Dropout
 from torch.optim.lr_scheduler import StepLR
 
-from pytorch_lightning.metrics import Accuracy
+from torchmetrics import Accuracy
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning import (LightningDataModule, LightningModule, Trainer,
                                seed_everything)
@@ -24,7 +24,12 @@
 
 from ogb.lsc import MAG240MDataset, MAG240MEvaluator
 from root import ROOT
-
+from torch_geometric.loader.neighbor_loader import NeighborLoader
+from torch.profiler import profile, ProfilerActivity
+from torch_geometric.data import Data
+from torch_sparse import coalesce
+from torch_geometric.typing import Adj
+import torch_geometric.transforms as T
 
 class Batch(NamedTuple):
     x: Tensor
@@ -214,7 +219,7 @@ def configure_optimizers(self):
     parser.add_argument('--hidden_channels', type=int, default=1024)
     parser.add_argument('--batch_size', type=int, default=1024)
     parser.add_argument('--dropout', type=float, default=0.5)
-    parser.add_argument('--epochs', type=int, default=100)
+    parser.add_argument('--epochs', type=int, default=1)
     parser.add_argument('--model', type=str, default='gat',
                         choices=['gat', 'graphsage'])
     parser.add_argument('--sizes', type=str, default='25-15')
@@ -234,7 +239,7 @@ def configure_optimizers(self):
                     num_layers=len(args.sizes), dropout=args.dropout)
         print(f'#Params {sum([p.numel() for p in model.parameters()])}')
         checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode = 'max', save_top_k=1)
-        trainer = Trainer(gpus=args.device, max_epochs=args.epochs,
+        trainer = Trainer(accelerator="cpu", max_epochs=args.epochs,
                           callbacks=[checkpoint_callback],
                           default_root_dir=f'logs/{args.model}')
         trainer.fit(model, datamodule=datamodule)
@@ -246,7 +251,7 @@ def configure_optimizers(self):
         print(f'Evaluating saved model in {logdir}...')
         ckpt = glob.glob(f'{logdir}/checkpoints/*')[0]
 
-        trainer = Trainer(gpus=args.device, resume_from_checkpoint=ckpt)
+        trainer = Trainer(accelerator="cpu", resume_from_checkpoint=ckpt)
         model = GNN.load_from_checkpoint(checkpoint_path=ckpt,
                                          hparams_file=f'{logdir}/hparams.yaml')
 
@@ -255,17 +260,17 @@ def configure_optimizers(self):
 
         trainer.test(model=model, datamodule=datamodule)
 
-        evaluator = MAG240MEvaluator()
-        loader = datamodule.hidden_test_dataloader()
-
-        model.eval()
-        device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
-        model.to(device)
-        y_preds = []
-        for batch in tqdm(loader):
-            batch = batch.to(device)
-            with torch.no_grad():
-                out = model(batch.x, batch.adjs_t).argmax(dim=-1).cpu()
-                y_preds.append(out)
-        res = {'y_pred': torch.cat(y_preds, dim=0)}
-        evaluator.save_test_submission(res, f'results/{args.model}', mode = 'test-dev')
+        # evaluator = MAG240MEvaluator()
+        # loader = datamodule.hidden_test_dataloader()
+
+        # model.eval()
+        # device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
+        # model.to(device)
+        # y_preds = []
+        # for batch in tqdm(loader):
+        #     batch = batch.to(device)
+        #     with torch.no_grad():
+        #         out = model(batch.x, batch.adjs_t).argmax(dim=-1).cpu()
+        #         y_preds.append(out)
+        # res = {'y_pred': torch.cat(y_preds, dim=0)}
+        # evaluator.save_test_submission(res, f'results/{args.model}', mode = 'test-dev')

From 1264086c5e7716609bbda8beee32f5f73adc2e46 Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Tue, 20 Sep 2022 14:22:38 +0800
Subject: [PATCH 02/12] MAG240M to HeteroData

---
 ogb/lsc/mag240m.py | 66 +++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 21 deletions(-)

diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index 63580f48..ae608bd1 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -9,6 +9,7 @@
 
 from ogb.utils.url import decide_download, download_url, extract_zip, makedirs
 from ogb.lsc.utils import split_test
+from torch_geometric.data import HeteroData
 
 
 class MAG240MDataset(object):
@@ -53,6 +54,25 @@ def download(self):
                 print('Stop download.')
                 exit(-1)
 
+    def to_pyg_hetero_data(self):
+        data = HeteroData()
+        path = osp.join(self.dir, 'processed', 'paper', 'node_feat.npy')
+        # Current is not in-memory
+        data["paper"].x = torch.from_numpy(np.load(path, mmap_mode='r'))
+        path = osp.join(self.dir, 'processed', 'paper', 'node_label.npy')
+        data["paper"].y = torch.from_numpy(np.load(path))
+        path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy')
+        data["paper"].year = torch.from_numpy(np.load(path, mmap_mode='r'))
+        
+        for edge_type in [('author', 'affiliated_with', 'institution'),
+                          ('author', 'writes', 'paper'),
+                          ('paper', 'cites', 'paper')]:
+            name = '___'.join(edge_type)
+            path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
+            edge_index = torch.from_numpy(np.load(path))
+            data[edge_type].edge_index = edge_index
+        return data
+
     @property
     def num_papers(self) -> int:
         return self.__meta__['paper']
@@ -108,14 +128,14 @@ def all_paper_year(self) -> np.ndarray:
         path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy')
         return np.load(path)
 
-    def edge_index(self, id1: str, id2: str,
-                   id3: Optional[str] = None) -> np.ndarray:
-        src = id1
-        rel, dst = (id3, id2) if id3 is None else (id2, id3)
-        rel = self.__rels__[(src, dst)] if rel is None else rel
-        name = f'{src}___{rel}___{dst}'
-        path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
-        return np.load(path)
+    # def edge_index(self, id1: str, id2: str,
+    #                id3: Optional[str] = None) -> np.ndarray:
+    #     src = id1
+    #     rel, dst = (id3, id2) if id3 is None else (id2, id3)
+    #     rel = self.__rels__[(src, dst)] if rel is None else rel
+    #     name = f'{src}___{rel}___{dst}'
+    #     path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
+    #     return np.load(path)
 
     def __repr__(self) -> str:
         return f'{self.__class__.__name__}()'
@@ -163,7 +183,8 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
 
 
 if __name__ == '__main__':
-    dataset = MAG240MDataset()
+    dataset = MAG240MDataset('/home/user/yanbing/pyg/ogb/ogb/lsc/dataset')
+    data = dataset.to_pyg_hetero_data()
     print(dataset)
     print(dataset.num_papers)
     print(dataset.num_authors)
@@ -196,12 +217,15 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
 
     exit(-1)
 
-    print(dataset.paper_feat.shape)
-    print(dataset.paper_year.shape)
-    print(dataset.paper_year[:100])
-    print(dataset.edge_index('author', 'paper').shape)
-    print(dataset.edge_index('author', 'writes', 'paper').shape)
-    print(dataset.edge_index('author', 'writes', 'paper')[:, :10])
+    print(data['paper'].x.shape)
+    print(data['paper'].year.shape)
+    print(data['paper'].year[:100])
+    print(data[(('author', 'writes', 'paper'))].edge_index.shape)
+    print(data[('author', 'affiliated_with', 'institution')].edge_index.shape)
+    print(data[('paper', 'cites', 'paper')].edge_index.shape)
+    print(data[('author', 'writes', 'paper')].edge_index[:, :10])
+    print(data[('author', 'affiliated_with', 'institution')].edge_index[:, :10])
+    print(data[('paper', 'cites', 'paper')].edge_index[:, :10])
     print('-----------------')
 
     train_idx = dataset.get_idx_split('train')
@@ -209,9 +233,9 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
     test_idx = dataset.get_idx_split('test-whole')
     print(len(train_idx) + len(val_idx) + len(test_idx))
 
-    print(dataset.paper_label[train_idx][:10])
-    print(dataset.paper_label[val_idx][:10])
-    print(dataset.paper_label[test_idx][:10])
-    print(dataset.paper_year[train_idx][:10])
-    print(dataset.paper_year[val_idx][:10])
-    print(dataset.paper_year[test_idx][:10])
+    print(data['paper'].y[train_idx][:10])
+    print(data['paper'].y[val_idx][:10])
+    print(data['paper'].y[test_idx][:10])
+    print(data['paper'].year[train_idx][:10])
+    print(data['paper'].year[val_idx][:10])
+    print(data['paper'].year[test_idx][:10])

From 10ee86a14f0b78248d7a56e6c4813dcd40c19853 Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Wed, 21 Sep 2022 16:26:58 +0800
Subject: [PATCH 03/12] Try convert model to_hetero

---
 examples/lsc/mag240m/gnn.py | 251 ++++++++++++++++++++----------------
 1 file changed, 142 insertions(+), 109 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index ca50a1f8..a53bbe82 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -19,30 +19,19 @@
                                seed_everything)
 
 from torch_sparse import SparseTensor
-from torch_geometric.nn import SAGEConv, GATConv
+from torch_geometric.nn import SAGEConv, GATConv, to_hetero
 from torch_geometric.data import NeighborSampler
 
 from ogb.lsc import MAG240MDataset, MAG240MEvaluator
 from root import ROOT
 from torch_geometric.loader.neighbor_loader import NeighborLoader
-from torch.profiler import profile, ProfilerActivity
-from torch_geometric.data import Data
-from torch_sparse import coalesce
 from torch_geometric.typing import Adj
 import torch_geometric.transforms as T
+from torch_geometric.typing import EdgeType, NodeType
+from typing import Dict, Tuple
+from torch_geometric.data import Batch
 
-class Batch(NamedTuple):
-    x: Tensor
-    y: Tensor
-    adjs_t: List[SparseTensor]
-
-    def to(self, *args, **kwargs):
-        return Batch(
-            x=self.x.to(*args, **kwargs),
-            y=self.y.to(*args, **kwargs),
-            adjs_t=[adj_t.to(*args, **kwargs) for adj_t in self.adjs_t],
-        )
-
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 class MAG240M(LightningDataModule):
     def __init__(self, data_dir: str, batch_size: int, sizes: List[int],
@@ -52,6 +41,7 @@ def __init__(self, data_dir: str, batch_size: int, sizes: List[int],
         self.batch_size = batch_size
         self.sizes = sizes
         self.in_memory = in_memory
+        self.transform = T.ToUndirected(merge=False)
 
     @property
     def num_features(self) -> int:
@@ -63,23 +53,25 @@ def num_classes(self) -> int:
 
     def prepare_data(self):
         dataset = MAG240MDataset(self.data_dir)
+        self.data = dataset.to_pyg_hetero_data()
         path = f'{dataset.dir}/paper_to_paper_symmetric.pt'
         if not osp.exists(path):
             t = time.perf_counter()
             print('Converting adjacency matrix...', end=' ', flush=True)
-            edge_index = dataset.edge_index('paper', 'cites', 'paper')
-            edge_index = torch.from_numpy(edge_index)
-            adj_t = SparseTensor(
-                row=edge_index[0], col=edge_index[1],
-                sparse_sizes=(dataset.num_papers, dataset.num_papers),
-                is_sorted=True)
-            torch.save(adj_t.to_symmetric(), path)
+            edge_index = self.data[('paper', 'cites', 'paper')].edge_index
+            # edge_index = torch.from_numpy(edge_index)
+            # adj_t = SparseTensor(
+            #     row=edge_index[0], col=edge_index[1],
+            #     sparse_sizes=(dataset.num_papers, dataset.num_papers),
+            #     is_sorted=True)
+            # torch.save(adj_t.to_symmetric(), path)
             print(f'Done! [{time.perf_counter() - t:.2f}s]')
 
     def setup(self, stage: Optional[str] = None):
         t = time.perf_counter()
         print('Reading dataset...', end=' ', flush=True)
         dataset = MAG240MDataset(self.data_dir)
+        self.data = dataset.to_pyg_hetero_data()
 
         self.train_idx = torch.from_numpy(dataset.get_idx_split('train'))
         self.train_idx = self.train_idx
@@ -88,123 +80,163 @@ def setup(self, stage: Optional[str] = None):
         self.val_idx.share_memory_()
         self.test_idx = torch.from_numpy(dataset.get_idx_split('test-dev'))
         self.test_idx.share_memory_()
+        print("train_idx", self.train_idx)
+        print("val_idx", self.val_idx)
+        print("test_idx", self.test_idx)
 
         if self.in_memory:
             self.x = torch.from_numpy(dataset.all_paper_feat).share_memory_()
         else:
-            self.x = dataset.paper_feat
-        self.y = torch.from_numpy(dataset.all_paper_label)
+            self.x = self.data['paper'].x
+        self.y = self.data['paper'].y
 
-        path = f'{dataset.dir}/paper_to_paper_symmetric.pt'
-        self.adj_t = torch.load(path)
+        # path = f'{dataset.dir}/paper_to_paper_symmetric.pt'
+        # self.adj_t = torch.load(path)
         print(f'Done! [{time.perf_counter() - t:.2f}s]')
 
+    def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
+        node_types = ['paper']
+        edge_types = [('author', 'affiliated_with', 'institution'),
+                      ('author', 'writes', 'paper'),
+                      ('paper', 'cites', 'paper')]
+        return node_types, edge_types        
+
     def train_dataloader(self):
-        return NeighborSampler(self.adj_t, node_idx=self.train_idx,
-                               sizes=self.sizes, return_e_id=False,
-                               transform=self.convert_batch,
-                               batch_size=self.batch_size, shuffle=True,
-                               num_workers=4)
+        return NeighborLoader(self.data, num_neighbors=self.sizes,
+                              input_nodes=self.train_idx,
+                                batch_size=self.batch_size, shuffle=True,
+                                num_workers=4)
 
     def val_dataloader(self):
-        return NeighborSampler(self.adj_t, node_idx=self.val_idx,
-                               sizes=self.sizes, return_e_id=False,
-                               transform=self.convert_batch,
-                               batch_size=self.batch_size, num_workers=2)
+        return NeighborLoader(self.data, num_neighbors=self.sizes,
+                              input_nodes=self.val_idx,
+                                batch_size=self.batch_size, num_workers=2)
 
     def test_dataloader(self):  # Test best validation model once again.
-        return NeighborSampler(self.adj_t, node_idx=self.val_idx,
-                               sizes=self.sizes, return_e_id=False,
-                               transform=self.convert_batch,
-                               batch_size=self.batch_size, num_workers=2)
+        return NeighborLoader(self.data, num_neighbors=self.sizes,
+                              input_nodes=self.val_idx, 
+                                batch_size=self.batch_size, num_workers=2)
 
     def hidden_test_dataloader(self):
-        return NeighborSampler(self.adj_t, node_idx=self.test_idx,
-                               sizes=self.sizes, return_e_id=False,
-                               transform=self.convert_batch,
-                               batch_size=self.batch_size, num_workers=3)
+        return NeighborLoader(self.data, num_neighbors=self.sizes,
+                              input_nodes=self.test_idx,
+                                batch_size=self.batch_size, num_workers=3)
 
-    def convert_batch(self, batch_size, n_id, adjs):
-        if self.in_memory:
-            x = self.x[n_id].to(torch.float)
-        else:
-            x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float)
-        y = self.y[n_id[:batch_size]].to(torch.long)
-        return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs])
+    # def convert_batch(self, batch_size, n_id, adjs):
+    #     if self.in_memory:
+    #         x = self.x[n_id].to(torch.float)
+    #     else:
+    #         x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float)
+    #     y = self.y[n_id[:batch_size]].to(torch.long)
+    #     return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs])
 
 
-class GNN(LightningModule):
+class GNN(torch.nn.Module):
     def __init__(self, model: str, in_channels: int, out_channels: int,
                  hidden_channels: int, num_layers: int, heads: int = 4,
                  dropout: float = 0.5):
         super().__init__()
-        self.save_hyperparameters()
         self.model = model.lower()
         self.dropout = dropout
-
-        self.convs = ModuleList()
-        self.norms = ModuleList()
-        self.skips = ModuleList()
-
-        if self.model == 'gat':
-            self.convs.append(
-                GATConv(in_channels, hidden_channels // heads, heads))
-            self.skips.append(Linear(in_channels, hidden_channels))
-            for _ in range(num_layers - 1):
-                self.convs.append(
-                    GATConv(hidden_channels, hidden_channels // heads, heads))
-                self.skips.append(Linear(hidden_channels, hidden_channels))
-
-        elif self.model == 'graphsage':
-            self.convs.append(SAGEConv(in_channels, hidden_channels))
-            for _ in range(num_layers - 1):
-                self.convs.append(SAGEConv(hidden_channels, hidden_channels))
-
-        for _ in range(num_layers):
-            self.norms.append(BatchNorm1d(hidden_channels))
-
-        self.mlp = Sequential(
-            Linear(hidden_channels, hidden_channels),
-            BatchNorm1d(hidden_channels),
-            ReLU(inplace=True),
-            Dropout(p=self.dropout),
-            Linear(hidden_channels, out_channels),
-        )
-
+        self.num_layers = num_layers
+
+        # self.convs = ModuleList()
+        # self.norms = ModuleList()
+        # self.skips = ModuleList()
+
+        # if self.model == 'gat':
+        #     self.convs.append(
+        #         GATConv(in_channels, hidden_channels // heads, heads))
+        #     self.skips.append(Linear(in_channels, hidden_channels))
+        #     for _ in range(num_layers - 1):
+        #         self.convs.append(
+        #             GATConv(hidden_channels, hidden_channels // heads, heads))
+        #         self.skips.append(Linear(hidden_channels, hidden_channels))
+
+        # elif self.model == 'graphsage':
+        #     self.convs.append(SAGEConv(in_channels, hidden_channels))
+        #     for _ in range(num_layers - 1):
+        #         self.convs.append(SAGEConv(hidden_channels, hidden_channels))
+
+        # for _ in range(num_layers):
+        #     self.norms.append(BatchNorm1d(hidden_channels))
+
+        # self.mlp = Sequential(
+        #     Linear(hidden_channels, hidden_channels),
+        #     BatchNorm1d(hidden_channels),
+        #     ReLU(inplace=True),
+        #     Dropout(p=self.dropout),
+        #     Linear(hidden_channels, out_channels),
+        # )
+        
+        self.conv1 = SAGEConv(in_channels, hidden_channels)
+        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
+        self.lin = Linear(hidden_channels, out_channels)
+
+    def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
+        # for i in range(self.num_layers):
+        #     x = self.convs[i](x, edge_index)
+        #     if self.model == 'gat':
+        #         x = x + self.skips[i](x)
+        #         x = F.elu(self.norms[i](x))
+        #     elif self.model == 'graphsage':
+        #         x = F.relu(self.norms[i](x))
+        #     x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.conv1(x, edge_index).relu()
+        x = self.conv2(x, edge_index).relu()
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        return x
+        # return self.mlp(x)
+
+class HeteroGNN(LightningModule):
+    def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeType]], in_channels: int, out_channels: int,
+                 hidden_channels: int, num_layers: int, heads: int = 4,
+                 dropout: float = 0.5):
+        super().__init__()
+        self.save_hyperparameters()
+        model = GNN(model_name, in_channels, out_channels, hidden_channels, num_layers, heads=heads, dropout=dropout)
+        self.model = to_hetero(model, metadata, aggr='sum').to(device)
         self.train_acc = Accuracy()
         self.val_acc = Accuracy()
         self.test_acc = Accuracy()
 
-    def forward(self, x: Tensor, adjs_t: List[SparseTensor]) -> Tensor:
-        for i, adj_t in enumerate(adjs_t):
-            x_target = x[:adj_t.size(0)]
-            x = self.convs[i]((x, x_target), adj_t)
-            if self.model == 'gat':
-                x = x + self.skips[i](x_target)
-                x = F.elu(self.norms[i](x))
-            elif self.model == 'graphsage':
-                x = F.relu(self.norms[i](x))
-            x = F.dropout(x, p=self.dropout, training=self.training)
-
-        return self.mlp(x)
-
-    def training_step(self, batch, batch_idx: int):
-        y_hat = self(batch.x, batch.adjs_t)
-        train_loss = F.cross_entropy(y_hat, batch.y)
-        self.train_acc(y_hat.softmax(dim=-1), batch.y)
+    def forward(
+        self,
+        x_dict: Dict[NodeType, Tensor],
+        edge_index_dict: Dict[EdgeType, Tensor],
+    ) -> Dict[NodeType, Tensor]:
+        return self.model(x_dict, edge_index_dict)
+
+    # @torch.no_grad()
+    # def setup(self, stage: Optional[str] = None):  # Initialize parameters.
+    #     data = self.trainer.datamodule
+    #     loader = data.dataloader(torch.arange(1), shuffle=False, num_workers=0)
+    #     batch = next(iter(loader))
+    #     self(batch.x_dict, batch.edge_index_dict)
+
+    def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]:
+        batch_size = batch['paper'].batch_size
+        y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size]
+        y = batch['paper'].y[:batch_size]
+        return y_hat, y
+
+    def training_step(self, batch: Batch, batch_idx: int) -> Tensor:
+        y_hat, y = self.common_step(batch)
+        train_loss = F.cross_entropy(y_hat, y)
+        self.train_acc(y_hat.softmax(dim=-1), y)
         self.log('train_acc', self.train_acc, prog_bar=True, on_step=False,
                  on_epoch=True)
         return train_loss
 
-    def validation_step(self, batch, batch_idx: int):
-        y_hat = self(batch.x, batch.adjs_t)
-        self.val_acc(y_hat.softmax(dim=-1), batch.y)
+    def validation_step(self, batch: Batch, batch_idx: int):
+        y_hat, y = self.common_step(batch)
+        self.val_acc(y_hat.softmax(dim=-1), y)
         self.log('val_acc', self.val_acc, on_step=False, on_epoch=True,
                  prog_bar=True, sync_dist=True)
 
-    def test_step(self, batch, batch_idx: int):
-        y_hat = self(batch.x, batch.adjs_t)
-        self.test_acc(y_hat.softmax(dim=-1), batch.y)
+    def test_step(self, batch: Batch, batch_idx: int):
+        y_hat, y = self.common_step(batch)
+        self.test_acc(y_hat.softmax(dim=-1), y)
         self.log('test_acc', self.test_acc, on_step=False, on_epoch=True,
                  prog_bar=True, sync_dist=True)
 
@@ -222,7 +254,7 @@ def configure_optimizers(self):
     parser.add_argument('--epochs', type=int, default=1)
     parser.add_argument('--model', type=str, default='gat',
                         choices=['gat', 'graphsage'])
-    parser.add_argument('--sizes', type=str, default='25-15')
+    parser.add_argument('--sizes', type=str, default='2')
     parser.add_argument('--in-memory', action='store_true')
     parser.add_argument('--device', type=str, default='0')
     parser.add_argument('--evaluate', action='store_true')
@@ -232,9 +264,10 @@ def configure_optimizers(self):
 
     seed_everything(42)
     datamodule = MAG240M(ROOT, args.batch_size, args.sizes, args.in_memory)
+    print(datamodule.metadata())
 
     if not args.evaluate:
-        model = GNN(args.model, datamodule.num_features,
+        model = HeteroGNN(args.model, datamodule.metadata(), datamodule.num_features,
                     datamodule.num_classes, args.hidden_channels,
                     num_layers=len(args.sizes), dropout=args.dropout)
         print(f'#Params {sum([p.numel() for p in model.parameters()])}')
@@ -252,7 +285,7 @@ def configure_optimizers(self):
         ckpt = glob.glob(f'{logdir}/checkpoints/*')[0]
 
         trainer = Trainer(accelerator="cpu", resume_from_checkpoint=ckpt)
-        model = GNN.load_from_checkpoint(checkpoint_path=ckpt,
+        model = HeteroGNN.load_from_checkpoint(checkpoint_path=ckpt,
                                          hparams_file=f'{logdir}/hparams.yaml')
 
         datamodule.batch_size = 16

From cfdf217b884ecead8e783fdde4893b4b7cd79f9f Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Thu, 22 Sep 2022 10:52:17 +0800
Subject: [PATCH 04/12] Add author and institution as node types

---
 ogb/lsc/mag240m.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index ae608bd1..ea4f1a51 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -58,11 +58,13 @@ def to_pyg_hetero_data(self):
         data = HeteroData()
         path = osp.join(self.dir, 'processed', 'paper', 'node_feat.npy')
         # Current is not in-memory
-        data["paper"].x = torch.from_numpy(np.load(path, mmap_mode='r'))
+        data['paper'].x = torch.from_numpy(np.load(path, mmap_mode='r'))
         path = osp.join(self.dir, 'processed', 'paper', 'node_label.npy')
-        data["paper"].y = torch.from_numpy(np.load(path))
+        data['paper'].y = torch.from_numpy(np.load(path))
         path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy')
-        data["paper"].year = torch.from_numpy(np.load(path, mmap_mode='r'))
+        data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r'))
+        data['author'].num_nodes = self.__meta__['author']
+        data['institution'].num_nodes = self.__meta__['institution']
         
         for edge_type in [('author', 'affiliated_with', 'institution'),
                           ('author', 'writes', 'paper'),

From 7480eea2527af6185f1431c821c8dc4353a67d45 Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Mon, 26 Sep 2022 14:21:50 +0800
Subject: [PATCH 05/12] Use LightningNodeData

---
 examples/lsc/mag240m/gnn.py | 92 ++++---------------------------------
 ogb/lsc/mag240m.py          |  7 +++
 2 files changed, 15 insertions(+), 84 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index a53bbe82..71fb19dc 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -30,18 +30,13 @@
 from torch_geometric.typing import EdgeType, NodeType
 from typing import Dict, Tuple
 from torch_geometric.data import Batch
+from torch_geometric.data import LightningNodeData
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-class MAG240M(LightningDataModule):
-    def __init__(self, data_dir: str, batch_size: int, sizes: List[int],
-                 in_memory: bool = False):
-        super().__init__()
-        self.data_dir = data_dir
-        self.batch_size = batch_size
-        self.sizes = sizes
-        self.in_memory = in_memory
-        self.transform = T.ToUndirected(merge=False)
+class MAG240M(LightningNodeData):
+    def __init__(self, *args, **kwargs):
+        super(MAG240M, self).__init__(*args, **kwargs)
 
     @property
     def num_features(self) -> int:
@@ -51,49 +46,6 @@ def num_features(self) -> int:
     def num_classes(self) -> int:
         return 153
 
-    def prepare_data(self):
-        dataset = MAG240MDataset(self.data_dir)
-        self.data = dataset.to_pyg_hetero_data()
-        path = f'{dataset.dir}/paper_to_paper_symmetric.pt'
-        if not osp.exists(path):
-            t = time.perf_counter()
-            print('Converting adjacency matrix...', end=' ', flush=True)
-            edge_index = self.data[('paper', 'cites', 'paper')].edge_index
-            # edge_index = torch.from_numpy(edge_index)
-            # adj_t = SparseTensor(
-            #     row=edge_index[0], col=edge_index[1],
-            #     sparse_sizes=(dataset.num_papers, dataset.num_papers),
-            #     is_sorted=True)
-            # torch.save(adj_t.to_symmetric(), path)
-            print(f'Done! [{time.perf_counter() - t:.2f}s]')
-
-    def setup(self, stage: Optional[str] = None):
-        t = time.perf_counter()
-        print('Reading dataset...', end=' ', flush=True)
-        dataset = MAG240MDataset(self.data_dir)
-        self.data = dataset.to_pyg_hetero_data()
-
-        self.train_idx = torch.from_numpy(dataset.get_idx_split('train'))
-        self.train_idx = self.train_idx
-        self.train_idx.share_memory_()
-        self.val_idx = torch.from_numpy(dataset.get_idx_split('valid'))
-        self.val_idx.share_memory_()
-        self.test_idx = torch.from_numpy(dataset.get_idx_split('test-dev'))
-        self.test_idx.share_memory_()
-        print("train_idx", self.train_idx)
-        print("val_idx", self.val_idx)
-        print("test_idx", self.test_idx)
-
-        if self.in_memory:
-            self.x = torch.from_numpy(dataset.all_paper_feat).share_memory_()
-        else:
-            self.x = self.data['paper'].x
-        self.y = self.data['paper'].y
-
-        # path = f'{dataset.dir}/paper_to_paper_symmetric.pt'
-        # self.adj_t = torch.load(path)
-        print(f'Done! [{time.perf_counter() - t:.2f}s]')
-
     def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
         node_types = ['paper']
         edge_types = [('author', 'affiliated_with', 'institution'),
@@ -101,36 +53,6 @@ def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
                       ('paper', 'cites', 'paper')]
         return node_types, edge_types        
 
-    def train_dataloader(self):
-        return NeighborLoader(self.data, num_neighbors=self.sizes,
-                              input_nodes=self.train_idx,
-                                batch_size=self.batch_size, shuffle=True,
-                                num_workers=4)
-
-    def val_dataloader(self):
-        return NeighborLoader(self.data, num_neighbors=self.sizes,
-                              input_nodes=self.val_idx,
-                                batch_size=self.batch_size, num_workers=2)
-
-    def test_dataloader(self):  # Test best validation model once again.
-        return NeighborLoader(self.data, num_neighbors=self.sizes,
-                              input_nodes=self.val_idx, 
-                                batch_size=self.batch_size, num_workers=2)
-
-    def hidden_test_dataloader(self):
-        return NeighborLoader(self.data, num_neighbors=self.sizes,
-                              input_nodes=self.test_idx,
-                                batch_size=self.batch_size, num_workers=3)
-
-    # def convert_batch(self, batch_size, n_id, adjs):
-    #     if self.in_memory:
-    #         x = self.x[n_id].to(torch.float)
-    #     else:
-    #         x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float)
-    #     y = self.y[n_id[:batch_size]].to(torch.long)
-    #     return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs])
-
-
 class GNN(torch.nn.Module):
     def __init__(self, model: str, in_channels: int, out_channels: int,
                  hidden_channels: int, num_layers: int, heads: int = 4,
@@ -263,8 +185,10 @@ def configure_optimizers(self):
     print(args)
 
     seed_everything(42)
-    datamodule = MAG240M(ROOT, args.batch_size, args.sizes, args.in_memory)
-    print(datamodule.metadata())
+    dataset = MAG240MDataset(ROOT)
+    data = dataset.to_pyg_hetero_data()
+    datamodule = MAG240M(data, loader='neighbor', num_neighbors=args.sizes, batch_size=args.batch_size, num_workers=2)
+    print(datamodule)
 
     if not args.evaluate:
         model = HeteroGNN(args.model, datamodule.metadata(), datamodule.num_features,
diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index ea4f1a51..32993c2c 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -73,6 +73,13 @@ def to_pyg_hetero_data(self):
             path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
             edge_index = torch.from_numpy(np.load(path))
             data[edge_type].edge_index = edge_index
+
+        for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]:
+            idx = self.get_idx_split(f)
+            idx = torch.from_numpy(idx)
+            mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool)
+            mask[idx] = True
+            data['paper'][f'{v}_mask'] = mask
         return data
 
     @property

From 12326066c25184a98e3ed75b44e9ed23f6fff16c Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Tue, 27 Sep 2022 10:45:31 +0800
Subject: [PATCH 06/12] Add inst.npy

---
 ogb/lsc/mag240m.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index 32993c2c..062f3fdd 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -65,6 +65,8 @@ def to_pyg_hetero_data(self):
         data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r'))
         data['author'].num_nodes = self.__meta__['author']
         data['institution'].num_nodes = self.__meta__['institution']
+        path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
+        data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features))
         
         for edge_type in [('author', 'affiliated_with', 'institution'),
                           ('author', 'writes', 'paper'),

From 70f149ab07ff2759696dc9e86bf192764bc60c6e Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Wed, 28 Sep 2022 10:05:12 +0800
Subject: [PATCH 07/12] Add author.npy

---
 ogb/lsc/mag240m.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index 062f3fdd..723d5b7a 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -63,10 +63,20 @@ def to_pyg_hetero_data(self):
         data['paper'].y = torch.from_numpy(np.load(path))
         path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy')
         data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r'))
+
         data['author'].num_nodes = self.__meta__['author']
+        path = osp.join(self.dir, 'processed', 'author', 'author.npy')
+        data['author'].x = np.memmap(path, mode='r', shape=(data['author'].num_nodes, self.num_paper_features))
         data['institution'].num_nodes = self.__meta__['institution']
         path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
         data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features))
+
+        # data['author'].num_nodes = self.__meta__['author']
+        # data['institution'].num_nodes = self.__meta__['institution']
+        # path = osp.join(self.dir, 'processed', 'author', 'author.npy')
+        # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True)
+        # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
+        # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True)
         
         for edge_type in [('author', 'affiliated_with', 'institution'),
                           ('author', 'writes', 'paper'),

From e93c89cb998dbb103031396920a4f5f542906023 Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Thu, 20 Oct 2022 14:45:41 +0800
Subject: [PATCH 08/12] Add 3 nodes and remove relu/dropout in model

---
 examples/lsc/mag240m/gnn.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index 71fb19dc..33c5ed9f 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -47,7 +47,8 @@ def num_classes(self) -> int:
         return 153
 
     def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
-        node_types = ['paper']
+        node_types = ['paper', 'author', 'institution']
+        # node_types = ['paper']
         edge_types = [('author', 'affiliated_with', 'institution'),
                       ('author', 'writes', 'paper'),
                       ('paper', 'cites', 'paper')]
@@ -94,6 +95,7 @@ def __init__(self, model: str, in_channels: int, out_channels: int,
         self.conv1 = SAGEConv(in_channels, hidden_channels)
         self.conv2 = SAGEConv(hidden_channels, hidden_channels)
         self.lin = Linear(hidden_channels, out_channels)
+        # self.relu = ReLU(inplace=True)
 
     def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
         # for i in range(self.num_layers):
@@ -104,9 +106,11 @@ def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
         #     elif self.model == 'graphsage':
         #         x = F.relu(self.norms[i](x))
         #     x = F.dropout(x, p=self.dropout, training=self.training)
-        x = self.conv1(x, edge_index).relu()
-        x = self.conv2(x, edge_index).relu()
-        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.conv1(x, edge_index)
+        # x = F.relu(x, inplace=True)
+        x = self.conv2(x, edge_index)
+        # x = F.relu(x, inplace=True)
+        # x = F.dropout(x, p=self.dropout, training=self.training)
         return x
         # return self.mlp(x)
 

From fd2687931f544b46159e16308952cb102b64916b Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Wed, 9 Nov 2022 15:05:35 +0800
Subject: [PATCH 09/12] add edges

---
 examples/lsc/mag240m/gnn.py | 21 ++++++++++++++-------
 ogb/lsc/mag240m.py          |  6 ++++--
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index 33c5ed9f..85f1b40f 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -49,9 +49,13 @@ def num_classes(self) -> int:
     def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
         node_types = ['paper', 'author', 'institution']
         # node_types = ['paper']
-        edge_types = [('author', 'affiliated_with', 'institution'),
-                      ('author', 'writes', 'paper'),
-                      ('paper', 'cites', 'paper')]
+        edge_types = [
+            ('author', 'affiliated_with', 'institution'),
+            ('institution', 'rev_affiliated_with', 'author'),
+            ('author', 'writes', 'paper'),
+            ('paper', 'rev_writes', 'author'),
+            ('paper', 'cites', 'paper'),
+        ]
         return node_types, edge_types        
 
 class GNN(torch.nn.Module):
@@ -106,11 +110,12 @@ def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
         #     elif self.model == 'graphsage':
         #         x = F.relu(self.norms[i](x))
         #     x = F.dropout(x, p=self.dropout, training=self.training)
+        x = x.to(torch.float)
         x = self.conv1(x, edge_index)
-        # x = F.relu(x, inplace=True)
+        x = F.relu(x, inplace=True)
         x = self.conv2(x, edge_index)
-        # x = F.relu(x, inplace=True)
-        # x = F.dropout(x, p=self.dropout, training=self.training)
+        x = F.relu(x, inplace=True)
+        x = F.dropout(x, p=self.dropout, training=self.training)
         return x
         # return self.mlp(x)
 
@@ -121,7 +126,7 @@ def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeTyp
         super().__init__()
         self.save_hyperparameters()
         model = GNN(model_name, in_channels, out_channels, hidden_channels, num_layers, heads=heads, dropout=dropout)
-        self.model = to_hetero(model, metadata, aggr='sum').to(device)
+        self.model = to_hetero(model, metadata, aggr='sum', debug=True).to(device)
         self.train_acc = Accuracy()
         self.val_acc = Accuracy()
         self.test_acc = Accuracy()
@@ -142,6 +147,8 @@ def forward(
 
     def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]:
         batch_size = batch['paper'].batch_size
+        print(batch.x_dict)
+        print(batch.edge_index_dict)
         y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size]
         y = batch['paper'].y[:batch_size]
         return y_hat, y
diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index 723d5b7a..aba7b80f 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -66,10 +66,12 @@ def to_pyg_hetero_data(self):
 
         data['author'].num_nodes = self.__meta__['author']
         path = osp.join(self.dir, 'processed', 'author', 'author.npy')
-        data['author'].x = np.memmap(path, mode='r', shape=(data['author'].num_nodes, self.num_paper_features))
+        # data['author'].x = np.memmap(path, mode='w+', shape=(data['author'].num_nodes, self.num_paper_features))
+        data['author'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['author'].num_nodes, self.num_paper_features))
         data['institution'].num_nodes = self.__meta__['institution']
         path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
-        data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features))
+        # data['institution'].x = np.memmap(path, mode='w+', shape=(data['institution'].num_nodes, self.num_paper_features))
+        data['institution'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['institution'].num_nodes, self.num_paper_features))
 
         # data['author'].num_nodes = self.__meta__['author']
         # data['institution'].num_nodes = self.__meta__['institution']

From 2dfa0fb75bfbb6dadb625cebac6cbbbddce94fcf Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Tue, 15 Nov 2022 09:52:23 +0800
Subject: [PATCH 10/12] reverse edge types

---
 ogb/lsc/mag240m.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index aba7b80f..349bfbdd 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -79,14 +79,15 @@ def to_pyg_hetero_data(self):
         # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True)
         # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
         # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True)
-        
+
+        print("node done")
         for edge_type in [('author', 'affiliated_with', 'institution'),
                           ('author', 'writes', 'paper'),
                           ('paper', 'cites', 'paper')]:
             name = '___'.join(edge_type)
             path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
             edge_index = torch.from_numpy(np.load(path))
-            data[edge_type].edge_index = edge_index
+            data[edge_type].edge_index = edge_index.flip([0])
 
         for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]:
             idx = self.get_idx_split(f)

From a8424295f6ad0a1b8acbbfa207e78c82c4a6870b Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Sun, 20 Nov 2022 17:10:46 +0800
Subject: [PATCH 11/12] reverse edge types and convert y to long

---
 examples/lsc/mag240m/gnn.py | 59 ++++---------------------------------
 ogb/lsc/mag240m.py          | 24 ++-------------
 2 files changed, 8 insertions(+), 75 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index 85f1b40f..7f55532f 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -48,7 +48,6 @@ def num_classes(self) -> int:
 
     def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]:
         node_types = ['paper', 'author', 'institution']
-        # node_types = ['paper']
         edge_types = [
             ('author', 'affiliated_with', 'institution'),
             ('institution', 'rev_affiliated_with', 'author'),
@@ -67,57 +66,18 @@ def __init__(self, model: str, in_channels: int, out_channels: int,
         self.dropout = dropout
         self.num_layers = num_layers
 
-        # self.convs = ModuleList()
-        # self.norms = ModuleList()
-        # self.skips = ModuleList()
-
-        # if self.model == 'gat':
-        #     self.convs.append(
-        #         GATConv(in_channels, hidden_channels // heads, heads))
-        #     self.skips.append(Linear(in_channels, hidden_channels))
-        #     for _ in range(num_layers - 1):
-        #         self.convs.append(
-        #             GATConv(hidden_channels, hidden_channels // heads, heads))
-        #         self.skips.append(Linear(hidden_channels, hidden_channels))
-
-        # elif self.model == 'graphsage':
-        #     self.convs.append(SAGEConv(in_channels, hidden_channels))
-        #     for _ in range(num_layers - 1):
-        #         self.convs.append(SAGEConv(hidden_channels, hidden_channels))
-
-        # for _ in range(num_layers):
-        #     self.norms.append(BatchNorm1d(hidden_channels))
-
-        # self.mlp = Sequential(
-        #     Linear(hidden_channels, hidden_channels),
-        #     BatchNorm1d(hidden_channels),
-        #     ReLU(inplace=True),
-        #     Dropout(p=self.dropout),
-        #     Linear(hidden_channels, out_channels),
-        # )
-        
         self.conv1 = SAGEConv(in_channels, hidden_channels)
         self.conv2 = SAGEConv(hidden_channels, hidden_channels)
         self.lin = Linear(hidden_channels, out_channels)
         # self.relu = ReLU(inplace=True)
 
     def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
-        # for i in range(self.num_layers):
-        #     x = self.convs[i](x, edge_index)
-        #     if self.model == 'gat':
-        #         x = x + self.skips[i](x)
-        #         x = F.elu(self.norms[i](x))
-        #     elif self.model == 'graphsage':
-        #         x = F.relu(self.norms[i](x))
-        #     x = F.dropout(x, p=self.dropout, training=self.training)
         x = x.to(torch.float)
-        x = self.conv1(x, edge_index)
-        x = F.relu(x, inplace=True)
-        x = self.conv2(x, edge_index)
-        x = F.relu(x, inplace=True)
+        x = self.conv1(x, edge_index).relu()
         x = F.dropout(x, p=self.dropout, training=self.training)
-        return x
-        # return self.mlp(x)
+        x = self.conv2(x, edge_index).relu()
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        return self.lin(x)
 
 class HeteroGNN(LightningModule):
     def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeType]], in_channels: int, out_channels: int,
@@ -138,19 +98,10 @@ def forward(
     ) -> Dict[NodeType, Tensor]:
         return self.model(x_dict, edge_index_dict)
 
-    # @torch.no_grad()
-    # def setup(self, stage: Optional[str] = None):  # Initialize parameters.
-    #     data = self.trainer.datamodule
-    #     loader = data.dataloader(torch.arange(1), shuffle=False, num_workers=0)
-    #     batch = next(iter(loader))
-    #     self(batch.x_dict, batch.edge_index_dict)
-
     def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]:
         batch_size = batch['paper'].batch_size
-        print(batch.x_dict)
-        print(batch.edge_index_dict)
         y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size]
-        y = batch['paper'].y[:batch_size]
+        y = batch['paper'].y[:batch_size].to(torch.long)
         return y_hat, y
 
     def training_step(self, batch: Batch, batch_idx: int) -> Tensor:
diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py
index 349bfbdd..9ddf7fe3 100644
--- a/ogb/lsc/mag240m.py
+++ b/ogb/lsc/mag240m.py
@@ -66,28 +66,19 @@ def to_pyg_hetero_data(self):
 
         data['author'].num_nodes = self.__meta__['author']
         path = osp.join(self.dir, 'processed', 'author', 'author.npy')
-        # data['author'].x = np.memmap(path, mode='w+', shape=(data['author'].num_nodes, self.num_paper_features))
         data['author'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['author'].num_nodes, self.num_paper_features))
         data['institution'].num_nodes = self.__meta__['institution']
         path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
-        # data['institution'].x = np.memmap(path, mode='w+', shape=(data['institution'].num_nodes, self.num_paper_features))
         data['institution'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['institution'].num_nodes, self.num_paper_features))
 
-        # data['author'].num_nodes = self.__meta__['author']
-        # data['institution'].num_nodes = self.__meta__['institution']
-        # path = osp.join(self.dir, 'processed', 'author', 'author.npy')
-        # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True)
-        # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy')
-        # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True)
-
-        print("node done")
         for edge_type in [('author', 'affiliated_with', 'institution'),
                           ('author', 'writes', 'paper'),
                           ('paper', 'cites', 'paper')]:
             name = '___'.join(edge_type)
             path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
             edge_index = torch.from_numpy(np.load(path))
-            data[edge_type].edge_index = edge_index.flip([0])
+            data[edge_type].edge_index = edge_index
+            data[edge_type[2], f'rev_{edge_type[1]}', edge_type[0]].edge_index = edge_index.flip([0])
 
         for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]:
             idx = self.get_idx_split(f)
@@ -152,15 +143,6 @@ def all_paper_year(self) -> np.ndarray:
         path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy')
         return np.load(path)
 
-    # def edge_index(self, id1: str, id2: str,
-    #                id3: Optional[str] = None) -> np.ndarray:
-    #     src = id1
-    #     rel, dst = (id3, id2) if id3 is None else (id2, id3)
-    #     rel = self.__rels__[(src, dst)] if rel is None else rel
-    #     name = f'{src}___{rel}___{dst}'
-    #     path = osp.join(self.dir, 'processed', name, 'edge_index.npy')
-    #     return np.load(path)
-
     def __repr__(self) -> str:
         return f'{self.__class__.__name__}()'
 
@@ -207,7 +189,7 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
 
 
 if __name__ == '__main__':
-    dataset = MAG240MDataset('/home/user/yanbing/pyg/ogb/ogb/lsc/dataset')
+    dataset = MAG240MDataset()
     data = dataset.to_pyg_hetero_data()
     print(dataset)
     print(dataset.num_papers)

From d6d0fd0754d7a8024b0dce5d9185eb02aff11cbe Mon Sep 17 00:00:00 2001
From: yanbing-j <yanbing.jiang@intel.com>
Date: Tue, 29 Nov 2022 21:19:59 +0800
Subject: [PATCH 12/12] Use trainer.predict to run inference

---
 examples/lsc/mag240m/gnn.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py
index 7f55532f..816af124 100644
--- a/examples/lsc/mag240m/gnn.py
+++ b/examples/lsc/mag240m/gnn.py
@@ -31,6 +31,8 @@
 from typing import Dict, Tuple
 from torch_geometric.data import Batch
 from torch_geometric.data import LightningNodeData
+import pathlib
+from torch.profiler import ProfilerActivity, profile
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -69,7 +71,6 @@ def __init__(self, model: str, in_channels: int, out_channels: int,
         self.conv1 = SAGEConv(in_channels, hidden_channels)
         self.conv2 = SAGEConv(hidden_channels, hidden_channels)
         self.lin = Linear(hidden_channels, out_channels)
-        # self.relu = ReLU(inplace=True)
 
     def forward(self, x: Tensor, edge_index: Adj) -> Tensor:
         x = x.to(torch.float)
@@ -124,11 +125,25 @@ def test_step(self, batch: Batch, batch_idx: int):
         self.log('test_acc', self.test_acc, on_step=False, on_epoch=True,
                  prog_bar=True, sync_dist=True)
 
+    def predict_step(self, batch: Batch, batch_idx: int):
+        y_hat, y = self.common_step(batch)
+        return y_hat
+
     def configure_optimizers(self):
         optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
         scheduler = StepLR(optimizer, step_size=25, gamma=0.25)
         return [optimizer], [scheduler]
 
+def trace_handler(p):
+    if torch.cuda.is_available():
+        profile_sort = 'self_cuda_time_total'
+    else:
+        profile_sort = 'self_cpu_time_total'
+    output = p.key_averages().table(sort_by=profile_sort)
+    print(output)
+    profile_dir = str(pathlib.Path.cwd()) + '/'
+    timeline_file = profile_dir + 'timeline' + '.json'
+    p.export_chrome_trace(timeline_file)
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
@@ -142,6 +157,7 @@ def configure_optimizers(self):
     parser.add_argument('--in-memory', action='store_true')
     parser.add_argument('--device', type=str, default='0')
     parser.add_argument('--evaluate', action='store_true')
+    parser.add_argument('--profile', action='store_true')
     args = parser.parse_args()
     args.sizes = [int(i) for i in args.sizes.split('-')]
     print(args)
@@ -149,7 +165,12 @@ def configure_optimizers(self):
     seed_everything(42)
     dataset = MAG240MDataset(ROOT)
     data = dataset.to_pyg_hetero_data()
-    datamodule = MAG240M(data, loader='neighbor', num_neighbors=args.sizes, batch_size=args.batch_size, num_workers=2)
+    datamodule = MAG240M(data, ('paper', data['paper'].train_mask),
+                        ('paper', data['paper'].val_mask),
+                        ('paper', data['paper'].test_mask),
+                        ('paper', data['paper'].test_mask),
+                        loader='neighbor', num_neighbors=args.sizes,
+                        batch_size=args.batch_size, num_workers=2)
     print(datamodule)
 
     if not args.evaluate:
@@ -160,7 +181,9 @@ def configure_optimizers(self):
         checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode = 'max', save_top_k=1)
         trainer = Trainer(accelerator="cpu", max_epochs=args.epochs,
                           callbacks=[checkpoint_callback],
-                          default_root_dir=f'logs/{args.model}')
+                          default_root_dir=f'logs/{args.model}',
+                          limit_train_batches=10, limit_test_batches=10,
+                          limit_val_batches=10, limit_predict_batches=10)
         trainer.fit(model, datamodule=datamodule)
 
     if args.evaluate:
@@ -177,7 +200,12 @@ def configure_optimizers(self):
         datamodule.batch_size = 16
         datamodule.sizes = [160] * len(args.sizes)  # (Almost) no sampling...
 
-        trainer.test(model=model, datamodule=datamodule)
+        trainer.predict(model=model, datamodule=datamodule)
+        if args.profile:
+            with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+                            on_trace_ready=trace_handler) as p:
+                trainer.predict(model=model, datamodule=datamodule)
+                p.step()
 
         # evaluator = MAG240MEvaluator()
         # loader = datamodule.hidden_test_dataloader()