From 71e07f5ba1606571781b4d3153aef421fc5443bd Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 19 Sep 2022 15:36:10 +0800 Subject: [PATCH 01/12] Enable --- examples/lsc/mag240m/gnn.py | 43 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index e0ae7708..ca50a1f8 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -13,7 +13,7 @@ from torch.nn import ModuleList, Sequential, Linear, BatchNorm1d, ReLU, Dropout from torch.optim.lr_scheduler import StepLR -from pytorch_lightning.metrics import Accuracy +from torchmetrics import Accuracy from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning import (LightningDataModule, LightningModule, Trainer, seed_everything) @@ -24,7 +24,12 @@ from ogb.lsc import MAG240MDataset, MAG240MEvaluator from root import ROOT - +from torch_geometric.loader.neighbor_loader import NeighborLoader +from torch.profiler import profile, ProfilerActivity +from torch_geometric.data import Data +from torch_sparse import coalesce +from torch_geometric.typing import Adj +import torch_geometric.transforms as T class Batch(NamedTuple): x: Tensor @@ -214,7 +219,7 @@ def configure_optimizers(self): parser.add_argument('--hidden_channels', type=int, default=1024) parser.add_argument('--batch_size', type=int, default=1024) parser.add_argument('--dropout', type=float, default=0.5) - parser.add_argument('--epochs', type=int, default=100) + parser.add_argument('--epochs', type=int, default=1) parser.add_argument('--model', type=str, default='gat', choices=['gat', 'graphsage']) parser.add_argument('--sizes', type=str, default='25-15') @@ -234,7 +239,7 @@ def configure_optimizers(self): num_layers=len(args.sizes), dropout=args.dropout) print(f'#Params {sum([p.numel() for p in model.parameters()])}') checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode = 'max', save_top_k=1) - trainer = Trainer(gpus=args.device, max_epochs=args.epochs, + trainer = Trainer(accelerator="cpu", max_epochs=args.epochs, callbacks=[checkpoint_callback], default_root_dir=f'logs/{args.model}') trainer.fit(model, datamodule=datamodule) @@ -246,7 +251,7 @@ def configure_optimizers(self): print(f'Evaluating saved model in {logdir}...') ckpt = glob.glob(f'{logdir}/checkpoints/*')[0] - trainer = Trainer(gpus=args.device, resume_from_checkpoint=ckpt) + trainer = Trainer(accelerator="cpu", resume_from_checkpoint=ckpt) model = GNN.load_from_checkpoint(checkpoint_path=ckpt, hparams_file=f'{logdir}/hparams.yaml') @@ -255,17 +260,17 @@ def configure_optimizers(self): trainer.test(model=model, datamodule=datamodule) - evaluator = MAG240MEvaluator() - loader = datamodule.hidden_test_dataloader() - - model.eval() - device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' - model.to(device) - y_preds = [] - for batch in tqdm(loader): - batch = batch.to(device) - with torch.no_grad(): - out = model(batch.x, batch.adjs_t).argmax(dim=-1).cpu() - y_preds.append(out) - res = {'y_pred': torch.cat(y_preds, dim=0)} - evaluator.save_test_submission(res, f'results/{args.model}', mode = 'test-dev') + # evaluator = MAG240MEvaluator() + # loader = datamodule.hidden_test_dataloader() + + # model.eval() + # device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' + # model.to(device) + # y_preds = [] + # for batch in tqdm(loader): + # batch = batch.to(device) + # with torch.no_grad(): + # out = model(batch.x, batch.adjs_t).argmax(dim=-1).cpu() + # y_preds.append(out) + # res = {'y_pred': torch.cat(y_preds, dim=0)} + # evaluator.save_test_submission(res, f'results/{args.model}', mode = 'test-dev') From 1264086c5e7716609bbda8beee32f5f73adc2e46 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 20 Sep 2022 14:22:38 +0800 Subject: [PATCH 02/12] MAG240M to HeteroData --- ogb/lsc/mag240m.py | 66 +++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index 63580f48..ae608bd1 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -9,6 +9,7 @@ from ogb.utils.url import decide_download, download_url, extract_zip, makedirs from ogb.lsc.utils import split_test +from torch_geometric.data import HeteroData class MAG240MDataset(object): @@ -53,6 +54,25 @@ def download(self): print('Stop download.') exit(-1) + def to_pyg_hetero_data(self): + data = HeteroData() + path = osp.join(self.dir, 'processed', 'paper', 'node_feat.npy') + # Current is not in-memory + data["paper"].x = torch.from_numpy(np.load(path, mmap_mode='r')) + path = osp.join(self.dir, 'processed', 'paper', 'node_label.npy') + data["paper"].y = torch.from_numpy(np.load(path)) + path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy') + data["paper"].year = torch.from_numpy(np.load(path, mmap_mode='r')) + + for edge_type in [('author', 'affiliated_with', 'institution'), + ('author', 'writes', 'paper'), + ('paper', 'cites', 'paper')]: + name = '___'.join(edge_type) + path = osp.join(self.dir, 'processed', name, 'edge_index.npy') + edge_index = torch.from_numpy(np.load(path)) + data[edge_type].edge_index = edge_index + return data + @property def num_papers(self) -> int: return self.__meta__['paper'] @@ -108,14 +128,14 @@ def all_paper_year(self) -> np.ndarray: path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy') return np.load(path) - def edge_index(self, id1: str, id2: str, - id3: Optional[str] = None) -> np.ndarray: - src = id1 - rel, dst = (id3, id2) if id3 is None else (id2, id3) - rel = self.__rels__[(src, dst)] if rel is None else rel - name = f'{src}___{rel}___{dst}' - path = osp.join(self.dir, 'processed', name, 'edge_index.npy') - return np.load(path) + # def edge_index(self, id1: str, id2: str, + # id3: Optional[str] = None) -> np.ndarray: + # src = id1 + # rel, dst = (id3, id2) if id3 is None else (id2, id3) + # rel = self.__rels__[(src, dst)] if rel is None else rel + # name = f'{src}___{rel}___{dst}' + # path = osp.join(self.dir, 'processed', name, 'edge_index.npy') + # return np.load(path) def __repr__(self) -> str: return f'{self.__class__.__name__}()' @@ -163,7 +183,8 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str): if __name__ == '__main__': - dataset = MAG240MDataset() + dataset = MAG240MDataset('/home/user/yanbing/pyg/ogb/ogb/lsc/dataset') + data = dataset.to_pyg_hetero_data() print(dataset) print(dataset.num_papers) print(dataset.num_authors) @@ -196,12 +217,15 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str): exit(-1) - print(dataset.paper_feat.shape) - print(dataset.paper_year.shape) - print(dataset.paper_year[:100]) - print(dataset.edge_index('author', 'paper').shape) - print(dataset.edge_index('author', 'writes', 'paper').shape) - print(dataset.edge_index('author', 'writes', 'paper')[:, :10]) + print(data['paper'].x.shape) + print(data['paper'].year.shape) + print(data['paper'].year[:100]) + print(data[(('author', 'writes', 'paper'))].edge_index.shape) + print(data[('author', 'affiliated_with', 'institution')].edge_index.shape) + print(data[('paper', 'cites', 'paper')].edge_index.shape) + print(data[('author', 'writes', 'paper')].edge_index[:, :10]) + print(data[('author', 'affiliated_with', 'institution')].edge_index[:, :10]) + print(data[('paper', 'cites', 'paper')].edge_index[:, :10]) print('-----------------') train_idx = dataset.get_idx_split('train') @@ -209,9 +233,9 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str): test_idx = dataset.get_idx_split('test-whole') print(len(train_idx) + len(val_idx) + len(test_idx)) - print(dataset.paper_label[train_idx][:10]) - print(dataset.paper_label[val_idx][:10]) - print(dataset.paper_label[test_idx][:10]) - print(dataset.paper_year[train_idx][:10]) - print(dataset.paper_year[val_idx][:10]) - print(dataset.paper_year[test_idx][:10]) + print(data['paper'].y[train_idx][:10]) + print(data['paper'].y[val_idx][:10]) + print(data['paper'].y[test_idx][:10]) + print(data['paper'].year[train_idx][:10]) + print(data['paper'].year[val_idx][:10]) + print(data['paper'].year[test_idx][:10]) From 10ee86a14f0b78248d7a56e6c4813dcd40c19853 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Wed, 21 Sep 2022 16:26:58 +0800 Subject: [PATCH 03/12] Try convert model to_hetero --- examples/lsc/mag240m/gnn.py | 251 ++++++++++++++++++++---------------- 1 file changed, 142 insertions(+), 109 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index ca50a1f8..a53bbe82 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -19,30 +19,19 @@ seed_everything) from torch_sparse import SparseTensor -from torch_geometric.nn import SAGEConv, GATConv +from torch_geometric.nn import SAGEConv, GATConv, to_hetero from torch_geometric.data import NeighborSampler from ogb.lsc import MAG240MDataset, MAG240MEvaluator from root import ROOT from torch_geometric.loader.neighbor_loader import NeighborLoader -from torch.profiler import profile, ProfilerActivity -from torch_geometric.data import Data -from torch_sparse import coalesce from torch_geometric.typing import Adj import torch_geometric.transforms as T +from torch_geometric.typing import EdgeType, NodeType +from typing import Dict, Tuple +from torch_geometric.data import Batch -class Batch(NamedTuple): - x: Tensor - y: Tensor - adjs_t: List[SparseTensor] - - def to(self, *args, **kwargs): - return Batch( - x=self.x.to(*args, **kwargs), - y=self.y.to(*args, **kwargs), - adjs_t=[adj_t.to(*args, **kwargs) for adj_t in self.adjs_t], - ) - +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class MAG240M(LightningDataModule): def __init__(self, data_dir: str, batch_size: int, sizes: List[int], @@ -52,6 +41,7 @@ def __init__(self, data_dir: str, batch_size: int, sizes: List[int], self.batch_size = batch_size self.sizes = sizes self.in_memory = in_memory + self.transform = T.ToUndirected(merge=False) @property def num_features(self) -> int: @@ -63,23 +53,25 @@ def num_classes(self) -> int: def prepare_data(self): dataset = MAG240MDataset(self.data_dir) + self.data = dataset.to_pyg_hetero_data() path = f'{dataset.dir}/paper_to_paper_symmetric.pt' if not osp.exists(path): t = time.perf_counter() print('Converting adjacency matrix...', end=' ', flush=True) - edge_index = dataset.edge_index('paper', 'cites', 'paper') - edge_index = torch.from_numpy(edge_index) - adj_t = SparseTensor( - row=edge_index[0], col=edge_index[1], - sparse_sizes=(dataset.num_papers, dataset.num_papers), - is_sorted=True) - torch.save(adj_t.to_symmetric(), path) + edge_index = self.data[('paper', 'cites', 'paper')].edge_index + # edge_index = torch.from_numpy(edge_index) + # adj_t = SparseTensor( + # row=edge_index[0], col=edge_index[1], + # sparse_sizes=(dataset.num_papers, dataset.num_papers), + # is_sorted=True) + # torch.save(adj_t.to_symmetric(), path) print(f'Done! [{time.perf_counter() - t:.2f}s]') def setup(self, stage: Optional[str] = None): t = time.perf_counter() print('Reading dataset...', end=' ', flush=True) dataset = MAG240MDataset(self.data_dir) + self.data = dataset.to_pyg_hetero_data() self.train_idx = torch.from_numpy(dataset.get_idx_split('train')) self.train_idx = self.train_idx @@ -88,123 +80,163 @@ def setup(self, stage: Optional[str] = None): self.val_idx.share_memory_() self.test_idx = torch.from_numpy(dataset.get_idx_split('test-dev')) self.test_idx.share_memory_() + print("train_idx", self.train_idx) + print("val_idx", self.val_idx) + print("test_idx", self.test_idx) if self.in_memory: self.x = torch.from_numpy(dataset.all_paper_feat).share_memory_() else: - self.x = dataset.paper_feat - self.y = torch.from_numpy(dataset.all_paper_label) + self.x = self.data['paper'].x + self.y = self.data['paper'].y - path = f'{dataset.dir}/paper_to_paper_symmetric.pt' - self.adj_t = torch.load(path) + # path = f'{dataset.dir}/paper_to_paper_symmetric.pt' + # self.adj_t = torch.load(path) print(f'Done! [{time.perf_counter() - t:.2f}s]') + def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: + node_types = ['paper'] + edge_types = [('author', 'affiliated_with', 'institution'), + ('author', 'writes', 'paper'), + ('paper', 'cites', 'paper')] + return node_types, edge_types + def train_dataloader(self): - return NeighborSampler(self.adj_t, node_idx=self.train_idx, - sizes=self.sizes, return_e_id=False, - transform=self.convert_batch, - batch_size=self.batch_size, shuffle=True, - num_workers=4) + return NeighborLoader(self.data, num_neighbors=self.sizes, + input_nodes=self.train_idx, + batch_size=self.batch_size, shuffle=True, + num_workers=4) def val_dataloader(self): - return NeighborSampler(self.adj_t, node_idx=self.val_idx, - sizes=self.sizes, return_e_id=False, - transform=self.convert_batch, - batch_size=self.batch_size, num_workers=2) + return NeighborLoader(self.data, num_neighbors=self.sizes, + input_nodes=self.val_idx, + batch_size=self.batch_size, num_workers=2) def test_dataloader(self): # Test best validation model once again. - return NeighborSampler(self.adj_t, node_idx=self.val_idx, - sizes=self.sizes, return_e_id=False, - transform=self.convert_batch, - batch_size=self.batch_size, num_workers=2) + return NeighborLoader(self.data, num_neighbors=self.sizes, + input_nodes=self.val_idx, + batch_size=self.batch_size, num_workers=2) def hidden_test_dataloader(self): - return NeighborSampler(self.adj_t, node_idx=self.test_idx, - sizes=self.sizes, return_e_id=False, - transform=self.convert_batch, - batch_size=self.batch_size, num_workers=3) + return NeighborLoader(self.data, num_neighbors=self.sizes, + input_nodes=self.test_idx, + batch_size=self.batch_size, num_workers=3) - def convert_batch(self, batch_size, n_id, adjs): - if self.in_memory: - x = self.x[n_id].to(torch.float) - else: - x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float) - y = self.y[n_id[:batch_size]].to(torch.long) - return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs]) + # def convert_batch(self, batch_size, n_id, adjs): + # if self.in_memory: + # x = self.x[n_id].to(torch.float) + # else: + # x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float) + # y = self.y[n_id[:batch_size]].to(torch.long) + # return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs]) -class GNN(LightningModule): +class GNN(torch.nn.Module): def __init__(self, model: str, in_channels: int, out_channels: int, hidden_channels: int, num_layers: int, heads: int = 4, dropout: float = 0.5): super().__init__() - self.save_hyperparameters() self.model = model.lower() self.dropout = dropout - - self.convs = ModuleList() - self.norms = ModuleList() - self.skips = ModuleList() - - if self.model == 'gat': - self.convs.append( - GATConv(in_channels, hidden_channels // heads, heads)) - self.skips.append(Linear(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - self.convs.append( - GATConv(hidden_channels, hidden_channels // heads, heads)) - self.skips.append(Linear(hidden_channels, hidden_channels)) - - elif self.model == 'graphsage': - self.convs.append(SAGEConv(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - self.convs.append(SAGEConv(hidden_channels, hidden_channels)) - - for _ in range(num_layers): - self.norms.append(BatchNorm1d(hidden_channels)) - - self.mlp = Sequential( - Linear(hidden_channels, hidden_channels), - BatchNorm1d(hidden_channels), - ReLU(inplace=True), - Dropout(p=self.dropout), - Linear(hidden_channels, out_channels), - ) - + self.num_layers = num_layers + + # self.convs = ModuleList() + # self.norms = ModuleList() + # self.skips = ModuleList() + + # if self.model == 'gat': + # self.convs.append( + # GATConv(in_channels, hidden_channels // heads, heads)) + # self.skips.append(Linear(in_channels, hidden_channels)) + # for _ in range(num_layers - 1): + # self.convs.append( + # GATConv(hidden_channels, hidden_channels // heads, heads)) + # self.skips.append(Linear(hidden_channels, hidden_channels)) + + # elif self.model == 'graphsage': + # self.convs.append(SAGEConv(in_channels, hidden_channels)) + # for _ in range(num_layers - 1): + # self.convs.append(SAGEConv(hidden_channels, hidden_channels)) + + # for _ in range(num_layers): + # self.norms.append(BatchNorm1d(hidden_channels)) + + # self.mlp = Sequential( + # Linear(hidden_channels, hidden_channels), + # BatchNorm1d(hidden_channels), + # ReLU(inplace=True), + # Dropout(p=self.dropout), + # Linear(hidden_channels, out_channels), + # ) + + self.conv1 = SAGEConv(in_channels, hidden_channels) + self.conv2 = SAGEConv(hidden_channels, hidden_channels) + self.lin = Linear(hidden_channels, out_channels) + + def forward(self, x: Tensor, edge_index: Adj) -> Tensor: + # for i in range(self.num_layers): + # x = self.convs[i](x, edge_index) + # if self.model == 'gat': + # x = x + self.skips[i](x) + # x = F.elu(self.norms[i](x)) + # elif self.model == 'graphsage': + # x = F.relu(self.norms[i](x)) + # x = F.dropout(x, p=self.dropout, training=self.training) + x = self.conv1(x, edge_index).relu() + x = self.conv2(x, edge_index).relu() + x = F.dropout(x, p=self.dropout, training=self.training) + return x + # return self.mlp(x) + +class HeteroGNN(LightningModule): + def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeType]], in_channels: int, out_channels: int, + hidden_channels: int, num_layers: int, heads: int = 4, + dropout: float = 0.5): + super().__init__() + self.save_hyperparameters() + model = GNN(model_name, in_channels, out_channels, hidden_channels, num_layers, heads=heads, dropout=dropout) + self.model = to_hetero(model, metadata, aggr='sum').to(device) self.train_acc = Accuracy() self.val_acc = Accuracy() self.test_acc = Accuracy() - def forward(self, x: Tensor, adjs_t: List[SparseTensor]) -> Tensor: - for i, adj_t in enumerate(adjs_t): - x_target = x[:adj_t.size(0)] - x = self.convs[i]((x, x_target), adj_t) - if self.model == 'gat': - x = x + self.skips[i](x_target) - x = F.elu(self.norms[i](x)) - elif self.model == 'graphsage': - x = F.relu(self.norms[i](x)) - x = F.dropout(x, p=self.dropout, training=self.training) - - return self.mlp(x) - - def training_step(self, batch, batch_idx: int): - y_hat = self(batch.x, batch.adjs_t) - train_loss = F.cross_entropy(y_hat, batch.y) - self.train_acc(y_hat.softmax(dim=-1), batch.y) + def forward( + self, + x_dict: Dict[NodeType, Tensor], + edge_index_dict: Dict[EdgeType, Tensor], + ) -> Dict[NodeType, Tensor]: + return self.model(x_dict, edge_index_dict) + + # @torch.no_grad() + # def setup(self, stage: Optional[str] = None): # Initialize parameters. + # data = self.trainer.datamodule + # loader = data.dataloader(torch.arange(1), shuffle=False, num_workers=0) + # batch = next(iter(loader)) + # self(batch.x_dict, batch.edge_index_dict) + + def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]: + batch_size = batch['paper'].batch_size + y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size] + y = batch['paper'].y[:batch_size] + return y_hat, y + + def training_step(self, batch: Batch, batch_idx: int) -> Tensor: + y_hat, y = self.common_step(batch) + train_loss = F.cross_entropy(y_hat, y) + self.train_acc(y_hat.softmax(dim=-1), y) self.log('train_acc', self.train_acc, prog_bar=True, on_step=False, on_epoch=True) return train_loss - def validation_step(self, batch, batch_idx: int): - y_hat = self(batch.x, batch.adjs_t) - self.val_acc(y_hat.softmax(dim=-1), batch.y) + def validation_step(self, batch: Batch, batch_idx: int): + y_hat, y = self.common_step(batch) + self.val_acc(y_hat.softmax(dim=-1), y) self.log('val_acc', self.val_acc, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True) - def test_step(self, batch, batch_idx: int): - y_hat = self(batch.x, batch.adjs_t) - self.test_acc(y_hat.softmax(dim=-1), batch.y) + def test_step(self, batch: Batch, batch_idx: int): + y_hat, y = self.common_step(batch) + self.test_acc(y_hat.softmax(dim=-1), y) self.log('test_acc', self.test_acc, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True) @@ -222,7 +254,7 @@ def configure_optimizers(self): parser.add_argument('--epochs', type=int, default=1) parser.add_argument('--model', type=str, default='gat', choices=['gat', 'graphsage']) - parser.add_argument('--sizes', type=str, default='25-15') + parser.add_argument('--sizes', type=str, default='2') parser.add_argument('--in-memory', action='store_true') parser.add_argument('--device', type=str, default='0') parser.add_argument('--evaluate', action='store_true') @@ -232,9 +264,10 @@ def configure_optimizers(self): seed_everything(42) datamodule = MAG240M(ROOT, args.batch_size, args.sizes, args.in_memory) + print(datamodule.metadata()) if not args.evaluate: - model = GNN(args.model, datamodule.num_features, + model = HeteroGNN(args.model, datamodule.metadata(), datamodule.num_features, datamodule.num_classes, args.hidden_channels, num_layers=len(args.sizes), dropout=args.dropout) print(f'#Params {sum([p.numel() for p in model.parameters()])}') @@ -252,7 +285,7 @@ def configure_optimizers(self): ckpt = glob.glob(f'{logdir}/checkpoints/*')[0] trainer = Trainer(accelerator="cpu", resume_from_checkpoint=ckpt) - model = GNN.load_from_checkpoint(checkpoint_path=ckpt, + model = HeteroGNN.load_from_checkpoint(checkpoint_path=ckpt, hparams_file=f'{logdir}/hparams.yaml') datamodule.batch_size = 16 From cfdf217b884ecead8e783fdde4893b4b7cd79f9f Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Thu, 22 Sep 2022 10:52:17 +0800 Subject: [PATCH 04/12] Add author and institution as node types --- ogb/lsc/mag240m.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index ae608bd1..ea4f1a51 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -58,11 +58,13 @@ def to_pyg_hetero_data(self): data = HeteroData() path = osp.join(self.dir, 'processed', 'paper', 'node_feat.npy') # Current is not in-memory - data["paper"].x = torch.from_numpy(np.load(path, mmap_mode='r')) + data['paper'].x = torch.from_numpy(np.load(path, mmap_mode='r')) path = osp.join(self.dir, 'processed', 'paper', 'node_label.npy') - data["paper"].y = torch.from_numpy(np.load(path)) + data['paper'].y = torch.from_numpy(np.load(path)) path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy') - data["paper"].year = torch.from_numpy(np.load(path, mmap_mode='r')) + data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r')) + data['author'].num_nodes = self.__meta__['author'] + data['institution'].num_nodes = self.__meta__['institution'] for edge_type in [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), From 7480eea2527af6185f1431c821c8dc4353a67d45 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 26 Sep 2022 14:21:50 +0800 Subject: [PATCH 05/12] Use LightningNodeData --- examples/lsc/mag240m/gnn.py | 92 ++++--------------------------------- ogb/lsc/mag240m.py | 7 +++ 2 files changed, 15 insertions(+), 84 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index a53bbe82..71fb19dc 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -30,18 +30,13 @@ from torch_geometric.typing import EdgeType, NodeType from typing import Dict, Tuple from torch_geometric.data import Batch +from torch_geometric.data import LightningNodeData device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -class MAG240M(LightningDataModule): - def __init__(self, data_dir: str, batch_size: int, sizes: List[int], - in_memory: bool = False): - super().__init__() - self.data_dir = data_dir - self.batch_size = batch_size - self.sizes = sizes - self.in_memory = in_memory - self.transform = T.ToUndirected(merge=False) +class MAG240M(LightningNodeData): + def __init__(self, *args, **kwargs): + super(MAG240M, self).__init__(*args, **kwargs) @property def num_features(self) -> int: @@ -51,49 +46,6 @@ def num_features(self) -> int: def num_classes(self) -> int: return 153 - def prepare_data(self): - dataset = MAG240MDataset(self.data_dir) - self.data = dataset.to_pyg_hetero_data() - path = f'{dataset.dir}/paper_to_paper_symmetric.pt' - if not osp.exists(path): - t = time.perf_counter() - print('Converting adjacency matrix...', end=' ', flush=True) - edge_index = self.data[('paper', 'cites', 'paper')].edge_index - # edge_index = torch.from_numpy(edge_index) - # adj_t = SparseTensor( - # row=edge_index[0], col=edge_index[1], - # sparse_sizes=(dataset.num_papers, dataset.num_papers), - # is_sorted=True) - # torch.save(adj_t.to_symmetric(), path) - print(f'Done! [{time.perf_counter() - t:.2f}s]') - - def setup(self, stage: Optional[str] = None): - t = time.perf_counter() - print('Reading dataset...', end=' ', flush=True) - dataset = MAG240MDataset(self.data_dir) - self.data = dataset.to_pyg_hetero_data() - - self.train_idx = torch.from_numpy(dataset.get_idx_split('train')) - self.train_idx = self.train_idx - self.train_idx.share_memory_() - self.val_idx = torch.from_numpy(dataset.get_idx_split('valid')) - self.val_idx.share_memory_() - self.test_idx = torch.from_numpy(dataset.get_idx_split('test-dev')) - self.test_idx.share_memory_() - print("train_idx", self.train_idx) - print("val_idx", self.val_idx) - print("test_idx", self.test_idx) - - if self.in_memory: - self.x = torch.from_numpy(dataset.all_paper_feat).share_memory_() - else: - self.x = self.data['paper'].x - self.y = self.data['paper'].y - - # path = f'{dataset.dir}/paper_to_paper_symmetric.pt' - # self.adj_t = torch.load(path) - print(f'Done! [{time.perf_counter() - t:.2f}s]') - def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: node_types = ['paper'] edge_types = [('author', 'affiliated_with', 'institution'), @@ -101,36 +53,6 @@ def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: ('paper', 'cites', 'paper')] return node_types, edge_types - def train_dataloader(self): - return NeighborLoader(self.data, num_neighbors=self.sizes, - input_nodes=self.train_idx, - batch_size=self.batch_size, shuffle=True, - num_workers=4) - - def val_dataloader(self): - return NeighborLoader(self.data, num_neighbors=self.sizes, - input_nodes=self.val_idx, - batch_size=self.batch_size, num_workers=2) - - def test_dataloader(self): # Test best validation model once again. - return NeighborLoader(self.data, num_neighbors=self.sizes, - input_nodes=self.val_idx, - batch_size=self.batch_size, num_workers=2) - - def hidden_test_dataloader(self): - return NeighborLoader(self.data, num_neighbors=self.sizes, - input_nodes=self.test_idx, - batch_size=self.batch_size, num_workers=3) - - # def convert_batch(self, batch_size, n_id, adjs): - # if self.in_memory: - # x = self.x[n_id].to(torch.float) - # else: - # x = torch.from_numpy(self.x[n_id.numpy()]).to(torch.float) - # y = self.y[n_id[:batch_size]].to(torch.long) - # return Batch(x=x, y=y, adjs_t=[adj_t for adj_t, _, _ in adjs]) - - class GNN(torch.nn.Module): def __init__(self, model: str, in_channels: int, out_channels: int, hidden_channels: int, num_layers: int, heads: int = 4, @@ -263,8 +185,10 @@ def configure_optimizers(self): print(args) seed_everything(42) - datamodule = MAG240M(ROOT, args.batch_size, args.sizes, args.in_memory) - print(datamodule.metadata()) + dataset = MAG240MDataset(ROOT) + data = dataset.to_pyg_hetero_data() + datamodule = MAG240M(data, loader='neighbor', num_neighbors=args.sizes, batch_size=args.batch_size, num_workers=2) + print(datamodule) if not args.evaluate: model = HeteroGNN(args.model, datamodule.metadata(), datamodule.num_features, diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index ea4f1a51..32993c2c 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -73,6 +73,13 @@ def to_pyg_hetero_data(self): path = osp.join(self.dir, 'processed', name, 'edge_index.npy') edge_index = torch.from_numpy(np.load(path)) data[edge_type].edge_index = edge_index + + for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]: + idx = self.get_idx_split(f) + idx = torch.from_numpy(idx) + mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool) + mask[idx] = True + data['paper'][f'{v}_mask'] = mask return data @property From 12326066c25184a98e3ed75b44e9ed23f6fff16c Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 27 Sep 2022 10:45:31 +0800 Subject: [PATCH 06/12] Add inst.npy --- ogb/lsc/mag240m.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index 32993c2c..062f3fdd 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -65,6 +65,8 @@ def to_pyg_hetero_data(self): data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r')) data['author'].num_nodes = self.__meta__['author'] data['institution'].num_nodes = self.__meta__['institution'] + path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') + data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features)) for edge_type in [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), From 70f149ab07ff2759696dc9e86bf192764bc60c6e Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Wed, 28 Sep 2022 10:05:12 +0800 Subject: [PATCH 07/12] Add author.npy --- ogb/lsc/mag240m.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index 062f3fdd..723d5b7a 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -63,10 +63,20 @@ def to_pyg_hetero_data(self): data['paper'].y = torch.from_numpy(np.load(path)) path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy') data['paper'].year = torch.from_numpy(np.load(path, mmap_mode='r')) + data['author'].num_nodes = self.__meta__['author'] + path = osp.join(self.dir, 'processed', 'author', 'author.npy') + data['author'].x = np.memmap(path, mode='r', shape=(data['author'].num_nodes, self.num_paper_features)) data['institution'].num_nodes = self.__meta__['institution'] path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features)) + + # data['author'].num_nodes = self.__meta__['author'] + # data['institution'].num_nodes = self.__meta__['institution'] + # path = osp.join(self.dir, 'processed', 'author', 'author.npy') + # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True) + # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') + # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True) for edge_type in [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), From e93c89cb998dbb103031396920a4f5f542906023 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Thu, 20 Oct 2022 14:45:41 +0800 Subject: [PATCH 08/12] Add 3 nodes and remove relu/dropout in model --- examples/lsc/mag240m/gnn.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index 71fb19dc..33c5ed9f 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -47,7 +47,8 @@ def num_classes(self) -> int: return 153 def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: - node_types = ['paper'] + node_types = ['paper', 'author', 'institution'] + # node_types = ['paper'] edge_types = [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), ('paper', 'cites', 'paper')] @@ -94,6 +95,7 @@ def __init__(self, model: str, in_channels: int, out_channels: int, self.conv1 = SAGEConv(in_channels, hidden_channels) self.conv2 = SAGEConv(hidden_channels, hidden_channels) self.lin = Linear(hidden_channels, out_channels) + # self.relu = ReLU(inplace=True) def forward(self, x: Tensor, edge_index: Adj) -> Tensor: # for i in range(self.num_layers): @@ -104,9 +106,11 @@ def forward(self, x: Tensor, edge_index: Adj) -> Tensor: # elif self.model == 'graphsage': # x = F.relu(self.norms[i](x)) # x = F.dropout(x, p=self.dropout, training=self.training) - x = self.conv1(x, edge_index).relu() - x = self.conv2(x, edge_index).relu() - x = F.dropout(x, p=self.dropout, training=self.training) + x = self.conv1(x, edge_index) + # x = F.relu(x, inplace=True) + x = self.conv2(x, edge_index) + # x = F.relu(x, inplace=True) + # x = F.dropout(x, p=self.dropout, training=self.training) return x # return self.mlp(x) From fd2687931f544b46159e16308952cb102b64916b Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Wed, 9 Nov 2022 15:05:35 +0800 Subject: [PATCH 09/12] add edges --- examples/lsc/mag240m/gnn.py | 21 ++++++++++++++------- ogb/lsc/mag240m.py | 6 ++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index 33c5ed9f..85f1b40f 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -49,9 +49,13 @@ def num_classes(self) -> int: def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: node_types = ['paper', 'author', 'institution'] # node_types = ['paper'] - edge_types = [('author', 'affiliated_with', 'institution'), - ('author', 'writes', 'paper'), - ('paper', 'cites', 'paper')] + edge_types = [ + ('author', 'affiliated_with', 'institution'), + ('institution', 'rev_affiliated_with', 'author'), + ('author', 'writes', 'paper'), + ('paper', 'rev_writes', 'author'), + ('paper', 'cites', 'paper'), + ] return node_types, edge_types class GNN(torch.nn.Module): @@ -106,11 +110,12 @@ def forward(self, x: Tensor, edge_index: Adj) -> Tensor: # elif self.model == 'graphsage': # x = F.relu(self.norms[i](x)) # x = F.dropout(x, p=self.dropout, training=self.training) + x = x.to(torch.float) x = self.conv1(x, edge_index) - # x = F.relu(x, inplace=True) + x = F.relu(x, inplace=True) x = self.conv2(x, edge_index) - # x = F.relu(x, inplace=True) - # x = F.dropout(x, p=self.dropout, training=self.training) + x = F.relu(x, inplace=True) + x = F.dropout(x, p=self.dropout, training=self.training) return x # return self.mlp(x) @@ -121,7 +126,7 @@ def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeTyp super().__init__() self.save_hyperparameters() model = GNN(model_name, in_channels, out_channels, hidden_channels, num_layers, heads=heads, dropout=dropout) - self.model = to_hetero(model, metadata, aggr='sum').to(device) + self.model = to_hetero(model, metadata, aggr='sum', debug=True).to(device) self.train_acc = Accuracy() self.val_acc = Accuracy() self.test_acc = Accuracy() @@ -142,6 +147,8 @@ def forward( def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]: batch_size = batch['paper'].batch_size + print(batch.x_dict) + print(batch.edge_index_dict) y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size] y = batch['paper'].y[:batch_size] return y_hat, y diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index 723d5b7a..aba7b80f 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -66,10 +66,12 @@ def to_pyg_hetero_data(self): data['author'].num_nodes = self.__meta__['author'] path = osp.join(self.dir, 'processed', 'author', 'author.npy') - data['author'].x = np.memmap(path, mode='r', shape=(data['author'].num_nodes, self.num_paper_features)) + # data['author'].x = np.memmap(path, mode='w+', shape=(data['author'].num_nodes, self.num_paper_features)) + data['author'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['author'].num_nodes, self.num_paper_features)) data['institution'].num_nodes = self.__meta__['institution'] path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') - data['institution'].x = np.memmap(path, mode='r', shape=(data['institution'].num_nodes, self.num_paper_features)) + # data['institution'].x = np.memmap(path, mode='w+', shape=(data['institution'].num_nodes, self.num_paper_features)) + data['institution'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['institution'].num_nodes, self.num_paper_features)) # data['author'].num_nodes = self.__meta__['author'] # data['institution'].num_nodes = self.__meta__['institution'] From 2dfa0fb75bfbb6dadb625cebac6cbbbddce94fcf Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 15 Nov 2022 09:52:23 +0800 Subject: [PATCH 10/12] reverse edge types --- ogb/lsc/mag240m.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index aba7b80f..349bfbdd 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -79,14 +79,15 @@ def to_pyg_hetero_data(self): # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True) # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True) - + + print("node done") for edge_type in [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), ('paper', 'cites', 'paper')]: name = '___'.join(edge_type) path = osp.join(self.dir, 'processed', name, 'edge_index.npy') edge_index = torch.from_numpy(np.load(path)) - data[edge_type].edge_index = edge_index + data[edge_type].edge_index = edge_index.flip([0]) for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]: idx = self.get_idx_split(f) From a8424295f6ad0a1b8acbbfa207e78c82c4a6870b Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Sun, 20 Nov 2022 17:10:46 +0800 Subject: [PATCH 11/12] reverse edge types and convert y to long --- examples/lsc/mag240m/gnn.py | 59 ++++--------------------------------- ogb/lsc/mag240m.py | 24 ++------------- 2 files changed, 8 insertions(+), 75 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index 85f1b40f..7f55532f 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -48,7 +48,6 @@ def num_classes(self) -> int: def metadata(self) -> Tuple[List[NodeType], List[EdgeType]]: node_types = ['paper', 'author', 'institution'] - # node_types = ['paper'] edge_types = [ ('author', 'affiliated_with', 'institution'), ('institution', 'rev_affiliated_with', 'author'), @@ -67,57 +66,18 @@ def __init__(self, model: str, in_channels: int, out_channels: int, self.dropout = dropout self.num_layers = num_layers - # self.convs = ModuleList() - # self.norms = ModuleList() - # self.skips = ModuleList() - - # if self.model == 'gat': - # self.convs.append( - # GATConv(in_channels, hidden_channels // heads, heads)) - # self.skips.append(Linear(in_channels, hidden_channels)) - # for _ in range(num_layers - 1): - # self.convs.append( - # GATConv(hidden_channels, hidden_channels // heads, heads)) - # self.skips.append(Linear(hidden_channels, hidden_channels)) - - # elif self.model == 'graphsage': - # self.convs.append(SAGEConv(in_channels, hidden_channels)) - # for _ in range(num_layers - 1): - # self.convs.append(SAGEConv(hidden_channels, hidden_channels)) - - # for _ in range(num_layers): - # self.norms.append(BatchNorm1d(hidden_channels)) - - # self.mlp = Sequential( - # Linear(hidden_channels, hidden_channels), - # BatchNorm1d(hidden_channels), - # ReLU(inplace=True), - # Dropout(p=self.dropout), - # Linear(hidden_channels, out_channels), - # ) - self.conv1 = SAGEConv(in_channels, hidden_channels) self.conv2 = SAGEConv(hidden_channels, hidden_channels) self.lin = Linear(hidden_channels, out_channels) # self.relu = ReLU(inplace=True) def forward(self, x: Tensor, edge_index: Adj) -> Tensor: - # for i in range(self.num_layers): - # x = self.convs[i](x, edge_index) - # if self.model == 'gat': - # x = x + self.skips[i](x) - # x = F.elu(self.norms[i](x)) - # elif self.model == 'graphsage': - # x = F.relu(self.norms[i](x)) - # x = F.dropout(x, p=self.dropout, training=self.training) x = x.to(torch.float) - x = self.conv1(x, edge_index) - x = F.relu(x, inplace=True) - x = self.conv2(x, edge_index) - x = F.relu(x, inplace=True) + x = self.conv1(x, edge_index).relu() x = F.dropout(x, p=self.dropout, training=self.training) - return x - # return self.mlp(x) + x = self.conv2(x, edge_index).relu() + x = F.dropout(x, p=self.dropout, training=self.training) + return self.lin(x) class HeteroGNN(LightningModule): def __init__(self, model_name: str, metadata: Tuple[List[NodeType], List[EdgeType]], in_channels: int, out_channels: int, @@ -138,19 +98,10 @@ def forward( ) -> Dict[NodeType, Tensor]: return self.model(x_dict, edge_index_dict) - # @torch.no_grad() - # def setup(self, stage: Optional[str] = None): # Initialize parameters. - # data = self.trainer.datamodule - # loader = data.dataloader(torch.arange(1), shuffle=False, num_workers=0) - # batch = next(iter(loader)) - # self(batch.x_dict, batch.edge_index_dict) - def common_step(self, batch: Batch) -> Tuple[Tensor, Tensor]: batch_size = batch['paper'].batch_size - print(batch.x_dict) - print(batch.edge_index_dict) y_hat = self(batch.x_dict, batch.edge_index_dict)['paper'][:batch_size] - y = batch['paper'].y[:batch_size] + y = batch['paper'].y[:batch_size].to(torch.long) return y_hat, y def training_step(self, batch: Batch, batch_idx: int) -> Tensor: diff --git a/ogb/lsc/mag240m.py b/ogb/lsc/mag240m.py index 349bfbdd..9ddf7fe3 100644 --- a/ogb/lsc/mag240m.py +++ b/ogb/lsc/mag240m.py @@ -66,28 +66,19 @@ def to_pyg_hetero_data(self): data['author'].num_nodes = self.__meta__['author'] path = osp.join(self.dir, 'processed', 'author', 'author.npy') - # data['author'].x = np.memmap(path, mode='w+', shape=(data['author'].num_nodes, self.num_paper_features)) data['author'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['author'].num_nodes, self.num_paper_features)) data['institution'].num_nodes = self.__meta__['institution'] path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') - # data['institution'].x = np.memmap(path, mode='w+', shape=(data['institution'].num_nodes, self.num_paper_features)) data['institution'].x = np.memmap(path, mode='r', dtype="float16", shape=(data['institution'].num_nodes, self.num_paper_features)) - # data['author'].num_nodes = self.__meta__['author'] - # data['institution'].num_nodes = self.__meta__['institution'] - # path = osp.join(self.dir, 'processed', 'author', 'author.npy') - # data['author'].x = np.load(path, mmap_mode='r', encoding='bytes', allow_pickle=True) - # path = osp.join(self.dir, 'processed', 'institution', 'inst.npy') - # data['institution'].x = np.load(path, mmap_mode='r', allow_pickle=True) - - print("node done") for edge_type in [('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), ('paper', 'cites', 'paper')]: name = '___'.join(edge_type) path = osp.join(self.dir, 'processed', name, 'edge_index.npy') edge_index = torch.from_numpy(np.load(path)) - data[edge_type].edge_index = edge_index.flip([0]) + data[edge_type].edge_index = edge_index + data[edge_type[2], f'rev_{edge_type[1]}', edge_type[0]].edge_index = edge_index.flip([0]) for f, v in [('train', 'train'), ('valid', 'val'), ('test-dev', 'test')]: idx = self.get_idx_split(f) @@ -152,15 +143,6 @@ def all_paper_year(self) -> np.ndarray: path = osp.join(self.dir, 'processed', 'paper', 'node_year.npy') return np.load(path) - # def edge_index(self, id1: str, id2: str, - # id3: Optional[str] = None) -> np.ndarray: - # src = id1 - # rel, dst = (id3, id2) if id3 is None else (id2, id3) - # rel = self.__rels__[(src, dst)] if rel is None else rel - # name = f'{src}___{rel}___{dst}' - # path = osp.join(self.dir, 'processed', name, 'edge_index.npy') - # return np.load(path) - def __repr__(self) -> str: return f'{self.__class__.__name__}()' @@ -207,7 +189,7 @@ def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str): if __name__ == '__main__': - dataset = MAG240MDataset('/home/user/yanbing/pyg/ogb/ogb/lsc/dataset') + dataset = MAG240MDataset() data = dataset.to_pyg_hetero_data() print(dataset) print(dataset.num_papers) From d6d0fd0754d7a8024b0dce5d9185eb02aff11cbe Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 29 Nov 2022 21:19:59 +0800 Subject: [PATCH 12/12] Use trainer.predict to run inference --- examples/lsc/mag240m/gnn.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/examples/lsc/mag240m/gnn.py b/examples/lsc/mag240m/gnn.py index 7f55532f..816af124 100644 --- a/examples/lsc/mag240m/gnn.py +++ b/examples/lsc/mag240m/gnn.py @@ -31,6 +31,8 @@ from typing import Dict, Tuple from torch_geometric.data import Batch from torch_geometric.data import LightningNodeData +import pathlib +from torch.profiler import ProfilerActivity, profile device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -69,7 +71,6 @@ def __init__(self, model: str, in_channels: int, out_channels: int, self.conv1 = SAGEConv(in_channels, hidden_channels) self.conv2 = SAGEConv(hidden_channels, hidden_channels) self.lin = Linear(hidden_channels, out_channels) - # self.relu = ReLU(inplace=True) def forward(self, x: Tensor, edge_index: Adj) -> Tensor: x = x.to(torch.float) @@ -124,11 +125,25 @@ def test_step(self, batch: Batch, batch_idx: int): self.log('test_acc', self.test_acc, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True) + def predict_step(self, batch: Batch, batch_idx: int): + y_hat, y = self.common_step(batch) + return y_hat + def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=0.001) scheduler = StepLR(optimizer, step_size=25, gamma=0.25) return [optimizer], [scheduler] +def trace_handler(p): + if torch.cuda.is_available(): + profile_sort = 'self_cuda_time_total' + else: + profile_sort = 'self_cpu_time_total' + output = p.key_averages().table(sort_by=profile_sort) + print(output) + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'timeline' + '.json' + p.export_chrome_trace(timeline_file) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -142,6 +157,7 @@ def configure_optimizers(self): parser.add_argument('--in-memory', action='store_true') parser.add_argument('--device', type=str, default='0') parser.add_argument('--evaluate', action='store_true') + parser.add_argument('--profile', action='store_true') args = parser.parse_args() args.sizes = [int(i) for i in args.sizes.split('-')] print(args) @@ -149,7 +165,12 @@ def configure_optimizers(self): seed_everything(42) dataset = MAG240MDataset(ROOT) data = dataset.to_pyg_hetero_data() - datamodule = MAG240M(data, loader='neighbor', num_neighbors=args.sizes, batch_size=args.batch_size, num_workers=2) + datamodule = MAG240M(data, ('paper', data['paper'].train_mask), + ('paper', data['paper'].val_mask), + ('paper', data['paper'].test_mask), + ('paper', data['paper'].test_mask), + loader='neighbor', num_neighbors=args.sizes, + batch_size=args.batch_size, num_workers=2) print(datamodule) if not args.evaluate: @@ -160,7 +181,9 @@ def configure_optimizers(self): checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode = 'max', save_top_k=1) trainer = Trainer(accelerator="cpu", max_epochs=args.epochs, callbacks=[checkpoint_callback], - default_root_dir=f'logs/{args.model}') + default_root_dir=f'logs/{args.model}', + limit_train_batches=10, limit_test_batches=10, + limit_val_batches=10, limit_predict_batches=10) trainer.fit(model, datamodule=datamodule) if args.evaluate: @@ -177,7 +200,12 @@ def configure_optimizers(self): datamodule.batch_size = 16 datamodule.sizes = [160] * len(args.sizes) # (Almost) no sampling... - trainer.test(model=model, datamodule=datamodule) + trainer.predict(model=model, datamodule=datamodule) + if args.profile: + with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + trainer.predict(model=model, datamodule=datamodule) + p.step() # evaluator = MAG240MEvaluator() # loader = datamodule.hidden_test_dataloader()