add create_graph for jacobian and hessian in order to reduce unnecessary GPU memory when derivatives are involved in the prediction process (#600)
HydrogenSulfate authored Oct 26, 2023
1 parent f475b12 commit da4f020
Showing 2 changed files with 48 additions and 9 deletions.
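The change can be motivated with a raw paddle.grad sketch (plain paddle, not the ppsci wrappers changed below): with create_graph=True the backward computation is itself recorded so that higher-order derivatives can be taken later, while during pure prediction that extra graph is never used and only occupies GPU memory.

import paddle

x = paddle.rand([1024, 1])
x.stop_gradient = False
y = paddle.sin(x)

# Training / PDE-residual case: keep the backward graph so that d2y/dx2
# can still be computed from dy_dx later on.
dy_dx_train = paddle.grad(y, x, create_graph=True)[0]

# Prediction case: only the first-order derivative is needed, so the backward
# graph is discarded and the associated GPU memory is freed.
y_pred = paddle.sin(x)  # fresh forward pass, independent of the call above
dy_dx_pred = paddle.grad(y_pred, x, create_graph=False)[0]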
4 changes: 2 additions & 2 deletions examples/bubble/bubble.py
@@ -298,8 +298,8 @@ def transform_out(in_, out):
psi_y = out["psi"]
y = in_["y"]
x = in_["x"]
- u = jacobian(psi_y, y)
- v = -jacobian(psi_y, x)
+ u = jacobian(psi_y, y, create_graph=False)
+ v = -jacobian(psi_y, x, create_graph=False)
return {"u": u, "v": v}

# register transform
53 changes: 46 additions & 7 deletions ppsci/autodiff/ad.py
@@ -45,7 +45,13 @@ def __init__(self, ys: "paddle.Tensor", xs: "paddle.Tensor"):

self.J: Dict[str, paddle.Tensor] = {}

- def __call__(self, i: int = 0, j: Optional[int] = None) -> "paddle.Tensor":
+ def __call__(
+     self,
+     i: int = 0,
+     j: Optional[int] = None,
+     retain_graph: Optional[bool] = None,
+     create_graph: bool = True,
+ ) -> "paddle.Tensor":
"""Returns J[`i`][`j`]. If `j` is ``None``, returns the gradient of y_i, i.e.,
J[i].
"""
@@ -56,7 +62,9 @@ def __call__(self, i: int = 0, j: Optional[int] = None) -> "paddle.Tensor":
# Compute J[i]
if i not in self.J:
y = self.ys[:, i : i + 1] if self.dim_y > 1 else self.ys
- self.J[i] = paddle.grad(y, self.xs, create_graph=True)[0]
+ self.J[i] = paddle.grad(
+     y, self.xs, retain_graph=retain_graph, create_graph=create_graph
+ )[0]

return self.J[i] if (j is None or self.dim_x == 1) else self.J[i][:, j : j + 1]

@@ -82,6 +90,8 @@ def __call__(
xs: "paddle.Tensor",
i: int = 0,
j: Optional[int] = None,
+ retain_graph: Optional[bool] = None,
+ create_graph: bool = True,
) -> "paddle.Tensor":
"""Compute jacobians for given ys and xs.
@@ -90,6 +100,15 @@
xs (paddle.Tensor): Input tensor.
i (int, optional): i-th output variable. Defaults to 0.
j (Optional[int]): j-th input variable. Defaults to None.
+ retain_graph (Optional[bool]): Whether to retain the forward graph used to
+     compute the gradient. When True, the graph is retained so backward can
+     be run a second time on the same graph; when False, the graph is freed.
+     Defaults to None, which means it follows `create_graph`.
+ create_graph (bool, optional): Whether to create the gradient graph of the
+     computing process. When True, higher-order derivatives can be computed;
+     when False, the gradient graph is discarded. Defaults to True.
Returns:
paddle.Tensor: Jacobian matrix of ys[i] to xs[j].
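A usage sketch of the updated functional entry point, assuming jacobian is the module-level Jacobians() instance imported in bubble.py above; only the call pattern is illustrated, shapes are arbitrary.

import paddle
from ppsci.autodiff import jacobian  # assumed export of the Jacobians() instance

x = paddle.rand([16, 2])
x.stop_gradient = False
y = (x * x).sum(axis=1, keepdim=True)

# Prediction-style call: the backward graph is dropped right after the first-order
# derivative is computed, which is the memory saving this commit targets.
du_dx0 = jacobian(y, x, i=0, j=0, create_graph=False)

# Note: results are cached per (ys, xs) pair and output index, so the flags only
# take effect the first time a given derivative is computed.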
@@ -105,7 +124,7 @@
key = (ys, xs)
if key not in self.Js:
self.Js[key] = _Jacobian(ys, xs)
- return self.Js[key](i, j)
+ return self.Js[key](i, j, retain_graph, create_graph)

def _clear(self):
"""Clear cached Jacobians."""
@@ -157,12 +176,21 @@ def __init__(
component = 0

if grad_y is None:
- grad_y = jacobian(ys, xs, i=component, j=None)
+ # `create_graph` of the first-order jacobian must be `True` in _Hessian.
+ grad_y = jacobian(
+     ys, xs, i=component, j=None, retain_graph=None, create_graph=True
+ )
self.H = _Jacobian(grad_y, xs)

- def __call__(self, i: int = 0, j: int = 0):
+ def __call__(
+     self,
+     i: int = 0,
+     j: int = 0,
+     retain_graph: Optional[bool] = None,
+     create_graph: bool = True,
+ ):
"""Returns H[`i`][`j`]."""
- return self.H(i, j)
+ return self.H(i, j, retain_graph, create_graph)


class Hessians:
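The comment kept with the grad_y call above encodes a constraint worth spelling out with two bare paddle.grad calls (a minimal sketch, independent of the ppsci classes): the Hessian is the gradient of the first-order gradient, so the inner differentiation must be recorded with create_graph=True, and only the outermost one may drop its graph.

import paddle

x = paddle.rand([8, 1])
x.stop_gradient = False
y = paddle.exp(x)

# First-order gradient: must itself be part of the graph (create_graph=True),
# otherwise it cannot be differentiated again.
dy_dx = paddle.grad(y, x, create_graph=True)[0]

# Second-order gradient: this is the outermost derivative here, so its own
# backward graph can be dropped when no third-order terms are needed.
d2y_dx2 = paddle.grad(dy_dx, x, create_graph=False)[0]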
@@ -188,6 +216,8 @@ def __call__(
i: int = 0,
j: int = 0,
grad_y: Optional["paddle.Tensor"] = None,
+ retain_graph: Optional[bool] = None,
+ create_graph: bool = True,
) -> "paddle.Tensor":
"""Compute hessian matrix for given ys and xs.
@@ -201,6 +231,15 @@
j (int, optional): j-th input variable. Defaults to 0.
grad_y (Optional[paddle.Tensor]): The gradient of `y` w.r.t. `xs`. Provide `grad_y` if known to avoid
duplicate computation. Defaults to None.
+ retain_graph (Optional[bool]): Whether to retain the forward graph used to
+     compute the gradient. When True, the graph is retained so backward can
+     be run a second time on the same graph; when False, the graph is freed.
+     Defaults to None, which means it follows `create_graph`.
+ create_graph (bool, optional): Whether to create the gradient graph of the
+     computing process. When True, higher-order derivatives can be computed;
+     when False, the gradient graph is discarded. Defaults to True.
Returns:
paddle.Tensor: Hessian matrix.
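An analogous usage sketch for the hessian entry point, assuming the module-level Hessians() instance is exposed as ppsci.autodiff.hessian; the toy function and shapes are arbitrary.

import paddle
from ppsci.autodiff import hessian  # assumed export of the Hessians() instance

x = paddle.rand([16, 2])
x.stop_gradient = False
y = (x ** 3).sum(axis=1, keepdim=True)

# Second derivative d2y/dx0^2 at prediction time: only the outer backward graph is
# dropped; the first-order jacobian inside _Hessian still uses create_graph=True.
d2y_dx0dx0 = hessian(y, x, i=0, j=0, create_graph=False)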
@@ -216,7 +255,7 @@
key = (ys, xs, component)
if key not in self.Hs:
self.Hs[key] = _Hessian(ys, xs, component=component, grad_y=grad_y)
- return self.Hs[key](i, j)
+ return self.Hs[key](i, j, retain_graph, create_graph)

def _clear(self):
"""Clear cached Hessians."""
