Add matlab utils for convergence plotting.

OPM · Mar 15, 2024 · 26e0f49 · 26e0f49
1 parent 5afe95c
commit 26e0f49
Show file tree

Hide file tree

Showing 9 changed files with 6,190 additions and 0 deletions.
diff --git a/matlab/LICENSE b/matlab/LICENSE
diff --git a/matlab/LICENSE_spider_plot.txt b/matlab/LICENSE_spider_plot.txt
@@ -0,0 +1,24 @@
+Copyright (c) 2020-2023, Moses
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the distribution
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/matlab/README b/matlab/README
@@ -0,0 +1,10 @@
+Various MATLAB utilities useful to analyse OPM results.
+
+Uses spider_plot obtained from MATLAB Central. This has been included in this
+directory for convenience, spider_plot license is in LICENCE_spider_plot.txt
+
+License for the other files:
+analyseOpmNonlinConv.m
+readInfoIter.m
+visualiseOpmNonlinConv.m
+is the GPLv3, in the file LICENSE.
diff --git a/matlab/analyseOpmNonlinConv.m b/matlab/analyseOpmNonlinConv.m
@@ -0,0 +1,180 @@
+function [errors, labels, metrics] = ...
+      analyseOpmNonlinConv(mb, cnv, curvePos, raw, varargin)
+%Calculate Error Metrics and Iteration Flags From OPM's INFOITER Data
+%
+% SYNOPSIS:
+%   [errors, labels, metrics] = ...
+%       analyseOpmNonLinConv(mb, cnv, curvePos, raw)
+%
+%   [errors, labels, metrics] = ...
+%       analyseOpmNonLinConv(mb, cnv, curvePos, raw, 'pn1', pv1, ...)
+%
+% PARAMETERS:
+%   mb       - Material balance structure from readInfoIter.
+%
+%   cnv      - Convergence metric structure from readInfoIter.
+%
+%   curvePos - Indirection map 'curve_pos' from readInfoIter.
+%
+%   raw      - Raw INFOITER data from readInfoIter.
+%
+%   'pn'/pv  - Optional parameters passed as 'key'/value pairs.  Supported
+%              options are:
+%                * tol -- Convergence tolerances.  Scalar structure with
+%                         known fields 'mb' and 'cnv' defining convergence
+%                         thresholds for the material balance and CNV
+%                         metrics, respectively.  Default thresholds are
+%                         the OPM defaults of mb = 1.0e-6 and cnv = 1.0e-3.
+%                         If you omit one of the fields then the internal
+%                         default threshold value will be used in its place.
+%
+%                * maxFail -- Upper bound on number of failures to accept a
+%                         given timestep.  Default value: maxFail = 5.
+%
+%                * distDecrFactor -- Target factor by which the distance to
+%                         the convergence box is expected to decrease from
+%                         one non-linear iteration to the next in order to
+%                         be considered acceptable.
+%
+%                * inferFailure -- Callback function by which to identify
+%                         number of convergence failures within each
+%                         non-linear iteration.  Must be a reduction
+%                         function which converts an m-by-k array of DOUBLE
+%                         to an m-by-1 array of integers. Default value:
+%                         @(e) sum(e > 0, 2).
+%
+%                * calcDistance -- Callback function by which to compute
+%                         the distance to the convergence box within each
+%                         non-linear iteration.  Must be a reduction
+%                         function which converts an m-by-k array of DOUBLE
+%                         to an m-by-1 array of DOUBLE.  Default value:
+%                         @(e) sum(e, 2).
+%
+% RETURNS:
+%   errors  - Error measures.  An m-by-k array of non-negative DOUBLE
+%             values, with 'm' being the total number of non-linear
+%             iterations in the run (curvePos(end) - 1), and 'k' being the
+%             total number of material balance and CNV metrics.  Individual
+%             elements are zero if the corresponding metric is within the
+%             convergence box.
+%
+%   labels  - Convergence metric axis labels.  A 1-by-k cell array of
+%             character vectors created by concatenating the input labels
+%             for the 'mb' and 'cnv' measures.
+%
+%   metrics - Inferred convergence indicators for the non-linear solution
+%             process.  Scalar structure with the following fields:
+%               * fail -- Failure counts.  An m-by-1 array of non-negative
+%                         integers.
+%
+%               * dist -- Convergence box distances.  An m-by-1 array of
+%                         non-negative DOUBLE values.
+%
+%               * conv -- Status flags for step convergence.  An nstep-by-1
+%                         LOGICAL array which is TRUE for converged steps
+%                         and FALSE otherwise.  The number of steps is
+%                         NUMEL(curvePos) - 1.
+%
+%               * flag -- Failure flags.  An m-by-2 array of non-negative
+%                         integers.  Flag(:,1) identifies component
+%                         convergence failures and increases by one if the
+%                         number of failed components increases from one
+%                         non-linear iteration to the next.  Flag(:,2)
+%                         identifies convergence rate failures and
+%                         increases by one if the convergence box distance
+%                         does not decrease at a sufficient rate from one
+%                         non-linear iteration to the next.
+%
+%                         Both flags are reset to zero at the start of the
+%                         next timestep (ministep).
+%
+%               * flaggedSteps -- Index of potentially interesting
+%                         timesteps (ministeps).  These are the timesteps
+%                         for which the total number of flags, summed over
+%                         all flag categories, exceed the accepted upper
+%                         bound ('maxFail' option value).
+%
+%               * maxFail -- Copy of 'maxFail' option value.  Typically for
+%                         use by post-processors/visualisers.
+%
+% SEE ALSO:
+%   readInfoIter, visualiseOpmNonlinConv.
+
+   opt = struct('tol'           , default_tolerances(), ...
+                'maxFail'       , 5, ...
+                'distDecrFactor', 0.75, ...
+                'inferFailure'  , @(e) sum(e > 0, 2), ...
+                'calcDistance'  , @(e) sum(e, 2));
+   opt = merge_options(opt, varargin{:});
+
+   [errors, labels] = compute_errors(mb, cnv, opt);
+   [fail, dist, flag] = analyse_errors(errors, opt, curvePos);
+
+   flaggedSteps = ...
+      find(sum(flag(curvePos(2:end) - 1, :), 2) > opt.maxFail);
+
+   metrics = struct('fail', fail, 'dist', dist, ...
+                    'flag', flag, 'maxFail', opt.maxFail, ...
+                    'flaggedSteps', flaggedSteps, ...
+                    'conv', identify_successful_steps(curvePos, raw));
+end
+
+%--------------------------------------------------------------------------
+
+function tol = default_tolerances()
+   tol = struct('mb', 1.0e-6, 'cnv', 1.0e-2);
+end
+
+%--------------------------------------------------------------------------
+
+function [error, labels] = compute_errors(mb, cnv, opt)
+   tol = merge_structures(default_tolerances(), opt.tol);
+
+   t = rldecode([tol.cnv, tol.mb], [numel(cnv.label), numel(mb.label)], 2);
+   error = max(log10(bsxfun(@rdivide, [cnv.value, mb.value], t)), 0);
+
+   labels = [ reshape(cnv.label, 1, []), reshape(mb.label, 1, []) ];
+end
+
+%--------------------------------------------------------------------------
+
+function [fail, dist, flag] = analyse_errors(e, opt, curvePos)
+   fail = opt.inferFailure(e);
+   dist = opt.calcDistance(e);
+
+   iterIx = mcolon(curvePos(1 : end - 1) + 1, curvePos(2 : end) - 1);
+
+   flag = zeros([numel(fail), 2]);
+   flag(iterIx, :) = ...
+      double([fail(iterIx) >                    fail(iterIx - 1), ...
+      dist(iterIx) > opt.distDecrFactor*dist(iterIx - 1)]);
+
+   flag = cumsum(flag, 1);
+   flag = flag - rldecode(flag(curvePos(1:end-1), :), diff(curvePos));
+end
+
+%--------------------------------------------------------------------------
+
+function success = identify_successful_steps(curvePos, raw)
+   step = [raw.ReportStep, raw.TimeStep];
+   last = curvePos(2 : end - 1) - 1;  % Last iteration of previous step
+   frst = last + 1;                   % First iteration of current step
+
+   % Step succeeded if we advanced to next
+   %   1. Report step (step(:,1))
+   %   2. Time step within current report step (step(:,2), "ministep")
+   success = [any(step(frst,:) > step(last,:), 2); true];
+end
+
+%--------------------------------------------------------------------------
+
+function s = merge_structures(s, s2)
+   vargs = reshape([reshape(fieldnames (s2), 1, []); ...
+                    reshape(struct2cell(s2), 1, [])], 1, []);
+
+   [s, extra] = merge_options(s, vargs{:});
+
+   for kv = reshape(extra, 2, [])
+      s.(kv{1}) = kv{2};
+   end
+end
diff --git a/matlab/readInfoIter.m b/matlab/readInfoIter.m
@@ -0,0 +1,83 @@
+function [mb, cnv, curve_pos, raw] = readInfoIter(fname)
+%Import INFOITER Information From OPM Flow Simulation
+%
+% SYNOPSIS:
+%   [mb, cnv, curve_pos, raw] = readInfoIter(fname)
+%
+% PARAMETERS:
+%   fname - Filename.  Assumed to name a .INFOITER file from an OPM Flow
+%           simulation run.  This file will be opened using FOPEN mode
+%           'rt' and read using function TEXTSCAN.
+%
+% RETURNS:
+%   mb        - Material Balance values.  Collection of all applicable MB_*
+%               columns.  Structure with the following fields
+%                  * value -- Numerical values
+%                  * label -- Cell array of character vectors
+%
+%   cnv       - Convergence Metric values.  Collection of all applicable
+%               CNV_* columns.  Structure with the following fields
+%                  * value -- Numerical values
+%                  * label -- Cell array of character vectors
+%
+%   curve_pos - Indirection map into the curve values.  In particular,
+%               rows [curve_pos(i) : curve_pos(i + 1) - 1] in 'mb.value'
+%               and 'cnv.value' hold the values for curve/convergence
+%               history 'i'.
+%
+%   raw       - Raw data from INFOITER file.  Mostly for debugging
+%               purposes.
+%
+% SEE ALSO:
+%   fopen, textscan.
+
+   [fid, msg] = fopen(fname, 'rt');
+   if fid < 0
+      error('FileOpen:Failure', ...
+            'Failed to Open INFOITER File ''%s'': %s', fname, msg);
+   end
+
+   c = onCleanup(@() fclose(fid));
+
+   [raw, mb, cnv] = read_raw_data(fid);
+   curve_pos = compute_start_positions(raw.Iteration);
+end
+
+%--------------------------------------------------------------------------
+
+function [raw, mb, cnv] = read_raw_data(fid)
+   header = textscan(fgetl(fid), '%s');
+   fmt    = repmat({'%f '}, size(header{1}));
+
+   % Add more string/categorical columns as needed.
+   is_categorical = identify_columns(header, { 'WellStatus' });
+   fmt(is_categorical) = {'%C '};
+
+   fmt    = [reshape(fmt, 1, []), {'%*[^\n]'}];
+   values = textscan(fid, [fmt{:}]);
+   raw    = cell2struct(values, header{1}, 2);
+
+   collect_colums0 = @(ix) ...
+      struct('value', [ values{ix} ], ...
+             'label', { regexprep(header{1}(ix), '_', '.') });
+
+   collect_columns = @(pattern) ...
+      collect_colums0(~ cellfun('isempty', regexp(header{1}, pattern)));
+
+   mb  = collect_columns('^MB_');
+   cnv = collect_columns('^CNV_');
+end
+
+%--------------------------------------------------------------------------
+
+function curve_pos = compute_start_positions(iters)
+   curve_pos = find([true; iters(2:end) < iters(1:end-1); true]);
+end
+
+%--------------------------------------------------------------------------
+
+function col = identify_columns(header, columns)
+   [i, j] = blockDiagIndex(numel(header{1}), numel(columns));
+   col = any(reshape(strcmp(header{1}(i), columns(j)), ...
+                     numel(header{1}), []), 2);
+end