From 577978aa5563d6bf8da864e4f5cf0cbcba5bf16a Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Thu, 8 Aug 2024 17:29:12 -0500 Subject: [PATCH 1/5] Remove MathNet.Numerics, embed the one class we actually used from it --- Packages.md | 1 - SynthEHR.Core/Datasets/BiochemistryRecord.cs | 2 +- SynthEHR.Core/Datasets/DataGenerator.cs | 2 +- .../Distributions/IContinuousDistribution.cs | 57 ++++ .../Statistics/Distributions/IDistribution.cs | 41 +++ .../Distributions/IUnivariateDistribution.cs | 66 ++++ .../Statistics/Distributions/Normal.cs | 299 ++++++++++++++++++ SynthEHR.Core/SynthEHR.Core.csproj | 3 +- 8 files changed, 466 insertions(+), 5 deletions(-) create mode 100644 SynthEHR.Core/Statistics/Distributions/IContinuousDistribution.cs create mode 100644 SynthEHR.Core/Statistics/Distributions/IDistribution.cs create mode 100644 SynthEHR.Core/Statistics/Distributions/IUnivariateDistribution.cs create mode 100644 SynthEHR.Core/Statistics/Distributions/Normal.cs diff --git a/Packages.md b/Packages.md index 5b27cd3..bdbace7 100644 --- a/Packages.md +++ b/Packages.md @@ -13,7 +13,6 @@ | CsvHelper | [GitHub](https://github.com/JoshClose/CsvHelper) | MS-PL / Apache 2.0 | Enables reading/writing CSV files | | [Equ](https://github.com/thedmi/Equ) | [GitHub](https://github.com/thedmi/Equ) | [MIT](https://opensource.org/licenses/MIT) | Simplifies object comparators | | HIC.FAnsiSql | [GitHub](https://github.com/HicServices/FAnsiSql) | [GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.html) | DBMS abstraction layer | -| MathNet.Numerics | [GitHub](https://github.com/mathnet/mathnet-numerics)| [MIT](https://opensource.org/licenses/MIT) | Generate statistical distributions (e.g. 
Gaussian) for random data | | | Microsoft.SourceLink.GitHub | [GitHub](https://github.com/dotnet/sourcelink) | [1.1.1](https://www.nuget.org/packages/Microsoft.SourceLink.GitHub/1.1.1) | [Apache License 2.0](https://github.com/dotnet/sourcelink/blob/master/License.txt) | Enables source debugging of project nuget package| | | YamlDotNet | [GitHub](https://github.com/aaubry/YamlDotNet) | [MIT](https://opensource.org/licenses/MIT) | Loading configuration files | diff --git a/SynthEHR.Core/Datasets/BiochemistryRecord.cs b/SynthEHR.Core/Datasets/BiochemistryRecord.cs index fc3c52d..5bcc24a 100644 --- a/SynthEHR.Core/Datasets/BiochemistryRecord.cs +++ b/SynthEHR.Core/Datasets/BiochemistryRecord.cs @@ -8,7 +8,7 @@ using System.Data; using System.Globalization; using System.Linq; -using MathNet.Numerics.Distributions; +using Normal = SynthEHR.Statistics.Distributions.Normal; namespace SynthEHR.Datasets; diff --git a/SynthEHR.Core/Datasets/DataGenerator.cs b/SynthEHR.Core/Datasets/DataGenerator.cs index 8f6929a..75d73ce 100644 --- a/SynthEHR.Core/Datasets/DataGenerator.cs +++ b/SynthEHR.Core/Datasets/DataGenerator.cs @@ -15,7 +15,7 @@ using System.Text; using CsvHelper; using CsvHelper.Configuration; -using MathNet.Numerics.Distributions; +using Normal = SynthEHR.Statistics.Distributions.Normal; namespace SynthEHR.Datasets; diff --git a/SynthEHR.Core/Statistics/Distributions/IContinuousDistribution.cs b/SynthEHR.Core/Statistics/Distributions/IContinuousDistribution.cs new file mode 100644 index 0000000..81ec011 --- /dev/null +++ b/SynthEHR.Core/Statistics/Distributions/IContinuousDistribution.cs @@ -0,0 +1,57 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2014 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software 
without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace SynthEHR.Statistics.Distributions; + +/// +/// Continuous Univariate Probability Distribution. +/// +internal interface IContinuousDistribution : IUnivariateDistribution +{ + /// + /// Gets the mode of the distribution. + /// + double Mode { get; } + + /// + /// Gets the smallest element in the domain of the distribution which can be represented by a double. + /// + double Minimum { get; } + + /// + /// Gets the largest element in the domain of the distribution which can be represented by a double. + /// + double Maximum { get; } + + /// + /// Draws a random sample from the distribution. + /// + /// a sample from the distribution. 
+ double Sample(); +} \ No newline at end of file diff --git a/SynthEHR.Core/Statistics/Distributions/IDistribution.cs b/SynthEHR.Core/Statistics/Distributions/IDistribution.cs new file mode 100644 index 0000000..17e6964 --- /dev/null +++ b/SynthEHR.Core/Statistics/Distributions/IDistribution.cs @@ -0,0 +1,41 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace SynthEHR.Statistics.Distributions; + +/// +/// Probability Distribution. +/// +internal interface IDistribution +{ + /// + /// Gets or sets the random number generator which is used to draw random samples. 
+ /// + System.Random RandomSource { get; set; } +} \ No newline at end of file diff --git a/SynthEHR.Core/Statistics/Distributions/IUnivariateDistribution.cs b/SynthEHR.Core/Statistics/Distributions/IUnivariateDistribution.cs new file mode 100644 index 0000000..0079eff --- /dev/null +++ b/SynthEHR.Core/Statistics/Distributions/IUnivariateDistribution.cs @@ -0,0 +1,66 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace SynthEHR.Statistics.Distributions; + +/// +/// Univariate Probability Distribution. +/// +internal interface IUnivariateDistribution:IDistribution +{ + /// + /// Gets the mean of the distribution. + /// + double Mean { get; } + + /// + /// Gets the variance of the distribution. 
+ /// + double Variance { get; } + + /// + /// Gets the standard deviation of the distribution. + /// + double StdDev { get; } + + /// + /// Gets the entropy of the distribution. + /// + double Entropy { get; } + + /// + /// Gets the skewness of the distribution. + /// + double Skewness { get; } + + /// + /// Gets the median of the distribution. + /// + double Median { get; } +} \ No newline at end of file diff --git a/SynthEHR.Core/Statistics/Distributions/Normal.cs b/SynthEHR.Core/Statistics/Distributions/Normal.cs new file mode 100644 index 0000000..bce8bac --- /dev/null +++ b/SynthEHR.Core/Statistics/Distributions/Normal.cs @@ -0,0 +1,299 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2015 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+// + +using System; +using System.Collections.Generic; + +namespace SynthEHR.Statistics.Distributions; + +/// +/// Continuous Univariate Normal distribution, also known as Gaussian distribution. +/// For details about this distribution, see +/// Wikipedia - Normal distribution. +/// +internal sealed class Normal : IContinuousDistribution +{ + private Random _random; + + private readonly double _mean; + private readonly double _stdDev; + + /// + /// Initializes a new instance of the Normal class. This is a normal distribution with mean 0.0 + /// and standard deviation 1.0. The distribution will + /// be initialized with the default random number generator. + /// + public Normal() + : this(0.0,1.0) + { + } + + /// + /// Initializes a new instance of the Normal class. This is a normal distribution with mean 0.0 + /// and standard deviation 1.0. The distribution will + /// be initialized with the default random number generator. + /// + /// The random number generator which is used to draw random samples. + public Normal(Random randomSource) + : this(0.0,1.0,randomSource) + { + } + + /// + /// Initializes a new instance of the Normal class with a particular mean and standard deviation. The distribution will + /// be initialized with the default random number generator. + /// + /// The mean (μ) of the normal distribution. + /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. + public Normal(double mean,double stddev) + { + if (!IsValidParameterSet(mean,stddev)) + { + throw new ArgumentException("Invalid parameterization for the distribution."); + } + + _random = new Random(); + _mean = mean; + _stdDev = stddev; + } + + /// + /// Initializes a new instance of the Normal class with a particular mean and standard deviation. The distribution will + /// be initialized with the default random number generator. + /// + /// The mean (μ) of the normal distribution. + /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. 
+ /// The random number generator which is used to draw random samples. + public Normal(double mean,double stddev,Random randomSource) + { + if (!IsValidParameterSet(mean,stddev)) + { + throw new ArgumentException("Invalid parametrization for the distribution."); + } + + _random = randomSource ?? new Random(); + _mean = mean; + _stdDev = stddev; + } + + /// + /// Constructs a normal distribution from a mean and standard deviation. + /// + /// The mean (μ) of the normal distribution. + /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. + /// The random number generator which is used to draw random samples. Optional, can be null. + /// a normal distribution. + public static Normal WithMeanStdDev(double mean,double stddev,Random randomSource = null) + { + return new Normal(mean,stddev,randomSource); + } + + /// + /// Constructs a normal distribution from a mean and variance. + /// + /// The mean (μ) of the normal distribution. + /// The variance (σ^2) of the normal distribution. + /// The random number generator which is used to draw random samples. Optional, can be null. + /// A normal distribution. + public static Normal WithMeanVariance(double mean,double var,Random randomSource = null) + { + return new Normal(mean,Math.Sqrt(var),randomSource); + } + + /// + /// Constructs a normal distribution from a mean and precision. + /// + /// The mean (μ) of the normal distribution. + /// The precision of the normal distribution. + /// The random number generator which is used to draw random samples. Optional, can be null. + /// A normal distribution. + public static Normal WithMeanPrecision(double mean,double precision,Random randomSource = null) + { + return new Normal(mean,1.0/Math.Sqrt(precision),randomSource); + } + + /// + /// A string representation of the distribution. + /// + /// a string representation of the distribution. 
+ public override string ToString() + { + return $"Normal(μ = {_mean}, σ = {_stdDev})"; + } + + /// + /// Tests whether the provided values are valid parameters for this distribution. + /// + /// The mean (μ) of the normal distribution. + /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. + public static bool IsValidParameterSet(double mean,double stddev) + { + return stddev >= 0.0 && !double.IsNaN(mean); + } + + /// + /// Gets the mean (μ) of the normal distribution. + /// + public double Mean => _mean; + + /// + /// Gets the standard deviation (σ) of the normal distribution. Range: σ ≥ 0. + /// + public double StdDev => _stdDev; + + /// + /// Gets the variance of the normal distribution. + /// + public double Variance => _stdDev*_stdDev; + + /// + /// Gets the precision of the normal distribution. + /// + public double Precision => 1.0/(_stdDev*_stdDev); + + /// + /// Gets the random number generator which is used to draw random samples. + /// + public Random RandomSource + { + get => _random; + set => _random = value ?? new Random(); + } + + /// + /// Gets the entropy of the normal distribution. + /// + public double Entropy => Math.Log(_stdDev) + LogSqrt2PiE; + + private const double LogSqrt2PiE = 1.4189385332046727417803297364056176398613974736378d; + + /// + /// Gets the skewness of the normal distribution. + /// + public double Skewness => 0.0; + + /// + /// Gets the mode of the normal distribution. + /// + public double Mode => _mean; + + /// + /// Gets the median of the normal distribution. + /// + public double Median => _mean; + + /// + /// Gets the minimum of the normal distribution. + /// + public double Minimum => double.NegativeInfinity; + + /// + /// Gets the maximum of the normal distribution. + /// + public double Maximum => double.PositiveInfinity; + + /// + /// Computes the probability density of the distribution (PDF) at x, i.e. ∂P(X ≤ x)/∂x. + /// + /// The location at which to compute the density. + /// the density at . 
+ public double Density(double x) + { + var d = (x - _mean)/_stdDev; + return Math.Exp(-0.5*d*d)/(Sqrt2Pi*_stdDev); + } + + private const double Sqrt2Pi = 2.5066282746310005024157652848110452530069867406099d; + + /// + /// Computes the log probability density of the distribution (lnPDF) at x, i.e. ln(∂P(X ≤ x)/∂x). + /// + /// The location at which to compute the log density. + /// the log density at . + public double DensityLn(double x) + { + var d = (x - _mean)/_stdDev; + return (-0.5*d*d) - Math.Log(_stdDev) - LogSqrt2Pi; + } + + private const double LogSqrt2Pi = 0.91893853320467274178032973640561763986139747363778; + + /// + /// Generates a sample from the normal distribution using the Box-Muller algorithm. + /// + /// a sample from the distribution. + public double Sample() + { + return SampleUnchecked(_random,_mean,_stdDev); + } + + internal static double SampleUnchecked(Random rnd,double mean,double stddev) + { + double x; + while (!PolarTransform(rnd.NextDouble(),rnd.NextDouble(),out x,out _)) + { + } + + return mean + (stddev*x); + } + + internal static IEnumerable SamplesUnchecked(Random rnd,double mean,double stddev) + { + while (true) + { + if (!PolarTransform(rnd.NextDouble(),rnd.NextDouble(),out var x,out var y)) + { + continue; + } + + yield return mean + (stddev*x); + yield return mean + (stddev*y); + } + } + + private const double InvPi = 0.31830988618379067153776752674502872406891929148091d; + + private static bool PolarTransform(double a,double b,out double x,out double y) + { + var v1 = (2.0*a) - 1.0; + var v2 = (2.0*b) - 1.0; + var r = (v1*v1) + (v2*v2); + if (r >= 1.0 || r == 0.0) + { + x = 0; + y = 0; + return false; + } + + var fac = Math.Sqrt(-2.0*Math.Log(r)/r); + x = v1*fac; + y = v2*fac; + return true; + } +} \ No newline at end of file diff --git a/SynthEHR.Core/SynthEHR.Core.csproj b/SynthEHR.Core/SynthEHR.Core.csproj index 6a1c22f..697ca67 100644 --- a/SynthEHR.Core/SynthEHR.Core.csproj +++ b/SynthEHR.Core/SynthEHR.Core.csproj @@ 
-46,11 +46,10 @@ - - + From a99c4466c4db86637c2badb3b2071590aca354f4 Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Tue, 13 Aug 2024 12:42:37 -0500 Subject: [PATCH 2/5] Make Person a record type, remove Generator.Equals, tidy Normal.cs and comment origins --- Packages.md | 1 - SynthEHR.Core/Person.cs | 57 +++----- .../Statistics/Distributions/Normal.cs | 131 +----------------- SynthEHR.Core/SynthEHR.Core.csproj | 1 - 4 files changed, 25 insertions(+), 165 deletions(-) diff --git a/Packages.md b/Packages.md index 88eadf7..36f6a85 100644 --- a/Packages.md +++ b/Packages.md @@ -11,7 +11,6 @@ | ------- | ------------| ------- | ------- | -------------------------- | | CommandLineParser | [GitHub](https://github.com/commandlineparser/commandline) | [MIT](https://opensource.org/licenses/MIT) | Allows command line arguments for main client application and CLI executables | | CsvHelper | [GitHub](https://github.com/JoshClose/CsvHelper) | MS-PL / Apache 2.0 | Enables reading/writing CSV files | -| Generator.Equals | [GitHub](https://github.com/diegofrata/Generator.Equals) | [MIT](https://opensource.org/licenses/MIT) | Simplifies object comparators | | HIC.FAnsiSql | [GitHub](https://github.com/HicServices/FAnsiSql) | [GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.html) | DBMS abstraction layer | | Microsoft.SourceLink.GitHub | [GitHub](https://github.com/dotnet/sourcelink) | [1.1.1](https://www.nuget.org/packages/Microsoft.SourceLink.GitHub/1.1.1) | [Apache License 2.0](https://github.com/dotnet/sourcelink/blob/master/License.txt) | Enables source debugging of project nuget package| | | YamlDotNet | [GitHub](https://github.com/aaubry/YamlDotNet) | [MIT](https://opensource.org/licenses/MIT) | Loading configuration files | diff --git a/SynthEHR.Core/Person.cs b/SynthEHR.Core/Person.cs index 04ff99e..d1dfdd2 100644 --- a/SynthEHR.Core/Person.cs +++ b/SynthEHR.Core/Person.cs @@ -4,9 +4,9 @@ // RDMP is distributed in the hope that it will be useful, but WITHOUT ANY 
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. // You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>. +#nullable enable using System; using System.Text; -using Generator.Equals; using SynthEHR.Datasets; namespace SynthEHR; @@ -14,10 +14,8 @@ namespace SynthEHR; /// /// Randomly generated person for whom datasets can be built /// -[Equatable] -public sealed partial class Person +public sealed record Person { - /// public string Forename { get; set; } /// @@ -36,7 +34,7 @@ public sealed partial class Person /// public DemographyAddress Address { get; set; } /// - public DemographyAddress PreviousAddress { get; set; } + public DemographyAddress? PreviousAddress { get; set; } /// /// Earliest year of birth to generate @@ -52,7 +50,7 @@ public sealed partial class Person /// /// The collection to which the patient belongs, may be null /// - private readonly PersonCollection _parent; + private readonly PersonCollection? _parent; /// /// Generates a new random person using the seeded random. This overload ensures that the generated @@ -60,17 +58,11 @@ public sealed partial class Person /// /// /// - public Person(Random r,PersonCollection collection):this(r) + public Person(Random? r, PersonCollection?
collection = null) { _parent = collection; - } + r ??= Random.Shared; - /// - /// Generates a new random person using the seeded random - /// - /// - public Person(Random r) - { Gender = r.Next(2) switch { 0 => 'F', @@ -85,7 +77,7 @@ public Person(Random r) //1 in 10 patients is dead if (r.Next(10) == 0) - DateOfDeath = DataGenerator.GetRandomDateAfter(DateOfBirth, r); + DateOfDeath = DataGenerator.GetRandomDateAfter(DateOfBirth,r); else DateOfDeath = null; @@ -96,7 +88,7 @@ public Person(Random r) Address = new DemographyAddress(r); //one in 10 people doesn't have a previous address - if(r.Next(10) != 0) + if (r.Next(10) != 0) PreviousAddress = new DemographyAddress(r); } @@ -105,32 +97,24 @@ public Person(Random r) /// /// /// - public string GetRandomForename(Random r) - { - return Gender == 'F' ? CommonGirlForenames[r.Next(100)] : CommonBoyForenames[r.Next(100)]; - } + public string GetRandomForename(Random r) => Gender == 'F' ? CommonGirlForenames[r.Next(100)] : CommonBoyForenames[r.Next(100)]; /// /// Returns a random date after the patients date of birth (and before their death if they are dead). /// /// /// - public DateTime GetRandomDateDuringLifetime(Random r) - { - return DateOfDeath == null + public DateTime GetRandomDateDuringLifetime(Random r) => + DateOfDeath == null ? DataGenerator.GetRandomDateAfter(DateOfBirth, r) : DataGenerator.GetRandomDate(DateOfBirth, (DateTime)DateOfDeath, r); - } /// /// Returns a random surname from a list of common surnames /// /// /// - public static string GetRandomSurname(Random r) - { - return CommonSurnames[r.Next(100)]; - } + public static string GetRandomSurname(Random r) => CommonSurnames[r.Next(100)]; /// /// If the person died before onDate it returns NULL (as of onDate we did not know when the person would die). if onDate is > date of death it @@ -138,12 +122,12 @@ public static string GetRandomSurname(Random r) /// /// /// - public DateTime? 
GetDateOfDeathOrNullOn(DateTime onDate) - { - return onDate >= DateOfDeath ? DateOfDeath : + public DateTime? GetDateOfDeathOrNullOn(DateTime onDate) => + onDate >= DateOfDeath + ? DateOfDeath + : //we cannot predict the future, they are dead today, but you are pretending the date is onDate null; - } /// /// Returns a new random ANOCHI which does not exist in (if we have one) @@ -154,12 +138,10 @@ private string GetNovelANOCHI(Random r) { var anochi = GenerateANOCHI(r); - while(_parent != null && _parent.AlreadyGeneratedANOCHIs.Contains(anochi)) + while (_parent?.AlreadyGeneratedANOCHIs.Add(anochi) == false) anochi = GenerateANOCHI(r); - _parent?.AlreadyGeneratedANOCHIs.Add(anochi); return anochi; - } /// @@ -171,16 +153,15 @@ private string GetNovelCHI(Random r) { var chi = GetRandomCHI(r); - while(_parent != null && _parent.AlreadyGeneratedCHIs.Contains(chi)) + while (_parent?.AlreadyGeneratedCHIs.Add(chi) == false) chi = GetRandomCHI(r); - _parent?.AlreadyGeneratedCHIs.Add(chi); return chi; } private static string GenerateANOCHI(Random r) { - var toReturn = new StringBuilder(); + var toReturn = new StringBuilder(12); for (var i = 0; i < 10; i++) toReturn.Append(r.Next(10)); diff --git a/SynthEHR.Core/Statistics/Distributions/Normal.cs b/SynthEHR.Core/Statistics/Distributions/Normal.cs index bce8bac..d58c285 100644 --- a/SynthEHR.Core/Statistics/Distributions/Normal.cs +++ b/SynthEHR.Core/Statistics/Distributions/Normal.cs @@ -1,4 +1,8 @@ -// +// This is a partial copy of Normal.cs from MathNet.Numerics 5.0.0 +// with the unused portions removed for embedding in SynthEHR. +// (We only use the basic functionality of the Normal distribution.)
+// +// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics @@ -28,7 +32,6 @@ // using System; -using System.Collections.Generic; namespace SynthEHR.Statistics.Distributions; @@ -44,45 +47,6 @@ internal sealed class Normal : IContinuousDistribution private readonly double _mean; private readonly double _stdDev; - /// - /// Initializes a new instance of the Normal class. This is a normal distribution with mean 0.0 - /// and standard deviation 1.0. The distribution will - /// be initialized with the default random number generator. - /// - public Normal() - : this(0.0,1.0) - { - } - - /// - /// Initializes a new instance of the Normal class. This is a normal distribution with mean 0.0 - /// and standard deviation 1.0. The distribution will - /// be initialized with the default random number generator. - /// - /// The random number generator which is used to draw random samples. - public Normal(Random randomSource) - : this(0.0,1.0,randomSource) - { - } - - /// - /// Initializes a new instance of the Normal class with a particular mean and standard deviation. The distribution will - /// be initialized with the default random number generator. - /// - /// The mean (μ) of the normal distribution. - /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. - public Normal(double mean,double stddev) - { - if (!IsValidParameterSet(mean,stddev)) - { - throw new ArgumentException("Invalid parameterization for the distribution."); - } - - _random = new Random(); - _mean = mean; - _stdDev = stddev; - } - /// /// Initializes a new instance of the Normal class with a particular mean and standard deviation. The distribution will /// be initialized with the default random number generator. @@ -102,42 +66,6 @@ public Normal(double mean,double stddev,Random randomSource) _stdDev = stddev; } - /// - /// Constructs a normal distribution from a mean and standard deviation. 
- /// - /// The mean (μ) of the normal distribution. - /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. - /// The random number generator which is used to draw random samples. Optional, can be null. - /// a normal distribution. - public static Normal WithMeanStdDev(double mean,double stddev,Random randomSource = null) - { - return new Normal(mean,stddev,randomSource); - } - - /// - /// Constructs a normal distribution from a mean and variance. - /// - /// The mean (μ) of the normal distribution. - /// The variance (σ^2) of the normal distribution. - /// The random number generator which is used to draw random samples. Optional, can be null. - /// A normal distribution. - public static Normal WithMeanVariance(double mean,double var,Random randomSource = null) - { - return new Normal(mean,Math.Sqrt(var),randomSource); - } - - /// - /// Constructs a normal distribution from a mean and precision. - /// - /// The mean (μ) of the normal distribution. - /// The precision of the normal distribution. - /// The random number generator which is used to draw random samples. Optional, can be null. - /// A normal distribution. - public static Normal WithMeanPrecision(double mean,double precision,Random randomSource = null) - { - return new Normal(mean,1.0/Math.Sqrt(precision),randomSource); - } - /// /// A string representation of the distribution. /// @@ -152,7 +80,7 @@ public override string ToString() /// /// The mean (μ) of the normal distribution. /// The standard deviation (σ) of the normal distribution. Range: σ ≥ 0. - public static bool IsValidParameterSet(double mean,double stddev) + private static bool IsValidParameterSet(double mean,double stddev) { return stddev >= 0.0 && !double.IsNaN(mean); } @@ -172,11 +100,6 @@ public static bool IsValidParameterSet(double mean,double stddev) /// public double Variance => _stdDev*_stdDev; - /// - /// Gets the precision of the normal distribution. 
- /// - public double Precision => 1.0/(_stdDev*_stdDev); - /// /// Gets the random number generator which is used to draw random samples. /// @@ -218,32 +141,6 @@ public Random RandomSource /// public double Maximum => double.PositiveInfinity; - /// - /// Computes the probability density of the distribution (PDF) at x, i.e. ∂P(X ≤ x)/∂x. - /// - /// The location at which to compute the density. - /// the density at . - public double Density(double x) - { - var d = (x - _mean)/_stdDev; - return Math.Exp(-0.5*d*d)/(Sqrt2Pi*_stdDev); - } - - private const double Sqrt2Pi = 2.5066282746310005024157652848110452530069867406099d; - - /// - /// Computes the log probability density of the distribution (lnPDF) at x, i.e. ln(∂P(X ≤ x)/∂x). - /// - /// The location at which to compute the log density. - /// the log density at . - public double DensityLn(double x) - { - var d = (x - _mean)/_stdDev; - return (-0.5*d*d) - Math.Log(_stdDev) - LogSqrt2Pi; - } - - private const double LogSqrt2Pi = 0.91893853320467274178032973640561763986139747363778; - /// /// Generates a sample from the normal distribution using the Box-Muller algorithm. 
/// @@ -263,22 +160,6 @@ internal static double SampleUnchecked(Random rnd,double mean,double stddev) return mean + (stddev*x); } - internal static IEnumerable SamplesUnchecked(Random rnd,double mean,double stddev) - { - while (true) - { - if (!PolarTransform(rnd.NextDouble(),rnd.NextDouble(),out var x,out var y)) - { - continue; - } - - yield return mean + (stddev*x); - yield return mean + (stddev*y); - } - } - - private const double InvPi = 0.31830988618379067153776752674502872406891929148091d; - private static bool PolarTransform(double a,double b,out double x,out double y) { var v1 = (2.0*a) - 1.0; diff --git a/SynthEHR.Core/SynthEHR.Core.csproj b/SynthEHR.Core/SynthEHR.Core.csproj index 4922ae6..a323688 100644 --- a/SynthEHR.Core/SynthEHR.Core.csproj +++ b/SynthEHR.Core/SynthEHR.Core.csproj @@ -45,7 +45,6 @@ - From 21162ef7cc2eaa621c0ec8da485c3d5b138f433a Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Tue, 13 Aug 2024 12:46:02 -0500 Subject: [PATCH 3/5] Update SynthEHR.Core.csproj Actually remove Mathnet.Numerics now we aren't using it --- SynthEHR.Core/SynthEHR.Core.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/SynthEHR.Core/SynthEHR.Core.csproj b/SynthEHR.Core/SynthEHR.Core.csproj index a323688..7368656 100644 --- a/SynthEHR.Core/SynthEHR.Core.csproj +++ b/SynthEHR.Core/SynthEHR.Core.csproj @@ -45,7 +45,6 @@ - From aed2ad040f2d23801680234d3a8d304d8876ed2e Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Tue, 13 Aug 2024 12:47:07 -0500 Subject: [PATCH 4/5] Update testpack.yml Newer Ubuntu, and use the 8.x SDK not 6.x --- .github/workflows/testpack.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/testpack.yml b/.github/workflows/testpack.yml index 2e7b11b..d12b937 100644 --- a/.github/workflows/testpack.yml +++ b/.github/workflows/testpack.yml @@ -4,12 +4,12 @@ on: push jobs: package: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - uses: 
actions/setup-dotnet@v4 with: - dotnet-version: '6.0.x' + dotnet-version: '8.0.x' - name: Test run: dotnet test --nologo - name: Package From 697f49ec3225e7ab89a7176bbb91f14e8497b9e7 Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Tue, 13 Aug 2024 12:54:04 -0500 Subject: [PATCH 5/5] Tidy, fix missing Dispose --- SynthEHR.Core/Datasets/Demography.cs | 281 +++++++++--------- .../Datasets/HospitalAdmissionsRecord.cs | 2 +- 2 files changed, 138 insertions(+), 145 deletions(-) diff --git a/SynthEHR.Core/Datasets/Demography.cs b/SynthEHR.Core/Datasets/Demography.cs index 0ae4caa..8391b6e 100644 --- a/SynthEHR.Core/Datasets/Demography.cs +++ b/SynthEHR.Core/Datasets/Demography.cs @@ -10,191 +10,184 @@ namespace SynthEHR.Datasets; /// /// -public class Demography(Random rand) : DataGenerator(rand) +public sealed class Demography(Random rand) : DataGenerator(rand) { - /// public override object[] GenerateTestDataRow(Person person) { - //leave off data load run ID - var values = new object[39]; + //leave off data load run ID + var values = new object[39]; - values[0] = person.CHI; - values[1] = GetRandomDateAfter(person.DateOfBirth,r);//all records must have been created after the person was born + values[0] = person.CHI; + values[1] = GetRandomDateAfter(person.DateOfBirth,r);//all records must have been created after the person was born - if(r.Next(0, 2) == 0) - values[2] = true; - else - values[2] = false; + if (r.Next(0,2) == 0) + values[2] = true; + else + values[2] = false; - values[3] = "Random record"; + values[3] = "Random record"; - if(r.Next(0,10 )== 0)//one in 10 records has one of these (an ALIAS chi) - values[4] = person.GetRandomCHI(r); + if (r.Next(0,10)== 0)//one in 10 records has one of these (an ALIAS chi) + values[4] = person.GetRandomCHI(r); - values[5] = GetRandomCHIStatus(r); - values[6] = person.DateOfBirth.Year.ToString()[..2]; - values[7] = person.Surname; - values[8] = person.Forename; - values[9] = person.Gender; + values[5] = 
GetRandomCHIStatus(r); + values[6] = person.DateOfBirth.Year.ToString()[..2]; + values[7] = person.Surname; + values[8] = person.Forename; + values[9] = person.Gender; - var randomAddress = new DemographyAddress(r); + var randomAddress = new DemographyAddress(r); - //if person is dead and dtCreated is after they died use the same address otherwise use a random one (all records after a person dies have same address) - values[10] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line1: randomAddress.Line1; - values[11] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line2: randomAddress.Line2; - values[12] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line3: randomAddress.Line3; - values[13] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line4: randomAddress.Line4; - values[14] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Postcode.Value: randomAddress.Postcode.Value; + //if person is dead and dtCreated is after they died use the same address otherwise use a random one (all records after a person dies have same address) + values[10] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line1 : randomAddress.Line1; + values[11] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line2 : randomAddress.Line2; + values[12] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line3 : randomAddress.Line3; + values[13] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? person.Address.Line4 : randomAddress.Line4; + values[14] = person.DateOfDeath != null && (DateTime)values[1]>person.DateOfDeath ? 
person.Address.Postcode.Value : randomAddress.Postcode.Value; - //if the person is dead and the dtCreated of the record is greater than the date of death populate it - values[15] = person.GetDateOfDeathOrNullOn((DateTime)values[1]); //pass record creation date and get isdead date back + //if the person is dead and the dtCreated of the record is greater than the date of death populate it + values[15] = person.GetDateOfDeathOrNullOn((DateTime)values[1]); //pass record creation date and get isdead date back - //if we got a date put the source in as R - if(values[15] != null) - values[16] = 'R'; + //if we got a date put the source in as R + if (values[15] != null) + values[16] = 'R'; - if(!string.IsNullOrWhiteSpace(person.Address.Postcode.District)) - values[17] = person.Address.Postcode.District[..1]; + if (!string.IsNullOrWhiteSpace(person.Address.Postcode.District)) + values[17] = person.Address.Postcode.District[..1]; - values[18] = GetRandomLetter(true,r); + values[18] = GetRandomLetter(true,r); - //healthboard 'A' use padding on the name field (to a length of 10!) - if((char)values[18] == 'A') - if (values[8] != null) - while (values[8].ToString()?.Length < 10) - values[8] = $"{values[8]} "; + //healthboard 'A' use padding on the name field (to a length of 10!) + if ((char)values[18] == 'A') + if (values[8] != null) + while (values[8].ToString()?.Length < 10) + values[8] = $"{values[8]} "; - //in healthboard 'B' they give us both forename and surname in the same field! - and surname is always blank - if ((char)values[18] == 'B') - { - values[8] = $"{values[8]} {values[7]}"; - values[7] = null; - } + //in healthboard 'B' they give us both forename and surname in the same field! 
- and surname is always blank + if ((char)values[18] == 'B') + { + values[8] = $"{values[8]} {values[7]}"; + values[7] = null; + } - values[19] = GetRandomGPCode(r); + values[19] = GetRandomGPCode(r); - //birth surname and previous surname fields, sparsely populated - if (r.Next(0, 10) == 0) - values[20] = Person.GetRandomSurname(r); - if (r.Next(0, 10) == 0) - values[21] = Person.GetRandomSurname(r); + //birth surname and previous surname fields, sparsely populated + if (r.Next(0,10) == 0) + values[20] = Person.GetRandomSurname(r); + if (r.Next(0,10) == 0) + values[21] = Person.GetRandomSurname(r); - if (r.Next(0, 3) == 0) - values[22] = person.GetRandomForename(r); //random gender appropriate middle name for 1 person in 3 + if (r.Next(0,3) == 0) + values[22] = person.GetRandomForename(r); //random gender appropriate middle name for 1 person in 3 - if (r.Next(0, 5) == 0) - values[23] = person.GetRandomForename(r); //alternate forename + if (r.Next(0,5) == 0) + values[23] = person.GetRandomForename(r); //alternate forename - if(r.Next(0,3)==0) - values[24] = GetRandomLetter(true,r); //one in 3 has an initial + if (r.Next(0,3)==0) + values[24] = GetRandomLetter(true,r); //one in 3 has an initial - //people only have previous addresses if they are alive - if(r.Next(0, 2) == 0 && person.DateOfDeath != null) - { - var randomAddress2 = new DemographyAddress(r); + //people only have previous addresses if they are alive + if (r.Next(0,2) == 0 && person.DateOfDeath != null) + { + var randomAddress2 = new DemographyAddress(r); - values[25] = randomAddress2.Line1; - values[26] = randomAddress2.Line2; - values[27] = randomAddress2.Line3; - values[28] = randomAddress2.Line4; - values[29] = randomAddress2.Postcode.Value; + values[25] = randomAddress2.Line1; + values[26] = randomAddress2.Line2; + values[27] = randomAddress2.Line3; + values[28] = randomAddress2.Line4; + values[29] = randomAddress2.Postcode.Value; - //date of address change is unknown for 50% of records - if 
(r.Next(0, 2) == 0) - { - //get after birth but before dtCreated/date of death - values[30] = GetRandomDate(person.DateOfBirth, GetMinimum(person.DateOfDeath,(DateTime)values[1]),r); - } - } + //date of address change is unknown for 50% of records + if (r.Next(0,2) == 0) + //get after birth but before dtCreated/date of death + values[30] = GetRandomDate(person.DateOfBirth,GetMinimum(person.DateOfDeath,(DateTime)values[1]),r); + } - //an always null field, why not?! - values[31] = null; + //an always null field, why not?! + values[31] = null; - var gp_accept_date = GetRandomDateAfter(person.DateOfBirth, r); + var gpAcceptDate = GetRandomDateAfter(person.DateOfBirth, r); - //current_gp_accept_date - values[32] = gp_accept_date; + //current_gp_accept_date + values[32] = gpAcceptDate; - //before 1980 some records will be missing forename (deliberate errors!) - if (gp_accept_date.Year < 1980) - if (r.Next(gp_accept_date.Year - Person.MinimumYearOfBirth) == 0)//the farther back you go the more likely they are to be missing a forename - values[8] = null;//some people are randomly missing a forename + //before 1980 some records will be missing forename (deliberate errors!) 
+ if (gpAcceptDate.Year < 1980 && r.Next(gpAcceptDate.Year - Person.MinimumYearOfBirth) == 0) //the farther back you go the more likely they are to be missing a forename + values[8] = null; //some people are randomly missing a forename - if(r.Next(0,3)==0) - { - values[33] = GetRandomGPCode(r); - values[34] = GetRandomDateAfter((DateTime) values[32], r); - } + if (r.Next(0,3) == 0) + { + values[33] = GetRandomGPCode(r); + values[34] = GetRandomDateAfter((DateTime)values[32],r); + } - values[35] = GetRandomDate(person.DateOfBirth, GetMinimum(person.DateOfDeath, (DateTime)values[1]), r); - values[36] = person.DateOfBirth; - values[37] = GetRandomDouble(r); + values[35] = GetRandomDate(person.DateOfBirth,GetMinimum(person.DateOfDeath,(DateTime)values[1]),r); + values[36] = person.DateOfBirth; + values[37] = GetRandomDouble(r); - //data load run id will be batches 1 (1900 is first year of possible dtCreated) to 12 (2015 - 1890 / 10 = 12) - values[38] = (((DateTime) values[1]).Year - 1890)/10; + //data load run id will be batches 1 (1900 is first year of possible dtCreated) to 12 (2015 - 1890 / 10 = 12) + values[38] = (((DateTime)values[1]).Year - 1890) / 10; - return values; - } + return values; + } private static DateTime GetMinimum(DateTime? 
date1, DateTime date2) { - if (date1 == null) - return date2; + if (date1 == null) + return date2; - if (date2 > date1) - return (DateTime)date1; + if (date2 > date1) + return (DateTime)date1; - return date2; - } + return date2; + } /// - protected override string[] GetHeaders() - { - return [ - "chi", //0 - "dtCreated", //1 - "current_record", //2 - "notes", //3 - "chi_num_of_curr_record", //4 - "chi_status", //5 - "century", //6 - "surname", //7 - "forename", //8 - "sex", //9 - "current_address_L1", //10 - "current_address_L2", //11 - "current_address_L3", //12 - "current_address_L4", //13 - "current_postcode", //14 - "date_of_death", //15 - "source_death", //16 - "area_residence", //17 - "hb_extract", //18 - "current_gp", //19 - "birth_surname", //20 - "previous_surname", //21 - "midname", //22 - "alt_forename", //23 - "other_initials", //24 - "previous_address_L1", //25 - "previous_address_L2", //26 - "previous_address_L3", //27 - "previous_address_L4", //28 - "previous_postcode", //29 - "date_address_changed", //30 - "adr", //31 - "current_gp_accept_date", //32 - "previous_gp", //33 - "previous_gp_accept_date", //34 - "date_into_practice", //35 - "date_of_birth", //36 - "patient_triage_score", //37 - "hic_dataLoadRunID" //38 - ]; - } - + protected override string[] GetHeaders() => + [ + "chi", //0 + "dtCreated", //1 + "current_record", //2 + "notes", //3 + "chi_num_of_curr_record", //4 + "chi_status", //5 + "century", //6 + "surname", //7 + "forename", //8 + "sex", //9 + "current_address_L1", //10 + "current_address_L2", //11 + "current_address_L3", //12 + "current_address_L4", //13 + "current_postcode", //14 + "date_of_death", //15 + "source_death", //16 + "area_residence", //17 + "hb_extract", //18 + "current_gp", //19 + "birth_surname", //20 + "previous_surname", //21 + "midname", //22 + "alt_forename", //23 + "other_initials", //24 + "previous_address_L1", //25 + "previous_address_L2", //26 + "previous_address_L3", //27 + "previous_address_L4", //28 + 
"previous_postcode", //29 + "date_address_changed", //30 + "adr", //31 + "current_gp_accept_date", //32 + "previous_gp", //33 + "previous_gp_accept_date", //34 + "date_into_practice", //35 + "date_of_birth", //36 + "patient_triage_score", //37 + "hic_dataLoadRunID" //38 + ]; } \ No newline at end of file diff --git a/SynthEHR.Core/Datasets/HospitalAdmissionsRecord.cs b/SynthEHR.Core/Datasets/HospitalAdmissionsRecord.cs index ce3c7c3..4e73fe3 100644 --- a/SynthEHR.Core/Datasets/HospitalAdmissionsRecord.cs +++ b/SynthEHR.Core/Datasets/HospitalAdmissionsRecord.cs @@ -209,7 +209,7 @@ static HospitalAdmissionsRecord() rowCount++; } - var operationsTable = new DataTable(); + using var operationsTable = new DataTable(); operationsTable.BeginLoadData(); operationsTable.Columns.Add("CountOfRecords", typeof(int));