dna_polarisation and autoplot in R

leifeld · Jan 10, 2025 · a9ce5ec · a9ce5ec
1 parent 2da3b5e
commit a9ce5ec
Show file tree

Hide file tree

Showing 6 changed files with 380 additions and 56 deletions.
diff --git a/dna/src/main/java/dna/Dna.java b/dna/src/main/java/dna/Dna.java
@@ -17,7 +17,7 @@ public class Dna {
 	public static Dna dna;
 	public static Logger logger;
 	public static Sql sql;
-	public static final String date = "2025-01-09";
+	public static final String date = "2025-01-10";
 	public static final String version = "3.0.11.4";
 	public static final String operatingSystem = System.getProperty("os.name");
 	public static File workingDirectory = null;

diff --git a/dna/src/main/java/dna/HeadlessDna.java b/dna/src/main/java/dna/HeadlessDna.java
@@ -631,9 +631,9 @@ public void rTimeWindow(String networkType, String statementType, String variabl
 	 * @param mutationPercentage The probability with which each bit in any cluster solution is selected for mutation after the cross-over step. For example 0.1 to select 10% of the nodes to swap their memberships.
 	 * @param qualityFunction    The quality function to evaluate cluster solutions. Supported values are "modularity" and "eiIndex".
 	 * @param randomSeed         The random seed to use for the random number generator. Pass 0 for random behaviour.
-	 * @return                   An array list of {@link PolarisationResult} objects.
+	 * @return                   A PolarisationResultTimeSeries object containing the results of the genetic algorithm for each time step and iteration.
 	 */
-	public ArrayList<PolarisationResult> rPolarisation(String statementType, String variable1, boolean variable1Document,
+	public PolarisationResultTimeSeries rPolarisation(String statementType, String variable1, boolean variable1Document,
 			String variable2, boolean variable2Document, String qualifier, String normalization, String duplicates,
 			String startDate, String stopDate, String timeWindow, int windowSize, String kernel, boolean indentTime,
 			String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources,
@@ -705,10 +705,10 @@ public ArrayList<PolarisationResult> rPolarisation(String statementType, String
 
 		// step 3: compute and normalise networks
 		this.exporter.computeKernelSmoothedTimeSlices(false);
-		this.exporter.normalizeMatrixResultsToOne();
+		this.exporter.normaliseMatrices();
 
 		// step 4: compute polarisation
-		ArrayList<PolarisationResult> polarisationResults = this.exporter.geneticAlgorithm(numParents, k, iterations, elitePercentage, mutationPercentage, qualityFunction, randomSeed);
+		PolarisationResultTimeSeries polarisationResults = this.exporter.geneticAlgorithm(numParents, k, iterations, elitePercentage, mutationPercentage, qualityFunction, randomSeed);
 		return polarisationResults;
 	}
 

diff --git a/dna/src/main/java/dna/export/Exporter.java b/dna/src/main/java/dna/export/Exporter.java
@@ -2111,8 +2111,7 @@ private double zeta(LocalDateTime t, LocalDateTime gamma, int w, String timeWind
 	}
 
 	/**
-	 * Normalize all values in each results matrix to make them sum to 1.0. Useful for phase transition methods. Called
-	 * directly from R.
+	 * Normalize all values in each results matrix to make them sum to 1.0. Useful for phase transition methods.
 	 */
 	public void normalizeMatrixResultsToOne() {
 		try (ProgressBar pb = new ProgressBar("Matrix normalization", Exporter.this.matrixResults.size())) {
@@ -2136,6 +2135,35 @@ public void normalizeMatrixResultsToOne() {
 		}
 	}
 
+	/**
+	 * Normalises every matrix in the matrixResults list by passing it through {@code normaliseMatrix} and
+	 * storing the returned array back via {@code setMatrix}. The list is traversed as a parallel stream,
+	 * and the stream is wrapped in a progress bar labelled "Matrix normalisation".
+	 */
+	public void normaliseMatrices() {
+		ProgressBar.wrap(Exporter.this.matrixResults.stream().parallel(), "Matrix normalisation").forEach(m -> m.setMatrix(normaliseMatrix(m.getMatrix())));
+	}
+
+	/**
+	 * Normalises the given matrix in place by dividing each element by the matrix norm, as returned by
+	 * {@code calculateMatrixNorm}. If the norm is exactly zero, the matrix is left unchanged. The column bound
+	 * of the loop is taken from the first row, so all rows are presumably of equal length (TODO confirm).
+	 *
+	 * @param matrix the matrix to be normalised; its cells are overwritten
+	 * @return the same array instance that was passed in, after normalisation
+	 */
+	private double[][] normaliseMatrix(double[][] matrix) {
+		double norm = calculateMatrixNorm(matrix);
+		if (norm != 0.0) {
+			for (int i = 0; i < matrix.length; i++) {
+				for (int j = 0; j < matrix[0].length; j++) {
+					matrix[i][j] = matrix[i][j] / norm;
+				}
+			}
+		}
+		return matrix;
+	}
+
 	/**
 	 * Compute a distance matrix for the elements of the matrix results stored in the Exporter class.
 	 *
@@ -3809,12 +3837,11 @@ private double modularity(int[] mem, double[][] mat, int K) {
 		double m = 0.0; // Total weight of all edges
 
 		// Precompute degrees (k_i) and total edge weight (m)
-		// This corresponds to the summation \sum_j A_{ij} for k_i in the modularity
-		// formula
+		// This corresponds to the summation \sum_j A_{ij} for k_i in the modularity formula
 		for (int i = 0; i < n; i++) {
 			for (int j = 0; j < n; j++) {
 				degrees[i] += mat[i][j];
-				m += mat[i][j];
+				m += Math.abs(mat[i][j]); // absolute value to account for negative weights
 			}
 		}
 		m /= 2.0; // Divide total edge weight by 2 to account for double-counting i-j and j-i
@@ -3825,8 +3852,7 @@ private double modularity(int[] mem, double[][] mat, int K) {
 			for (int j = 0; j < n; j++) {
 				if (mem[i] == mem[j]) { // Check if nodes i and j are in the same community (\delta(c_i, c_j))
 					// Add the contribution of this pair to modularity
-					// The first term (A_{ij}) and the second term (-k_i*k_j/2m) of Equation 6 are
-					// combined here
+					// The first term (A_{ij}) and the second term (-k_i*k_j/2m) of Equation 6 are combined here
 					Q += mat[i][j] - (degrees[i] * degrees[j]) / (2.0 * m);
 				}
 			}
@@ -4575,9 +4601,9 @@ public double[] getQ() {
 	 * @param qualityFunction      The quality function to evaluate cluster solutions. Supported values are "modularity" and "eiIndex".
 	 * @param subtractList         Array list of signed network matrices, for example normalised actor networks with "subtract" qualifier aggregation.
 	 * @param randomSeed           The random seed to use for the random number generator. Pass 0 for random behaviour.
-	 * @return An array list of PolarisationResult objects, one for each time step.
+	 * @return A PolarisationResultTimeSeries object containing the results of the genetic algorithm for each time step and iteration.
 	 */
-	public ArrayList<PolarisationResult> geneticAlgorithm (
+	public PolarisationResultTimeSeries geneticAlgorithm (
 			int numClusterSolutions,
 			int k,
 			int iterations,
@@ -4640,6 +4666,7 @@ public ArrayList<PolarisationResult> geneticAlgorithm (
 		Random rng = (randomSeed == 0) ? new Random() : new Random(randomSeed); // Initialize random number generator
 
 		ArrayList<PolarisationResult> polarisationResults = ProgressBar.wrap(Exporter.this.matrixResults.stream(), "Genetic algorithm")
+		//.peek(m -> System.out.println("Matrix result number: " + Exporter.this.matrixResults.indexOf(m)))
 		.map(matrix -> geneticAlgorithmOneNetwork(
 			matrix,
 			iterations,
@@ -4650,7 +4677,8 @@ public ArrayList<PolarisationResult> geneticAlgorithm (
 			qualityFunction,
 			rng)) // Apply the function to each matrix
 			.collect(Collectors.toCollection(ArrayList::new));
-		return polarisationResults;
+		PolarisationResultTimeSeries polarisationResultTimeSeries = new PolarisationResultTimeSeries(polarisationResults);
+		return polarisationResultTimeSeries;
 	}
 
 	/**
@@ -4702,6 +4730,7 @@ private PolarisationResult geneticAlgorithmOneNetwork(Matrix subtract, int itera
 
 				// compute summary statistics based on iteration step and retain them
 				qualityScores = geneticIteration.getQ();
+				//System.out.println("Iteration " + i + ": " + Arrays.toString(qualityScores) + "\n");
 				maxQ = -1.0;
 				avgQ = 0.0;
 				sdQ = 0.0;
@@ -4742,15 +4771,16 @@ private PolarisationResult geneticAlgorithmOneNetwork(Matrix subtract, int itera
 
 			// correct for early convergence in results vectors
 			int finalIndex = lastIndex;
+			/*
 			for (int i = lastIndex; i >= 0; i--) {
 				if (maxQArray[i] == maxQArray[lastIndex]) {
 					finalIndex = i;
 				} else {
 					break;
 				}
 			}
+			*/
 
-			// System.out.println("Final length: " + finalIndex + 1 + "; values: " + Arrays.toString(maxQArray));
 			double[] maxQArrayTemp = new double[finalIndex + 1];
 			double[] avgQArrayTemp = new double[finalIndex + 1];
 			double[] sdQArrayTemp = new double[finalIndex + 1];
@@ -5054,13 +5084,18 @@ public void computeKernelSmoothedTimeSlices(boolean isolates) {
 		ArrayList<Matrix> processedResults = ProgressBar.wrap(
 				Stream.iterate(0, i -> i + 1).limit(Exporter.this.matrixResults.size()).parallel(), "Kernel smoothing")
 				.map(index -> processTimeSlice(Exporter.this.matrixResults.get(index), xArrayList.get(index)))
-				//.map(m -> removeNegativeValuesFromMatrix(m)) // TODO: check if needed
+				.map(m -> {
+					for (int i = 0; i < m.getMatrix().length; i++) {
+						m.getMatrix()[i][i] = 0.0; // set diagonal to zero
+					}
+					return m;
+				})
+				.map(m -> removeNegativeValuesFromMatrix(m)) // TODO: check if needed
 				.collect(Collectors.toCollection(ArrayList::new));
 		Exporter.this.matrixResults = processedResults;
 	}
 
-	/*
-	 * TODO: remove if not needed
+	// TODO: remove if not needed
 	private Matrix removeNegativeValuesFromMatrix(Matrix m) {
 		for (int i = 0; i < m.getMatrix().length; i++) {
 			for (int j = 0; j < m.getMatrix()[i].length; j++) {
@@ -5071,7 +5106,6 @@ private Matrix removeNegativeValuesFromMatrix(Matrix m) {
 		}
 		return m;
 	}
-	*/
 
 	/** Create a 3D array of ExportStatements for the kernel smoothing approach (variable 1 x variable 2 x qualifier).
 	 *

diff --git a/dna/src/main/java/dna/export/PolarisationResultTimeSeries.java b/dna/src/main/java/dna/export/PolarisationResultTimeSeries.java
@@ -0,0 +1,200 @@
+package dna.export;
+
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+
+/**
+ * The PolarisationResultTimeSeries class represents a time series of PolarisationResult objects.
+ * It provides methods to retrieve various data from the time series, such as final maximum Q values,
+ * early convergence flags, date and time arrays, maximum Q values, average Q values, standard deviation
+ * of Q values, memberships, and names.
+ *
+ * <p>This class is designed to facilitate the analysis and export of polarisation results over time.</p>
+ *
+ * <p>Methods:</p>
+ * <ul>
+ *   <li>{@link #getResults()}: Returns the list of PolarisationResult objects.</li>
+ *   <li>{@link #getFinalMaxQs()}: Returns an array of final maximum Q values from the time series.</li>
+ *   <li>{@link #getEarlyConvergence()}: Returns an array of boolean values indicating early convergence for each result.</li>
+ *   <li>{@link #getDateTimeArray()}: Returns a 2D array of date and time values for start, middle, and stop times of each result.</li>
+ *   <li>{@link #getMaxQs()}: Returns a 2D array of maximum Q values for each result.</li>
+ *   <li>{@link #getAvgQs()}: Returns a 2D array of average Q values for each result.</li>
+ *   <li>{@link #getSdQs()}: Returns a 2D array of standard deviation of Q values for each result.</li>
+ *   <li>{@link #getMemberships()}: Returns a 2D array of memberships for each result.</li>
+ *   <li>{@link #getNames()}: Returns a 2D array of names for each result.</li>
+ * </ul>
+ *
+ * <p>Constructor:</p>
+ * <ul>
+ *   <li>{@link #PolarisationResultTimeSeries(ArrayList)}: Constructs a PolarisationResultTimeSeries with the specified list of PolarisationResult objects.</li>
+ * </ul>
+ */
+public class PolarisationResultTimeSeries {
+    final ArrayList<PolarisationResult> results;
+
+    /**
+     * Constructs a PolarisationResultTimeSeries around the given list, which is stored by reference (not copied).
+     *
+     * @param results an ArrayList of PolarisationResult objects, one element per entry of the time series.
+     */
+    public PolarisationResultTimeSeries(ArrayList<PolarisationResult> results) {
+        this.results = results;
+    }
+
+    /**
+     * Retrieves the list of PolarisationResult objects held by this object (the stored list itself, not a copy).
+     *
+     * @return an ArrayList containing PolarisationResult objects.
+     */
+    public ArrayList<PolarisationResult> getResults() {
+        return results;
+    }
+
+    /**
+     * Retrieves the final maximum Q value of every result, in list order.
+     *
+     * @return a double array with one {@code getFinalMaxQ()} value per PolarisationResult in the results list.
+     */
+    public double[] getFinalMaxQs() {
+        return this.results.stream().mapToDouble(PolarisationResult::getFinalMaxQ).toArray();
+    }
+
+    /**
+     * Retrieves the early convergence flag of every result, in list order.
+     *
+     * @return a boolean array where element {@code i} is the {@code isEarlyConvergence()} value of
+     *         result {@code i} in the results list.
+     */
+    public boolean[] getEarlyConvergence() {
+        boolean[] earlyConvergence = new boolean[this.results.size()];
+        for (int i = 0; i < this.results.size(); i++) {
+            earlyConvergence[i] = this.results.get(i).isEarlyConvergence();
+        }
+        return earlyConvergence;
+    }
+
+    /**
+     * Generates a 2D array representing the date and time components of the start, middle,
+     * and stop times for each result in the results list.
+     * <p>The returned array has a size of [number of results][18], where each row corresponds
+     * to a result and contains the following date and time components, in column order 0 to 17:</p>
+     * <ul>
+     *   <li>start year</li>
+     *   <li>start month</li>
+     *   <li>start day of month</li>
+     *   <li>start hour</li>
+     *   <li>start minute</li>
+     *   <li>start second</li>
+     *   <li>middle year</li>
+     *   <li>middle month</li>
+     *   <li>middle day of month</li>
+     *   <li>middle hour</li>
+     *   <li>middle minute</li>
+     *   <li>middle second</li>
+     *   <li>stop year</li>
+     *   <li>stop month</li>
+     *   <li>stop day of month</li>
+     *   <li>stop hour</li>
+     *   <li>stop minute</li>
+     *   <li>stop second</li>
+     * </ul>
+     * @return a 2D array of integers representing the date and time components of the results.
+     */
+    public int[][] getDateTimeArray() {
+        int[][] dateTimeArray = new int[this.results.size()][18];
+        for (int i = 0; i < this.results.size(); i++) {
+            LocalDateTime start = this.results.get(i).getStart();
+            LocalDateTime middle = this.results.get(i).getMiddle();
+            LocalDateTime stop = this.results.get(i).getStop();
+            dateTimeArray[i][0] = start.getYear();
+            dateTimeArray[i][1] = start.getMonthValue();
+            dateTimeArray[i][2] = start.getDayOfMonth();
+            dateTimeArray[i][3] = start.getHour();
+            dateTimeArray[i][4] = start.getMinute();
+            dateTimeArray[i][5] = start.getSecond();
+            dateTimeArray[i][6] = middle.getYear();
+            dateTimeArray[i][7] = middle.getMonthValue();
+            dateTimeArray[i][8] = middle.getDayOfMonth();
+            dateTimeArray[i][9] = middle.getHour();
+            dateTimeArray[i][10] = middle.getMinute();
+            dateTimeArray[i][11] = middle.getSecond();
+            dateTimeArray[i][12] = stop.getYear();
+            dateTimeArray[i][13] = stop.getMonthValue();
+            dateTimeArray[i][14] = stop.getDayOfMonth();
+            dateTimeArray[i][15] = stop.getHour();
+            dateTimeArray[i][16] = stop.getMinute();
+            dateTimeArray[i][17] = stop.getSecond();
+        }
+        return dateTimeArray;
+    }
+
+    /**
+     * Retrieves the maximum Q values of every result, in list order.
+     *
+     * @return a jagged 2D array whose row {@code i} is the array returned by {@code getMaxQ()} of result {@code i}
+     *         (no copy is made here). Each time step can have different lengths.
+     */
+    public double[][] getMaxQs() {
+        double[][] qs = new double[this.results.size()][];
+        for (int i = 0; i < this.results.size(); i++) {
+            qs[i] = this.results.get(i).getMaxQ();
+        }
+        return qs;
+    }
+
+    /**
+     * Retrieves the average Q values of every result, in list order.
+     *
+     * @return a jagged 2D array whose row {@code i} is the array returned by {@code getAvgQ()} of result {@code i}
+     *         (no copy is made here). Each time step can have different lengths.
+     */
+    public double[][] getAvgQs() {
+        double[][] qs = new double[this.results.size()][];
+        for (int i = 0; i < this.results.size(); i++) {
+            qs[i] = this.results.get(i).getAvgQ();
+        }
+        return qs;
+    }
+
+    /**
+     * Retrieves the standard deviation Q values of every result, in list order.
+     *
+     * @return a jagged 2D array whose row {@code i} is the array returned by {@code getSdQ()} of result {@code i}
+     *         (no copy is made here). Each time step can have different lengths.
+     */
+    public double[][] getSdQs() {
+        double[][] qs = new double[this.results.size()][];
+        for (int i = 0; i < this.results.size(); i++) {
+            qs[i] = this.results.get(i).getSdQ();
+        }
+        return qs;
+    }
+
+    /**
+     * Retrieves the memberships array of every result, in list order, as returned by {@code getMemberships()}.
+     *
+     * @return a 2D array of integers representing the memberships for each result. Each time step can have different lengths.
+     */
+    public int[][] getMemberships() {
+        int[][] memberships = new int[this.results.size()][];
+        for (int i = 0; i < this.results.size(); i++) {
+            memberships[i] = this.results.get(i).getMemberships();
+        }
+        return memberships;
+    }
+
+    /**
+     * Retrieves a 2D array of names from the results.
+     * Each element in the outer array corresponds to a result, in list order,
+     * and each inner array is the array returned by {@code getNames()} of that result.
+     *
+     * @return a 2D array of names, where each inner array contains the names for a specific result. Each time step can have different lengths.
+     */
+    public String[][] getNames() {
+        String[][] names = new String[this.results.size()][];
+        for (int i = 0; i < this.results.size(); i++) {
+            names[i] = this.results.get(i).getNames();
+        }
+        return names;
+    }
+}
diff --git a/rDNA/rDNA/DESCRIPTION b/rDNA/rDNA/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rDNA
 Version: 3.0.11.4
-Date: 2025-01-09
+Date: 2025-01-10
 Title: Discourse Network Analysis in R
 Authors@R: 
   c(person(given = "Philip",