From f2f8f894a75f9a3e87d78d9882ae52ab60bb94cf Mon Sep 17 00:00:00 2001
From: Ronan McGovern <78278410+RonanKMcGovern@users.noreply.github.com>
Date: Mon, 16 Oct 2023 22:30:57 +0100
Subject: [PATCH 1/4] Create coins_max_entropy.py

---
 coins_max_entropy.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 coins_max_entropy.py

diff --git a/coins_max_entropy.py b/coins_max_entropy.py
new file mode 100644
index 0000000..39a2a17
--- /dev/null
+++ b/coins_max_entropy.py
@@ -0,0 +1,34 @@
+## Finds the most likely p by maximising the entropy of the binomial distribution of heads in n tosses, subject to the constraint that the expected number of heads equals the observed count x
+
+from scipy.optimize import minimize
+from scipy.special import comb
+import numpy as np
+
+# Define Shannon entropy for the Binomial distribution
+def shannon_entropy(p, n):
+    p = p[0]  # Extract p from the array
+    terms = [comb(n, k) * (p ** k) * ((1 - p) ** (n - k)) for k in range(n + 1)]
+    entropy = -np.sum([term * np.log2(term) for term in terms if term > 0])  # Exclude zero terms; pass a list, since np.sum over a generator is deprecated
+    return -entropy  # Minimize the negative entropy to maximize entropy
+
+# Define constraint for the mean (x/n)
+def constraint_mean(p, n, x):
+    return n * p[0] - x
+
+# Number of trials (n) and observed successes (x)
+n = 10
+x = 3
+
+# Initial guess for p
+initial_p = [0.5]
+
+# Constraint for mean
+con = {'type': 'eq', 'fun': constraint_mean, 'args': (n, x)}
+
+# Perform optimization to maximize entropy
+result = minimize(shannon_entropy, initial_p, args=(n,), constraints=[con], bounds=[(0, 1)])
+
+# Extract optimized p value
+optimized_p = result.x[0]
+
+print(f"Optimized p to maximize entropy: {optimized_p}")
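
A note on the script above: the equality constraint n * p = x leaves p = x / n as the only feasible point, so the entropy objective cannot move the optimum, and for n = 10, x = 3 the printed value should be approximately 0.3. A minimal sanity check, assuming it is appended after the final print statement (the tolerance is illustrative):

    # The equality constraint alone determines p, so the optimizer
    # should land on x / n regardless of the entropy term
    assert abs(optimized_p - x / n) < 1e-4
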
From 33f1e5c92f7f1fadef08dc7c708236c77249006d Mon Sep 17 00:00:00 2001
From: Ronan McGovern <78278410+RonanKMcGovern@users.noreply.github.com>
Date: Mon, 16 Oct 2023 22:42:01 +0100
Subject: [PATCH 2/4] Create stars-max-entropy.py

---
 stars-max-entropy.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 stars-max-entropy.py

diff --git a/stars-max-entropy.py b/stars-max-entropy.py
new file mode 100644
index 0000000..6b8e26b
--- /dev/null
+++ b/stars-max-entropy.py
@@ -0,0 +1,32 @@
+# Define Shannon entropy for Multinomial distribution with a fixed number of samples
+def shannon_entropy_multinomial_fixed_samples(p, n):
+    entropy = -np.sum([p_i * np.log2(p_i) for p_i in p if p_i > 0])  # Exclude zero terms; pass a list, since np.sum over a generator is deprecated
+    return -n * entropy  # Account for the number of samples
+
+# Define constraint for the average rating with a fixed number of samples
+def constraint_average_fixed_samples(p, avg, n):
+    weighted_sum = np.sum([i * p_i for i, p_i in enumerate(p, start=1)])
+    return n * weighted_sum - n * avg  # Account for the number of samples
+
+# Known average rating and number of reviews
+# Outputs the probability distribution p over star ratings that maximises entropy given these
+
+avg_review = 3.5
+num_reviews = 10
+
+# Initial guess for p
+initial_p = [0.2, 0.2, 0.2, 0.2, 0.2]
+
+# Constraint for average review
+con_avg = {'type': 'eq', 'fun': constraint_average_fixed_samples, 'args': (avg_review, num_reviews)}
+
+# Additional constraint for probabilities to sum to 1
+con_sum = {'type': 'eq', 'fun': lambda p: np.sum(p) - 1}
+
+# Perform optimization to maximize entropy
+result_fixed_samples = minimize(shannon_entropy_multinomial_fixed_samples, initial_p, args=(num_reviews,), constraints=[con_avg, con_sum], bounds=[(0, 1)]*5)
+
+# Extract optimized p values
+optimized_p_multinomial_fixed_samples = result_fixed_samples.x
+
+print(optimized_p_multinomial_fixed_samples)

From 850da14bd61c1af20a89456fc4c6c137547a5d16 Mon Sep 17 00:00:00 2001
From: Ronan McGovern <78278410+RonanKMcGovern@users.noreply.github.com>
Date: Mon, 16 Oct 2023 23:02:40 +0100
Subject: [PATCH 3/4] Rename coins_max_entropy.py to coins-max-entropy.py

---
 coins_max_entropy.py => coins-max-entropy.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename coins_max_entropy.py => coins-max-entropy.py (100%)

diff --git a/coins_max_entropy.py b/coins-max-entropy.py
similarity index 100%
rename from coins_max_entropy.py
rename to coins-max-entropy.py
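
For cross-checking stars-max-entropy.py (whose numpy/scipy/matplotlib imports only arrive in PATCH 4/4): maximising -sum(p_i * log(p_i)) subject to sum(p_i) = 1 and sum(i * p_i) = avg has the closed-form Gibbs solution p_i proportional to exp(lam * i), and the log base does not affect the argmax. A minimal sketch, assuming numpy and scipy are available (the helper name gibbs_p and the bracket [-10, 10] are illustrative):

    import numpy as np
    from scipy.optimize import brentq

    def gibbs_p(lam, k=5):
        # Maximum-entropy distribution under a mean constraint: p_i proportional to exp(lam * i)
        w = np.exp(lam * np.arange(1, k + 1))
        return w / w.sum()

    # Solve for the multiplier lam that matches the target mean of 3.5
    lam = brentq(lambda l: gibbs_p(l) @ np.arange(1, 6) - 3.5, -10.0, 10.0)
    print(gibbs_p(lam))  # Should be close to optimized_p_multinomial_fixed_samples
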
From 1e029a8182e2e15d35ba2c74a92d3de5ef27a849 Mon Sep 17 00:00:00 2001
From: Ronan McGovern <78278410+RonanKMcGovern@users.noreply.github.com>
Date: Mon, 16 Oct 2023 23:02:56 +0100
Subject: [PATCH 4/4] Update stars-max-entropy.py

---
 stars-max-entropy.py | 73 +++++++++++++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/stars-max-entropy.py b/stars-max-entropy.py
index 6b8e26b..adcb18d 100644
--- a/stars-max-entropy.py
+++ b/stars-max-entropy.py
@@ -1,3 +1,7 @@
+import matplotlib.pyplot as plt
+from scipy.optimize import minimize
+import numpy as np
+
 # Define Shannon entropy for Multinomial distribution with a fixed number of samples
 def shannon_entropy_multinomial_fixed_samples(p, n):
     entropy = -np.sum([p_i * np.log2(p_i) for p_i in p if p_i > 0])  # Exclude zero terms; pass a list, since np.sum over a generator is deprecated
@@ -8,25 +12,50 @@ def constraint_average_fixed_samples(p, avg, n):
     weighted_sum = np.sum([i * p_i for i, p_i in enumerate(p, start=1)])
     return n * weighted_sum - n * avg  # Account for the number of samples
 
-# Known average rating and number of reviews
-# Outputs the probability distribution p over star ratings that maximises entropy given these
-
-avg_review = 3.5
-num_reviews = 10
-
-# Initial guess for p
-initial_p = [0.2, 0.2, 0.2, 0.2, 0.2]
-
-# Constraint for average review
-con_avg = {'type': 'eq', 'fun': constraint_average_fixed_samples, 'args': (avg_review, num_reviews)}
-
-# Additional constraint for probabilities to sum to 1
-con_sum = {'type': 'eq', 'fun': lambda p: np.sum(p) - 1}
-
-# Perform optimization to maximize entropy
-result_fixed_samples = minimize(shannon_entropy_multinomial_fixed_samples, initial_p, args=(num_reviews,), constraints=[con_avg, con_sum], bounds=[(0, 1)]*5)
-
-# Extract optimized p values
-optimized_p_multinomial_fixed_samples = result_fixed_samples.x
-
-print(optimized_p_multinomial_fixed_samples)
+# Function to perform optimization for a given number of reviews and average rating
+def optimize_probs(num_reviews, avg_review):
+    # Initial guess for p
+    initial_p = [0.2, 0.2, 0.2, 0.2, 0.2]
+
+    # Constraint for average review
+    con_avg = {'type': 'eq', 'fun': constraint_average_fixed_samples, 'args': (avg_review, num_reviews)}
+
+    # Additional constraint for probabilities to sum to 1
+    con_sum = {'type': 'eq', 'fun': lambda p: np.sum(p) - 1}
+
+    # Perform optimization to maximize entropy
+    result = minimize(shannon_entropy_multinomial_fixed_samples, initial_p, args=(num_reviews,), constraints=[con_avg, con_sum], bounds=[(0, 1)]*5)
+
+    # Extract optimized p values
+    return result.x
+
+# Known average review
+avg_review = 4.0
+
+# Different numbers of reviews
+num_reviews_list = [4, 8, 16, float('inf')]
+
+# Store optimized probabilities
+optimized_probs = {}
+
+# Perform optimization for each number of reviews
+for num_reviews in num_reviews_list:
+    optimized_p = optimize_probs(num_reviews, avg_review) if num_reviews != float('inf') else optimize_probs(10000, avg_review)  # Use a large number to approximate infinity
+    optimized_probs[num_reviews] = optimized_p
+
+# Dot plot of the optimized distribution for each case
+fig, ax = plt.subplots()
+
+# Different markers for better visibility
+markers = ['o', 's', '^', 'x']
+
+for i, (num_reviews, optimized_p) in enumerate(optimized_probs.items()):
+    ax.plot(range(1, 6), optimized_p, marker=markers[i], linestyle='-', label=f'{num_reviews} Reviews')
+
+# Labels and title
+ax.set_xlabel('Star Rating')
+ax.set_ylabel('Probability')
+ax.set_title('Optimized Probabilities for Different Numbers of Reviews')
+ax.legend()
+
+plt.show()
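
A caveat on the comparison in PATCH 4/4: optimize_probs scales the objective by n and the constraint by n, and a positive rescaling changes neither the feasible set nor the argmax, so the four curves should coincide up to solver tolerance. A quick check, assuming it runs in the same session after the script (the tolerance is illustrative):

    # All review counts should yield numerically the same distribution
    base = optimized_probs[4]
    for n_r, p in optimized_probs.items():
        print(n_r, np.allclose(p, base, atol=1e-3))
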