diff --git a/plugins/affiliation/src/main.rs b/plugins/affiliation/src/main.rs index 53dc0dc0..a967a8c7 100644 --- a/plugins/affiliation/src/main.rs +++ b/plugins/affiliation/src/main.rs @@ -234,7 +234,7 @@ mod chunk { let mut out = vec![]; let mut made_progress = true; - while hashes.len() > 0 && made_progress { + while !hashes.is_empty() && made_progress { made_progress = false; let mut curr = vec![]; let mut remaining = max_chunk_size; @@ -354,7 +354,7 @@ async fn affiliation(engine: &mut PluginEngine, key: Target) -> Result let chunked_hashes = chunk::chunk_hashes(hashes, chunk::GRPC_EFFECTIVE_MAX_SIZE)?; let mut commit_views: Vec = vec![]; - for (i, hashes) in chunked_hashes.into_iter().enumerate() { + for hashes in chunked_hashes { // Repo with the hash of every commit let commit_batch_repo = BatchGitRepo { local: repo.clone(), @@ -368,7 +368,6 @@ async fn affiliation(engine: &mut PluginEngine, key: Target) -> Result log::error!("failed to get contributors for commits: {}", e); Error::UnspecifiedQueryState })?; - println!("Finished batch contrib #{i}"); let views: Vec = serde_json::from_value(commit_values) .map_err(|_| Error::UnexpectedPluginQueryInputFormat)?; commit_views.extend(views.into_iter()); diff --git a/plugins/git/src/data.rs b/plugins/git/src/data.rs index cfa28f27..7e475785 100644 --- a/plugins/git/src/data.rs +++ b/plugins/git/src/data.rs @@ -21,7 +21,7 @@ pub struct DetailedGitRepo { } /// Commits as they come directly out of `git log`. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] pub struct RawCommit { pub hash: String, diff --git a/plugins/git/src/local.rs b/plugins/git/src/local.rs deleted file mode 100644 index d7cfc9a4..00000000 --- a/plugins/git/src/local.rs +++ /dev/null @@ -1,160 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Copies of functions from main.rs that do not run as queries -//! This is a temporary solution until batching is implemented - -use crate::{ - data::{Commit, CommitContributor, CommitContributorView, Contributor, ContributorView}, - util::git_command::get_commits, -}; -use hipcheck_sdk::{prelude::*, types::LocalGitRepo}; - -/// Returns all commits extracted from the repository -pub fn local_commits(repo: LocalGitRepo) -> Result> { - let path = &repo.path; - let raw_commits = get_commits(path).map_err(|e| { - log::error!("failed to get raw commits: {}", e); - Error::UnspecifiedQueryState - })?; - let commits = raw_commits - .iter() - .map(|raw| Commit { - hash: raw.hash.to_owned(), - written_on: raw.written_on.to_owned(), - committed_on: raw.committed_on.to_owned(), - }) - .collect(); - - Ok(commits) -} - -/// Returns all contributors to the repository -pub fn local_contributors(repo: LocalGitRepo) -> Result> { - let path = &repo.path; - let raw_commits = get_commits(path).map_err(|e| { - log::error!("failed to get raw commits: {}", e); - Error::UnspecifiedQueryState - })?; - - let mut contributors: Vec<_> = raw_commits - .iter() - .flat_map(|raw| [raw.author.to_owned(), raw.committer.to_owned()]) - .collect(); - - contributors.sort(); - contributors.dedup(); - - Ok(contributors) -} - -/// Returns the commits associated with a given contributor (identified by e-mail address in the `details` value) -pub fn local_commits_for_contributor( - all_commits: &[Commit], - contributors: &[Contributor], - commit_contributors: &[CommitContributor], - email: &str, -) -> Result { - // Get the index of the contributor - let contributor_id = contributors - .iter() - .position(|c| c.email == email) - .ok_or_else(|| { - log::error!("failed to find contributor"); - Error::UnspecifiedQueryState - })?; - - // Get the contributor - let contributor = contributors[contributor_id].clone(); - - // Find commits that have that contributor - let commits = commit_contributors - .iter() - .filter_map(|com_con| { - if com_con.author_id == contributor_id || com_con.committer_id == contributor_id { - // SAFETY: This index is guaranteed to be valid in - // `all_commits` because of how it and `commit_contributors` - // are constructed from `db.raw_commits()` - Some(all_commits[com_con.commit_id].clone()) - } else { - None - } - }) - .collect(); - - Ok(ContributorView { - contributor, - commits, - }) -} - -/// Returns the contributor view for a given commit (idenftied by hash in the `details` field) -pub fn local_contributors_for_commit( - commits: &[Commit], - contributors: &[Contributor], - commit_contributors: &[CommitContributor], - hash: &str, -) -> Result { - // Get the index of the commit - let commit_id = commits.iter().position(|c| c.hash == hash).ok_or_else(|| { - log::error!("failed to find contributor"); - Error::UnspecifiedQueryState - })?; - - // Get the commit - let commit = commits[commit_id].clone(); - - // Find the author and committer for that commit - commit_contributors - .iter() - .find(|com_con| com_con.commit_id == commit_id) - .map(|com_con| { - // SAFETY: These indices are guaranteed to be valid in - // `contributors` because of how `commit_contributors` is - // constructed from it. - let author = contributors[com_con.author_id].clone(); - let committer = contributors[com_con.committer_id].clone(); - - CommitContributorView { - commit, - author, - committer, - } - }) - .ok_or_else(|| { - log::error!("failed to find contributor info"); - Error::UnspecifiedQueryState - }) -} - -pub fn local_commit_contributors( - repo: LocalGitRepo, - contributors: &[Contributor], -) -> Result> { - let path = &repo.path; - let raw_commits = get_commits(path).map_err(|e| { - log::error!("failed to get raw commits: {}", e); - Error::UnspecifiedQueryState - })?; - - let commit_contributors = raw_commits - .iter() - .enumerate() - .map(|(commit_id, raw)| { - // SAFETY: These `position` calls are guaranteed to return `Some` - // given how `contributors` is constructed from `db.raw_commits()` - let author_id = contributors.iter().position(|c| c == &raw.author).unwrap(); - let committer_id = contributors - .iter() - .position(|c| c == &raw.committer) - .unwrap(); - - CommitContributor { - commit_id, - author_id, - committer_id, - } - }) - .collect(); - - Ok(commit_contributors) -} diff --git a/plugins/git/src/main.rs b/plugins/git/src/main.rs index 50d11fee..ab31f0d4 100644 --- a/plugins/git/src/main.rs +++ b/plugins/git/src/main.rs @@ -3,18 +3,13 @@ //! Plugin containing secondary queries that return information about a Git repo to another query mod data; -mod local; mod parse; mod util; use crate::{ data::{ Commit, CommitContributor, CommitContributorView, CommitDiff, Contributor, ContributorView, - DetailedGitRepo, Diff, - }, - local::{ - local_commit_contributors, local_commits, local_commits_for_contributor, - local_contributors, local_contributors_for_commit, + DetailedGitRepo, Diff, RawCommit, }, util::git_command::{get_commits, get_commits_from_date, get_diffs}, }; @@ -37,6 +32,14 @@ pub struct BatchGitRepo { pub details: Vec, } +/// Returns all raw commits extracted from the repository +fn local_raw_commits(repo: LocalGitRepo) -> Result> { + get_commits(&repo.path).map_err(|e| { + log::error!("failed to get raw commits: {}", e); + Error::UnspecifiedQueryState + }) +} + /// Returns the date of the most recent commit to a Git repo as `jiff:Timestamp` displayed as a String /// (Which means that anything expecting a `Timestamp` must parse the output of this query appropriately) #[query] @@ -235,6 +238,8 @@ async fn commits_for_contributor( }) } +use std::collections::{HashMap, HashSet}; + // Temporary query to call multiple commits_for_contributors() queries until we implement batching // TODO: Remove this query once batching works #[query] @@ -247,27 +252,68 @@ async fn batch_commits_for_contributor( let mut views = Vec::new(); - let commits = local_commits(local.clone()).map_err(|e| { + let raw_commits = local_raw_commits(local.clone()).map_err(|e| { log::error!("failed to get commits: {}", e); Error::UnspecifiedQueryState })?; - let contributors = local_contributors(local.clone()).map_err(|e| { - log::error!("failed to get contributors: {}", e); - Error::UnspecifiedQueryState - })?; - let commit_contributors = - local_commit_contributors(local.clone(), &contributors).map_err(|e| { - log::error!("failed to get join table: {}", e); - Error::UnspecifiedQueryState - })?; + let commits: Vec = raw_commits + .iter() + .map(|raw| Commit { + hash: raw.hash.to_owned(), + written_on: raw.written_on.to_owned(), + committed_on: raw.committed_on.to_owned(), + }) + .collect(); + // @Assert - raw_commit and commits idxes correspond + + // Map contributors to the set of commits (by idx) they have contributed to + let mut contrib_to_commits: HashMap> = HashMap::default(); + // Map an email to a contributor + let mut email_to_contrib: HashMap = HashMap::default(); + + fn add_contributor( + map: &mut HashMap>, + c: &Contributor, + commit_id: usize, + ) { + let cv = match map.get_mut(c) { + Some(v) => v, + None => { + map.insert(c.clone(), HashSet::new()); + map.get_mut(c).unwrap() + } + }; + cv.insert(commit_id); + } + + // For each commit, update the contributors' entries in the above maps + for (i, commit) in raw_commits.iter().enumerate() { + add_contributor(&mut contrib_to_commits, &commit.author, i); + email_to_contrib.insert(commit.author.email.clone(), commit.author.clone()); + add_contributor(&mut contrib_to_commits, &commit.committer, i); + email_to_contrib.insert(commit.committer.email.clone(), commit.committer.clone()); + } for email in emails { - views.push(local_commits_for_contributor( - &commits, - &contributors, - &commit_contributors, - &email, - )?); + // Get a contributor from their email + let contributor = email_to_contrib + .get(&email) + .ok_or_else(|| { + log::error!("failed to find contributor"); + Error::UnspecifiedQueryState + })? + .clone(); + // Resolve all commits that contributor touched by idx + let commits = contrib_to_commits + .get(&contributor) + .unwrap() + .iter() + .map(|i| commits.get(*i).unwrap().clone()) + .collect::>(); + views.push(ContributorView { + contributor, + commits, + }); } Ok(views) @@ -345,29 +391,40 @@ async fn batch_contributors_for_commit( let local = repo.local; let hashes = repo.details; - let commits = local_commits(local.clone()).map_err(|e| { + let raw_commits = local_raw_commits(local.clone()).map_err(|e| { log::error!("failed to get commits: {}", e); Error::UnspecifiedQueryState })?; - let contributors = local_contributors(local.clone()).map_err(|e| { - log::error!("failed to get contributors: {}", e); - Error::UnspecifiedQueryState - })?; - let commit_contributors = - local_commit_contributors(local.clone(), &contributors).map_err(|e| { - log::error!("failed to get join table: {}", e); - Error::UnspecifiedQueryState - })?; - let mut views = Vec::new(); + let mut hash_to_idx: HashMap = HashMap::default(); + let commit_views: Vec = raw_commits + .into_iter() + .enumerate() + .map(|(i, raw)| { + let commit = Commit { + hash: raw.hash.to_owned(), + written_on: raw.written_on.to_owned(), + committed_on: raw.committed_on.to_owned(), + }; + let author = raw.author; + let committer = raw.committer; + hash_to_idx.insert(raw.hash.clone(), i); + CommitContributorView { + commit, + author, + committer, + } + }) + .collect(); + + let mut views: Vec = vec![]; for hash in hashes { - views.push(local_contributors_for_commit( - &commits, - &contributors, - &commit_contributors, - &hash, - )?); + let idx = hash_to_idx.get(&hash).ok_or_else(|| { + log::error!("hash could not be found in repo"); + Error::UnspecifiedQueryState + })?; + views.push(commit_views.get(*idx).unwrap().clone()); } Ok(views)