Skip to content

Commit

Permalink
Adding AMP FTS metrics
Browse files Browse the repository at this point in the history
These seem like they could be useful for the experiment.  I'm not sure
we'll be able to hook them up but we might as well try.

Refactored the logic to get the full keywords and added tests for it
since we're now using it in two places.
  • Loading branch information
bendk committed Jan 17, 2025
1 parent f63adf8 commit 2593471
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 49 deletions.
57 changes: 54 additions & 3 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
geoname::GeonameCache,
pocket::{split_keyword, KeywordConfidence},
provider::{AmpMatchingStrategy, SuggestionProvider},
query::FtsQuery,
query::{full_keywords_to_fts_content, FtsQuery},
rs::{
DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion,
DownloadedExposureSuggestion, DownloadedFakespotSuggestion, DownloadedMdnSuggestion,
Expand Down Expand Up @@ -451,6 +451,7 @@ impl<'a> SuggestDao<'a> {
click_url: cooked_click_url,
raw_click_url,
score,
fts_match_info: None,
})
},
)
Expand Down Expand Up @@ -498,7 +499,7 @@ impl<'a> SuggestDao<'a> {
},
|row| -> Result<Suggestion> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let title: String = row.get("title")?;
let raw_url: String = row.get("url")?;
let score: f64 = row.get("score")?;

Expand Down Expand Up @@ -526,6 +527,12 @@ impl<'a> SuggestDao<'a> {
let cooked_url = cook_raw_suggestion_url(&raw_url);
let raw_click_url = row.get::<_, String>("click_url")?;
let cooked_click_url = cook_raw_suggestion_url(&raw_click_url);
let match_info = self.fetch_amp_fts_match_info(
&fts_query,
suggestion_id,
fts_column,
&title,
)?;

Ok(Suggestion::Amp {
block_id: row.get("block_id")?,
Expand All @@ -541,6 +548,7 @@ impl<'a> SuggestDao<'a> {
click_url: cooked_click_url,
raw_click_url,
score,
fts_match_info: Some(match_info),
})
},
)
Expand All @@ -549,6 +557,49 @@ impl<'a> SuggestDao<'a> {
Ok(suggestions)
}

fn fetch_amp_fts_match_info(
&self,
fts_query: &FtsQuery<'_>,
suggestion_id: i64,
fts_column: &str,
title: &str,
) -> Result<FtsMatchInfo> {
let fts_content = match fts_column {
"title" => title.to_lowercase(),
"full_keywords" => {
let full_keyword_list: Vec<String> = self.conn.query_rows_and_then(
"
SELECT fk.full_keyword
FROM full_keywords fk
JOIN keywords k on fk.id == k.full_keyword_id
WHERE k.suggestion_id = ?
",
(suggestion_id,),
|row| row.get(0),
)?;
full_keywords_to_fts_content(full_keyword_list.iter().map(String::as_str))
}
// fts_column comes from the code above and we know there's only 2 possibilities
_ => unreachable!(),
};

let prefix = if fts_query.is_prefix_query {
// If the query was a prefix match query then test if the query without the prefix
// match would have also matched. If not, then this counts as a prefix match.
let sql = "SELECT 1 FROM amp_fts WHERE rowid = ? AND amp_fts MATCH ?";
let params = (&suggestion_id, &fts_query.match_arg_without_prefix_match);
!self.conn.exists(sql, params)?
} else {
// If not, then it definitely wasn't a prefix match
false
};

Ok(FtsMatchInfo {
prefix,
stemming: fts_query.match_required_stemming(&fts_content),
})
}

/// Fetches Suggestions of type Wikipedia provider that match the given query
pub fn fetch_wikipedia_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
let keyword_lowercased = &query.keyword.to_lowercase();
Expand Down Expand Up @@ -1116,7 +1167,7 @@ impl<'a> SuggestDao<'a> {
if enable_fts {
fts_insert.execute(
suggestion_id,
&common_details.full_keywords_joined(),
&common_details.full_keywords_fts_column(),
&common_details.title,
)?;
}
Expand Down
45 changes: 45 additions & 0 deletions components/suggest/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::collections::HashSet;

use crate::{LabeledTimingSample, Suggestion, SuggestionProvider, SuggestionProviderConstraints};

/// A query for suggestions to show in the address bar.
Expand Down Expand Up @@ -226,9 +228,31 @@ impl<'a> FtsQuery<'a> {
}
}

/// Given a list of full keywords, create an FTS string to match against.
///
/// Creates a string with de-duped keywords.
pub fn full_keywords_to_fts_content<'a>(
full_keywords: impl IntoIterator<Item = &'a str>,
) -> String {
let parts: HashSet<_> = full_keywords
.into_iter()
.flat_map(str::split_whitespace)
.map(str::to_lowercase)
.collect();
let mut result = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
result.push(' ');
}
result.push_str(&part);
}
result
}

#[cfg(test)]
mod test {
use super::*;
use std::collections::HashMap;

fn check_parse_keywords(input: &str, expected: Vec<&str>) {
let query = SuggestionQuery::all_providers(input);
Expand Down Expand Up @@ -297,4 +321,25 @@ mod test {
// characters).
assert!(FtsQuery::new("run").match_required_stemming("running shoes"));
}

#[test]
fn test_full_keywords_to_fts_content() {
check_full_keywords_to_fts_content(["a", "b", "c"], "a b c");
check_full_keywords_to_fts_content(["a", "b c"], "a b c");
check_full_keywords_to_fts_content(["a", "b c a"], "a b c");
check_full_keywords_to_fts_content(["a", "b C A"], "a b c");
}

fn check_full_keywords_to_fts_content<const N: usize>(input: [&str; N], expected: &str) {
let mut expected_counts = HashMap::<&str, usize>::new();
let mut actual_counts = HashMap::<&str, usize>::new();
for term in expected.split_whitespace() {
*expected_counts.entry(term).or_default() += 1;
}
let fts_content = full_keywords_to_fts_content(input);
for term in fts_content.split_whitespace() {
*actual_counts.entry(term).or_default() += 1;
}
assert_eq!(actual_counts, expected_counts);
}
}
24 changes: 7 additions & 17 deletions components/suggest/src/rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,15 @@
//! the new suggestion in their results, and return `Suggestion::T` variants
//! as needed.
use std::{collections::HashSet, fmt};
use std::fmt;

use remote_settings::{Attachment, RemoteSettingsRecord};
use serde::{Deserialize, Deserializer};

use crate::{db::SuggestDao, error::Error, provider::SuggestionProvider, Result};
use crate::{
db::SuggestDao, error::Error, provider::SuggestionProvider,
query::full_keywords_to_fts_content, Result,
};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Collection {
Expand Down Expand Up @@ -437,21 +440,8 @@ impl DownloadedSuggestionCommonDetails {
)
}

/// Get the full keywords as a single string
pub fn full_keywords_joined(&self) -> String {
let parts: HashSet<_> = self
.full_keywords
.iter()
.flat_map(|(s, _)| s.split_whitespace())
.collect();
let mut result = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
result.push(' ');
}
result.push_str(part);
}
result
pub fn full_keywords_fts_column(&self) -> String {
full_keywords_to_fts_content(self.full_keywords.iter().map(|(s, _)| s.as_str()))
}
}

Expand Down
Loading

0 comments on commit 2593471

Please sign in to comment.