Skip to content

Commit

Permalink
Improve Engine.suggest()
Browse files Browse the repository at this point in the history
  • Loading branch information
bingzheung committed Jul 15, 2024
1 parent c42b551 commit 799081c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 26 deletions.
8 changes: 8 additions & 0 deletions app/src/main/java/org/jyutping/jyutping/keyboard/Candidate.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.jyutping.jyutping.keyboard

import org.jyutping.jyutping.CharacterStandard
import org.jyutping.jyutping.extensions.convertedT2S
import org.jyutping.jyutping.extensions.space

data class Candidate(
val type: CandidateType = CandidateType.Cantonese,
Expand All @@ -26,6 +27,13 @@ data class Candidate(
CandidateType.Cantonese -> (text.hashCode() * 31 + romanization.hashCode())
else -> text.hashCode()
}
/**
 * Joins this candidate with [another] into a single combined candidate.
 *
 * The `text` and `input` of both candidates are appended directly, while
 * `romanization` and `mark` are joined with a single [String.space] between them.
 * Only those four properties are passed to the constructor, so every other
 * property of the result (e.g. `type`) takes its constructor default.
 *
 * @param another the candidate appended after this one.
 * @return a new [Candidate]; neither operand is modified.
 */
operator fun plus(another: Candidate): Candidate = Candidate(
    text = text + another.text,
    romanization = romanization + String.space + another.romanization,
    input = input + another.input,
    mark = mark + String.space + another.mark
)
}

fun Candidate.transformed(characterStandard: CharacterStandard): Candidate {
Expand Down
51 changes: 33 additions & 18 deletions app/src/main/java/org/jyutping/jyutping/keyboard/Engine.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,26 @@ import org.jyutping.jyutping.utilities.DatabaseHelper

object Engine {
/**
 * Entry point for producing candidates for the given [text].
 *
 * - Returns an empty list when `db.canProcess(text)` is false.
 * - Uses `processVerbatim` when `segmentation.maxSchemeLength()` is below 1.
 * - Otherwise delegates to `process` with the supplied [segmentation].
 *
 * @param text the input text to look up.
 * @param segmentation the segmentation of [text] passed on to `process`.
 * @param db the database helper used for all lookups.
 * @return the candidates produced by the selected strategy.
 */
fun suggest(text: String, segmentation: Segmentation, db: DatabaseHelper): List<Candidate> = when {
    db.canProcess(text).not() -> emptyList()
    segmentation.maxSchemeLength() < 1 -> processVerbatim(text, db)
    else -> process(text, segmentation, db)
}
private fun processVerbatim(text: String, db: DatabaseHelper): List<Candidate> {
private fun processVerbatim(text: String, db: DatabaseHelper, limit: Int? = null): List<Candidate> {
val rounds: MutableList<List<Candidate>> = mutableListOf()
for (number in text.indices) {
val leading = text.dropLast(number)
val round = db.match(text = leading, input = leading) + db.shortcut(text = leading)
val round = db.match(text = leading, input = leading, limit = limit) + db.shortcut(leading, limit)
rounds.add(round)
}
return rounds.flatten().distinct()
}
private fun process(text: String, segmentation: Segmentation, db: DatabaseHelper): List<Candidate> {
private fun process(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List<Candidate> {
val textLength = text.length
val primary = query(text, segmentation, db)
val primary = query(text, segmentation, db, limit)
val firstInputLength = primary.firstOrNull()?.input?.length ?: 0
if (firstInputLength == 0) return processVerbatim(text, db)
if (firstInputLength == 0) return processVerbatim(text, db, limit)
if (firstInputLength == textLength) return primary
val prefixes: List<Candidate> = run {
if (segmentation.maxSchemeLength() >= textLength) emptyList<Candidate>()
Expand All @@ -34,25 +36,39 @@ object Engine {
val schemeAnchors = scheme.mapNotNull { it.text.firstOrNull() }
val anchors: String = (schemeAnchors + lastAnchor).joinToString(separator = String.empty)
val text2mark = scheme.joinToString(separator = String.space) { it.text } + String.space + tail
val shortcut = db.shortcut(anchors)
val shortcut = db.shortcut(anchors, limit)
.filter { candidate -> candidate.romanization.filter { it.isDigit().not() }.startsWith(text2mark) }
.map { Candidate(text = it.text, romanization = it.romanization, input = text, mark = text2mark) }
shortcuts.add(shortcut)
}
shortcuts.flatten()
}
if (prefixes.isNotEmpty()) return prefixes + primary
return primary
val headTexts = primary.map { it.input }.distinct()
val concatenated: MutableList<List<Candidate>> = mutableListOf()
for (headText in headTexts) {
val headInputLength = headText.length
val tailText = text.drop(headInputLength)
if (db.canProcess(tailText).not()) continue
val tailSegmentation = Segmentor.segment(tailText, db)
val tailCandidates = process(text = tailText, segmentation = tailSegmentation, db = db, limit = 8).take(100)
if (tailCandidates.isEmpty()) continue
val headCandidates = primary.takeWhile { it.input == headText }.take(8)
val combines = headCandidates.map { head -> tailCandidates.map { head + it } }
concatenated.add(combines.flatten())
}
val preferredConcatenated = preferred(text, concatenated.flatten().distinct()).take(1)
return preferredConcatenated + primary
}
private fun query(text: String, segmentation: Segmentation, db: DatabaseHelper): List<Candidate> {
private fun query(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List<Candidate> {
val textLength = text.length
val searches = search(text, segmentation, db)
val searches = search(text, segmentation, db, limit)
val preferredSearched = searches.filter { it.input.length == textLength }
val matched = db.match(text = text, input = text)
val shortcut = db.shortcut(text)
val matched = db.match(text = text, input = text, limit = limit)
val shortcut = db.shortcut(text, limit)
return (matched + preferredSearched + shortcut + searches).distinct()
}
private fun search(text: String, segmentation: Segmentation, db: DatabaseHelper): List<Candidate> {
private fun search(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List<Candidate> {
val textLength = text.length
val perfectSchemes = segmentation.filter { it.length() == textLength }
if (perfectSchemes.isNotEmpty()) {
Expand All @@ -63,7 +79,7 @@ object Engine {
val pingText = slice.joinToString(separator = String.empty) { it.origin }
val inputText = slice.joinToString(separator = String.empty) { it.text }
val text2mark = slice.joinToString(separator = String.space) { it.text }
val matched = db.match(text = pingText, input = inputText, mark = text2mark)
val matched = db.match(text = pingText, input = inputText, mark = text2mark, limit = limit)
matches.add(matched)
}
}
Expand All @@ -74,13 +90,12 @@ object Engine {
val pingText = scheme.joinToString(separator = String.empty) { it.origin }
val inputText = scheme.joinToString(separator = String.empty) { it.text }
val text2mark = scheme.joinToString(separator = String.space) { it.text }
val matched = db.match(text = pingText, input = inputText, mark = text2mark)
val matched = db.match(text = pingText, input = inputText, mark = text2mark, limit = limit)
matches.add(matched)
}
return ordered(textLength, matches.flatten())
}
}

private fun preferred(text: String, candidates: List<Candidate>): List<Candidate> {
val sorted = candidates.sortedWith(compareBy({-it.input.length}, {-it.text.length}))
val matched = sorted.filter { candidate -> candidate.romanization.filter { it.isLetter() } == text }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,8 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper(
}

fun canProcess(text: String): Boolean {
val value = text.firstOrNull()?.intercode() ?: 0
if (value == 0) return false
val code = if (value == 44) 29 else value // Replace 'y' with 'j'
val value: Int = text.firstOrNull()?.intercode() ?: return false
val code: Int = if (value == 44) 29 else value // Replace 'y' with 'j'
val command = "SELECT rowid FROM lexicontable WHERE shortcut = $code LIMIT 1;"
val cursor = this.readableDatabase.rawQuery(command, null)
if (cursor.moveToFirst()) {
Expand All @@ -349,11 +348,12 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper(
return false
}
}
fun shortcut(text: String): List<Candidate> {
fun shortcut(text: String, limit: Int? = null): List<Candidate> {
val code: Int = text.shortcutCharcode() ?: 0
if (code == 0) return emptyList()
val limitValue: Int = limit ?: 50
val candidates: MutableList<Candidate> = mutableListOf()
val command = "SELECT rowid, word, romanization FROM lexicontable WHERE shortcut = $code LIMIT 50;"
val command = "SELECT rowid, word, romanization FROM lexicontable WHERE shortcut = $code LIMIT ${limitValue};"
val cursor = this.readableDatabase.rawQuery(command, null)
while (cursor.moveToNext()) {
val order = cursor.getInt(0)
Expand All @@ -365,11 +365,12 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper(
cursor.close()
return candidates
}
fun match(text: String, input: String, mark: String? = null): List<Candidate> {
fun match(text: String, input: String, mark: String? = null, limit: Int? = null): List<Candidate> {
if (text.isBlank()) return emptyList()
val code: Int = text.hashCode()
val limitValue: Int = limit ?: -1
val candidates: MutableList<Candidate> = mutableListOf()
val command = "SELECT rowid, word, romanization FROM lexicontable WHERE ping = ${code};"
val command = "SELECT rowid, word, romanization FROM lexicontable WHERE ping = $code LIMIT ${limitValue};"
val cursor = this.readableDatabase.rawQuery(command, null)
while (cursor.moveToNext()) {
val order = cursor.getInt(0)
Expand Down Expand Up @@ -454,7 +455,7 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper(
if (code == 0) return emptyList()
val items: MutableList<PinyinLexicon> = mutableListOf()
val limitValue: Int = limit ?: 50
val command = "SELECT rowid, word, pinyin FROM pinyintable WHERE shortcut = ${code} LIMIT ${limitValue};"
val command = "SELECT rowid, word, pinyin FROM pinyintable WHERE shortcut = $code LIMIT ${limitValue};"
val cursor = this.readableDatabase.rawQuery(command, null)
while (cursor.moveToNext()) {
val rowID = cursor.getInt(0)
Expand Down

0 comments on commit 799081c

Please sign in to comment.