From 799081c7fe6afd113c7770b150d93e0a94119fe0 Mon Sep 17 00:00:00 2001 From: Bing ZHEUNG Date: Tue, 16 Jul 2024 04:18:49 +0800 Subject: [PATCH] Improve Engine.suggest() --- .../jyutping/jyutping/keyboard/Candidate.kt | 8 +++ .../org/jyutping/jyutping/keyboard/Engine.kt | 51 ++++++++++++------- .../jyutping/utilities/DatabaseHelper.kt | 17 ++++--- 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/app/src/main/java/org/jyutping/jyutping/keyboard/Candidate.kt b/app/src/main/java/org/jyutping/jyutping/keyboard/Candidate.kt index 02ecf7c..7cacd45 100644 --- a/app/src/main/java/org/jyutping/jyutping/keyboard/Candidate.kt +++ b/app/src/main/java/org/jyutping/jyutping/keyboard/Candidate.kt @@ -2,6 +2,7 @@ package org.jyutping.jyutping.keyboard import org.jyutping.jyutping.CharacterStandard import org.jyutping.jyutping.extensions.convertedT2S +import org.jyutping.jyutping.extensions.space data class Candidate( val type: CandidateType = CandidateType.Cantonese, @@ -26,6 +27,13 @@ data class Candidate( CandidateType.Cantonese -> (text.hashCode() * 31 + romanization.hashCode()) else -> text.hashCode() } + operator fun plus(another: Candidate): Candidate { + val newText = this.text + another.text + val newRomanization = this.romanization + String.space + another.romanization + val newInput = this.input + another.input + val newMark = this.mark + String.space + another.mark + return Candidate(text = newText, romanization = newRomanization, input = newInput, mark = newMark) + } } fun Candidate.transformed(characterStandard: CharacterStandard): Candidate { diff --git a/app/src/main/java/org/jyutping/jyutping/keyboard/Engine.kt b/app/src/main/java/org/jyutping/jyutping/keyboard/Engine.kt index b46c832..557d4ab 100644 --- a/app/src/main/java/org/jyutping/jyutping/keyboard/Engine.kt +++ b/app/src/main/java/org/jyutping/jyutping/keyboard/Engine.kt @@ -6,24 +6,26 @@ import org.jyutping.jyutping.utilities.DatabaseHelper object Engine { fun suggest(text: String, segmentation: Segmentation, db: DatabaseHelper): List { - if (db.canProcess(text).not()) return emptyList() - if (segmentation.maxSchemeLength() < 1) return processVerbatim(text, db) - return process(text, segmentation, db) + if (segmentation.maxSchemeLength() < 1) { + return processVerbatim(text, db) + } else { + return process(text, segmentation, db) + } } - private fun processVerbatim(text: String, db: DatabaseHelper): List { + private fun processVerbatim(text: String, db: DatabaseHelper, limit: Int? = null): List { val rounds: MutableList> = mutableListOf() for (number in text.indices) { val leading = text.dropLast(number) - val round = db.match(text = leading, input = leading) + db.shortcut(text = leading) + val round = db.match(text = leading, input = leading, limit = limit) + db.shortcut(leading, limit) rounds.add(round) } return rounds.flatten().distinct() } - private fun process(text: String, segmentation: Segmentation, db: DatabaseHelper): List { + private fun process(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List { val textLength = text.length - val primary = query(text, segmentation, db) + val primary = query(text, segmentation, db, limit) val firstInputLength = primary.firstOrNull()?.input?.length ?: 0 - if (firstInputLength == 0) return processVerbatim(text, db) + if (firstInputLength == 0) return processVerbatim(text, db, limit) if (firstInputLength == textLength) return primary val prefixes: List = run { if (segmentation.maxSchemeLength() >= textLength) emptyList() @@ -34,7 +36,7 @@ object Engine { val schemeAnchors = scheme.mapNotNull { it.text.firstOrNull() } val anchors: String = (schemeAnchors + lastAnchor).joinToString(separator = String.empty) val text2mark = scheme.joinToString(separator = String.space) { it.text } + String.space + tail - val shortcut = db.shortcut(anchors) + val shortcut = db.shortcut(anchors, limit) .filter { candidate -> candidate.romanization.filter { it.isDigit().not() }.startsWith(text2mark) } .map { Candidate(text = it.text, romanization = it.romanization, input = text, mark = text2mark) } shortcuts.add(shortcut) @@ -42,17 +44,31 @@ object Engine { shortcuts.flatten() } if (prefixes.isNotEmpty()) return prefixes + primary - return primary + val headTexts = primary.map { it.input }.distinct() + val concatenated: MutableList> = mutableListOf() + for (headText in headTexts) { + val headInputLength = headText.length + val tailText = text.drop(headInputLength) + if (db.canProcess(tailText).not()) continue + val tailSegmentation = Segmentor.segment(tailText, db) + val tailCandidates = process(text = tailText, segmentation = tailSegmentation, db = db, limit = 8).take(100) + if (tailCandidates.isEmpty()) continue + val headCandidates = primary.takeWhile { it.input == headText }.take(8) + val combines = headCandidates.map { head -> tailCandidates.map { head + it } } + concatenated.add(combines.flatten()) + } + val preferredConcatenated = preferred(text, concatenated.flatten().distinct()).take(1) + return preferredConcatenated + primary } - private fun query(text: String, segmentation: Segmentation, db: DatabaseHelper): List { + private fun query(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List { val textLength = text.length - val searches = search(text, segmentation, db) + val searches = search(text, segmentation, db, limit) val preferredSearched = searches.filter { it.input.length == textLength } - val matched = db.match(text = text, input = text) - val shortcut = db.shortcut(text) + val matched = db.match(text = text, input = text, limit = limit) + val shortcut = db.shortcut(text, limit) return (matched + preferredSearched + shortcut + searches).distinct() } - private fun search(text: String, segmentation: Segmentation, db: DatabaseHelper): List { + private fun search(text: String, segmentation: Segmentation, db: DatabaseHelper, limit: Int? = null): List { val textLength = text.length val perfectSchemes = segmentation.filter { it.length() == textLength } if (perfectSchemes.isNotEmpty()) { @@ -63,7 +79,7 @@ object Engine { val pingText = slice.joinToString(separator = String.empty) { it.origin } val inputText = slice.joinToString(separator = String.empty) { it.text } val text2mark = slice.joinToString(separator = String.space) { it.text } - val matched = db.match(text = pingText, input = inputText, mark = text2mark) + val matched = db.match(text = pingText, input = inputText, mark = text2mark, limit = limit) matches.add(matched) } } @@ -74,13 +90,12 @@ object Engine { val pingText = scheme.joinToString(separator = String.empty) { it.origin } val inputText = scheme.joinToString(separator = String.empty) { it.text } val text2mark = scheme.joinToString(separator = String.space) { it.text } - val matched = db.match(text = pingText, input = inputText, mark = text2mark) + val matched = db.match(text = pingText, input = inputText, mark = text2mark, limit = limit) matches.add(matched) } return ordered(textLength, matches.flatten()) } } - private fun preferred(text: String, candidates: List): List { val sorted = candidates.sortedWith(compareBy({-it.input.length}, {-it.text.length})) val matched = sorted.filter { candidate -> candidate.romanization.filter { it.isLetter() } == text } diff --git a/app/src/main/java/org/jyutping/jyutping/utilities/DatabaseHelper.kt b/app/src/main/java/org/jyutping/jyutping/utilities/DatabaseHelper.kt index f585adb..e59f21e 100644 --- a/app/src/main/java/org/jyutping/jyutping/utilities/DatabaseHelper.kt +++ b/app/src/main/java/org/jyutping/jyutping/utilities/DatabaseHelper.kt @@ -337,9 +337,8 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper( } fun canProcess(text: String): Boolean { - val value = text.firstOrNull()?.intercode() ?: 0 - if (value == 0) return false - val code = if (value == 44) 29 else value // Replace 'y' with 'j' + val value: Int = text.firstOrNull()?.intercode() ?: return false + val code: Int = if (value == 44) 29 else value // Replace 'y' with 'j' val command = "SELECT rowid FROM lexicontable WHERE shortcut = $code LIMIT 1;" val cursor = this.readableDatabase.rawQuery(command, null) if (cursor.moveToFirst()) { @@ -349,11 +348,12 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper( return false } } - fun shortcut(text: String): List { + fun shortcut(text: String, limit: Int? = null): List { val code: Int = text.shortcutCharcode() ?: 0 if (code == 0) return emptyList() + val limitValue: Int = limit ?: 50 val candidates: MutableList = mutableListOf() - val command = "SELECT rowid, word, romanization FROM lexicontable WHERE shortcut = $code LIMIT 50;" + val command = "SELECT rowid, word, romanization FROM lexicontable WHERE shortcut = $code LIMIT ${limitValue};" val cursor = this.readableDatabase.rawQuery(command, null) while (cursor.moveToNext()) { val order = cursor.getInt(0) @@ -365,11 +365,12 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper( cursor.close() return candidates } - fun match(text: String, input: String, mark: String? = null): List { + fun match(text: String, input: String, mark: String? = null, limit: Int? = null): List { if (text.isBlank()) return emptyList() val code: Int = text.hashCode() + val limitValue: Int = limit ?: -1 val candidates: MutableList = mutableListOf() - val command = "SELECT rowid, word, romanization FROM lexicontable WHERE ping = ${code};" + val command = "SELECT rowid, word, romanization FROM lexicontable WHERE ping = $code LIMIT ${limitValue};" val cursor = this.readableDatabase.rawQuery(command, null) while (cursor.moveToNext()) { val order = cursor.getInt(0) @@ -454,7 +455,7 @@ class DatabaseHelper(context: Context, databaseName: String) : SQLiteOpenHelper( if (code == 0) return emptyList() val items: MutableList = mutableListOf() val limitValue: Int = limit ?: 50 - val command = "SELECT rowid, word, pinyin FROM pinyintable WHERE shortcut = ${code} LIMIT ${limitValue};" + val command = "SELECT rowid, word, pinyin FROM pinyintable WHERE shortcut = $code LIMIT ${limitValue};" val cursor = this.readableDatabase.rawQuery(command, null) while (cursor.moveToNext()) { val rowID = cursor.getInt(0)