Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compose: allow overlapping sequences #398

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 55 additions & 14 deletions include/xkbcommon/xkbcommon-compose.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,23 @@ extern "C" {
* @page compose-conflicting Conflicting Sequences
* @parblock
*
* To avoid ambiguity, a sequence is not allowed to be a prefix of another.
* Sequences of length 1 are allowed.
*
* <b><em>Without</em> `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`</b>
*
* To avoid ambiguity, a sequence is *not* allowed to be a prefix of another.
* In such a case, the conflict is resolved thus:
*
* 1. A longer sequence overrides a shorter one.
* 2. An equal sequence overrides an existing one.
* 3. A shorter sequence does not override a longer one.
*
* Sequences of length 1 are allowed.
* <b><em>With</em> `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`</b>
*
* Overlapping sequences of different lengths are *allowed* to co-exist.
* Conflicts are resolved with the following rule:
*
* 1. An equal sequence overrides an existing one.
*
* @endparblock
*/
Expand Down Expand Up @@ -141,7 +150,11 @@ struct xkb_compose_state;
/** Flags affecting Compose file compilation. */
enum xkb_compose_compile_flags {
/** Do not apply any flags. */
XKB_COMPOSE_COMPILE_NO_FLAGS = 0
XKB_COMPOSE_COMPILE_NO_FLAGS = 0,
/** Allow overlapping sequences, i.e. a sequence that is a prefix of
* another, longer sequence. See @ref compose-conflicting.
* @since 1.7.0
*/
XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES
};

/** The recognized Compose file formats. */
Expand Down Expand Up @@ -450,7 +463,7 @@ enum xkb_compose_state_flags {
* @param flags
* Optional flags for the compose state, or 0.
*
* @returns A new compose state, or NULL on failure.
* @returns A new compose state, or `NULL` on failure.
*
* @memberof xkb_compose_state
*/
Expand All @@ -471,7 +484,7 @@ xkb_compose_state_ref(struct xkb_compose_state *state);
/**
* Release a reference on a compose state object, and possibly free it.
*
* @param state The object. If NULL, do nothing.
* @param state The object. If `NULL`, do nothing.
*
* @memberof xkb_compose_state
*/
Expand Down Expand Up @@ -502,7 +515,17 @@ enum xkb_compose_status {
/** A complete sequence has been matched. */
XKB_COMPOSE_COMPOSED,
/** The last sequence was cancelled due to an unmatched keysym. */
XKB_COMPOSE_CANCELLED
XKB_COMPOSE_CANCELLED,
/** A complete sequence has been matched, but a longer sequence also exists.
*
* @since 1.7.0
*/
XKB_COMPOSE_CANDIDATE,
/** The previously proposed candidate sequence was accepted, because the
* keysym fed did not advance any of the longer possible sequences.
*
* @since 1.7.0
*/
XKB_COMPOSE_CANDIDATE_ACCEPTED
};

/** The effect of a keysym fed to xkb_compose_state_feed(). */
Expand All @@ -524,27 +547,45 @@ enum xkb_compose_feed_result {
* have no effect on the status or otherwise.
*
* The following is a description of the possible status transitions, in
* the format CURRENT STATUS => NEXT STATUS, given a non-ignored input
* keysym `keysym`:
* the format `CURRENT STATUS` => `NEXT STATUS`, given a non-ignored
* input keysym `keysym`:
*
@verbatim
NOTHING or CANCELLED or COMPOSED =>
NOTHING or CANCELLED or COMPOSED or CANDIDATE_ACCEPTED =>
NOTHING if keysym does not start a sequence.
COMPOSING if keysym starts a sequence.
CANDIDATE if keysym starts and terminates a single-keysym sequence,
but a longer sequence also exists.
COMPOSED if keysym starts and terminates a single-keysym sequence,
and no longer sequence exists.

COMPOSING =>
COMPOSING if keysym advances any of the currently possible
sequences but does not terminate any of them.
CANDIDATE if keysym terminates one of the currently possible
sequences, but a longer sequence also exists.
COMPOSED if keysym terminates one of the currently possible
sequences, and no longer sequence exists.
CANCELLED if keysym does not advance any of the currently
possible sequences.

CANDIDATE =>
COMPOSING if keysym advances any of the currently possible
sequences but does not terminate any of them.
CANDIDATE if keysym terminates one of the currently possible
sequences, but a longer sequence also exists.
COMPOSED if keysym terminates one of the currently possible
sequences, and no longer sequence exists.
CANDIDATE_ACCEPTED
if keysym does not advance any of the currently
possible sequences, but a candidate was proposed previously.
@endverbatim
*
* @note `CANDIDATE` and `CANDIDATE_ACCEPTED` are only possible when compiling
* using `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`.
*
* The current Compose formats do not support multiple keysyms.
* Therefore, if you are using a function such as xkb_state_key_get_syms()
* and it returns more than one keysym, consider feeding XKB_KEY_NoSymbol
* and it returns more than one keysym, consider feeding `XKB_KEY_NoSymbol`
* instead.
*
* @param state
Expand All @@ -565,7 +606,7 @@ xkb_compose_state_feed(struct xkb_compose_state *state,
/**
* Reset the Compose sequence state machine.
*
* The status is set to XKB_COMPOSE_NOTHING, and the current sequence
* The status is set to `XKB_COMPOSE_NOTHING`, and the current sequence
* is discarded.
*
* @memberof xkb_compose_state
Expand All @@ -586,7 +627,7 @@ xkb_compose_state_get_status(struct xkb_compose_state *state);
* Get the result Unicode/UTF-8 string for a composed sequence.
*
* See @ref compose-overview for more details. This function is only
* useful when the status is XKB_COMPOSE_COMPOSED.
* useful when the status is `XKB_COMPOSE_COMPOSED` or `XKB_COMPOSE_CANDIDATE`.
*
* @param[in] state
* The compose state.
Expand Down Expand Up @@ -618,10 +659,10 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state,
* Get the result keysym for a composed sequence.
*
* See @ref compose-overview for more details. This function is only
* useful when the status is XKB_COMPOSE_COMPOSED.
* useful when the status is `XKB_COMPOSE_COMPOSED` or `XKB_COMPOSE_CANDIDATE`.
*
* @returns The result keysym. If the sequence is not complete, or does
* not specify a result keysym, returns XKB_KEY_NoSymbol.
* not specify a result keysym, returns `XKB_KEY_NoSymbol`.
*
* @memberof xkb_compose_state
**/
Expand Down
102 changes: 84 additions & 18 deletions src/compose/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,13 +346,23 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
uint32_t *pptr = NULL;
struct compose_node *node = NULL;
bool allow_overlapping;

/* Warn before potentially going over the limit, discard silently after. */
if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
// TODO: adapt limit if overlapping is disallowed?
/*
* Warn before potentially going over the limit, discard silently after.
*
* We may add up to production->len * 2 - 1 nodes:
* • one node per keysym in the sequence
* • plus one node per keysym for overlap, except for the last node.
*/
if (darray_size(table->nodes) + production->len * 2 - 1 + MAX_LHS_LEN > MAX_COMPOSE_NODES)
scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
if (darray_size(table->nodes) + production->len * 2 - 1 >= MAX_COMPOSE_NODES)
return;

allow_overlapping = !!(table->flags & XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES);

/*
* Insert the sequence to the ternary search tree, creating new nodes as
* needed.
Expand All @@ -375,8 +385,9 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
.lokid = 0,
.hikid = 0,
.internal = {
.eqkid = 0,
.resid = 0,
.is_leaf = false,
.eqkid = 0,
},
};
curr = darray_size(table->nodes);
Expand All @@ -396,47 +407,102 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
pptr = &node->hikid;
curr = node->hikid;
} else if (!last) {
/* Adding intermediate node */
if (node->is_leaf) {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
node->internal.eqkid = 0;
/* Existing leaf */
if (allow_overlapping) {
/* Backup overlapping sequence result */
struct compose_node overlapping = {
.keysym = node->keysym,
.lokid = 0,
.hikid = 0,
.leaf = node->leaf
};
darray_append(table->nodes, overlapping);
node = &darray_item(table->nodes, curr);
node->internal.resid = darray_size(table->nodes) - 1;
} else {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
node->internal.resid = 0;
}
/* Reset node */
node->internal.is_leaf = false;
node->internal.eqkid = 0;
}
lhs_pos++;
pptr = &node->internal.eqkid;
curr = node->internal.eqkid;
} else {
/* Adding the last node of the sequence and the result */
struct compose_node *result = NULL;
bool has_previous_leaf;
if (node->is_leaf) {
/* Existing leaf */
has_previous_leaf = true;
result = node;
} else if (node->internal.eqkid != 0) {
/* Existing non-leaf */
if (!allow_overlapping) {
scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
return;
} else if (node->internal.resid) {
/* Reuse existing overlapping sequence result */
result = &darray_item(table->nodes, node->internal.resid);
has_previous_leaf = true;
} else {
/* Create a new overlapping sequence result */
node->internal.resid = darray_size(table->nodes);
struct compose_node overlapping = {
.keysym = node->keysym,
.lokid = 0,
.hikid = 0,
.leaf = {
.utf8 = 0,
.is_leaf = true,
.keysym = XKB_KEY_NoSymbol
}
};
darray_append(table->nodes, overlapping);
node = &darray_item(table->nodes, curr);
result = &darray_item(table->nodes,
node->internal.resid);
has_previous_leaf = false;
}
} else {
/* New leaf */
has_previous_leaf = false;
node->is_leaf = true;
result = node;
}
if (has_previous_leaf) {
bool same_string =
(node->leaf.utf8 == 0 && !production->has_string) ||
(result->leaf.utf8 == 0 && !production->has_string) ||
(
node->leaf.utf8 != 0 && production->has_string &&
streq(&darray_item(table->utf8, node->leaf.utf8),
result->leaf.utf8 != 0 && production->has_string &&
streq(&darray_item(table->utf8, result->leaf.utf8),
production->string)
);
bool same_keysym =
(node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
(result->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
(
node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
node->leaf.keysym == production->keysym
result->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
result->leaf.keysym == production->keysym
);
if (same_string && same_keysym) {
scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
return;
} else {
scanner_warn(s, "this compose sequence already exists; overriding");
}
} else if (node->internal.eqkid != 0) {
scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
return;
}
node->is_leaf = true;
result->is_leaf = true;
if (production->has_string) {
node->leaf.utf8 = darray_size(table->utf8);
result->leaf.utf8 = darray_size(table->utf8);
darray_append_items(table->utf8, production->string,
strlen(production->string) + 1);
}
if (production->has_keysym) {
node->leaf.keysym = production->keysym;
result->leaf.keysym = production->keysym;
}
return;
}
Expand Down
33 changes: 24 additions & 9 deletions src/compose/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,18 @@ xkb_compose_state_get_status(struct xkb_compose_state *state)
prev_node = &darray_item(state->table->nodes, state->prev_context);
node = &darray_item(state->table->nodes, state->context);

if (state->context == 0 && !prev_node->is_leaf)
return XKB_COMPOSE_CANCELLED;

if (state->context == 0)
if (state->context == 0) {
if (!prev_node->is_leaf)
return prev_node->internal.resid
? XKB_COMPOSE_CANDIDATE_ACCEPTED
: XKB_COMPOSE_CANCELLED;
return XKB_COMPOSE_NOTHING;
}

if (!node->is_leaf)
return XKB_COMPOSE_COMPOSING;
return node->internal.resid
? XKB_COMPOSE_CANDIDATE
: XKB_COMPOSE_COMPOSING;

return XKB_COMPOSE_COMPOSED;
}
Expand All @@ -162,8 +166,14 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state,
const struct compose_node *node =
&darray_item(state->table->nodes, state->context);

if (!node->is_leaf)
goto fail;
if (!node->is_leaf) {
if (node->internal.resid) {
node = &darray_item(state->table->nodes,
node->internal.resid);
} else {
goto fail;
}
}

/* If there's no string specified, but only a keysym, try to do the
* most helpful thing. */
Expand Down Expand Up @@ -195,7 +205,12 @@ xkb_compose_state_get_one_sym(struct xkb_compose_state *state)
{
const struct compose_node *node =
&darray_item(state->table->nodes, state->context);
if (!node->is_leaf)
if (node->is_leaf) {
return node->leaf.keysym;
} else if (node->internal.resid) {
return darray_item(state->table->nodes,
node->internal.resid).leaf.keysym;
} else {
return XKB_KEY_NoSymbol;
return node->leaf.keysym;
}
}
Loading