Skip to content

Commit

Permalink
using user languages in tokenization and language detection
Browse files Browse the repository at this point in the history
  • Loading branch information
wcjord committed Oct 25, 2024
1 parent 7b60190 commit dd29817
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 125 deletions.
35 changes: 11 additions & 24 deletions lib/pangea/controllers/language_detection_controller.dart
Original file line number Diff line number Diff line change
Expand Up @@ -14,39 +14,39 @@ class LanguageDetectionRequest {
/// The full text from which to detect the language.
String fullText;

/// The base language of the user, if known. Including this is much preferred
/// The base language of the user that sent the message, if known. Including this is much preferred
/// and should return better results; however, it is not absolutely necessary.
/// This property is nullable to allow for situations where the languages are not set
/// at the time of the request.
String? userL1;
String? senderL1;

/// The target language of the user. This is expected to be set for the request
/// The target language of the user that sent the message. This is expected to be set for the request
/// but is nullable to handle edge cases where it might not be.
String? userL2;
String? senderL2;

LanguageDetectionRequest({
required this.fullText,
this.userL1 = "",
required this.userL2,
required this.senderL1,
required this.senderL2,
});

Map<String, dynamic> toJson() => {
'full_text': fullText,
'user_l1': userL1,
'user_l2': userL2,
'sender_l1': senderL1,
'sender_l2': senderL2,
};

@override
bool operator ==(Object other) {
if (identical(this, other)) return true;
return other is LanguageDetectionRequest &&
other.fullText == fullText &&
other.userL1 == userL1 &&
other.userL2 == userL2;
other.senderL1 == senderL1 &&
other.senderL2 == senderL2;
}

@override
int get hashCode => fullText.hashCode ^ userL1.hashCode ^ userL2.hashCode;
int get hashCode => fullText.hashCode ^ senderL1.hashCode ^ senderL2.hashCode;
}

class LanguageDetectionResponse {
Expand Down Expand Up @@ -125,19 +125,6 @@ class LanguageDetectionController {
_cacheClearTimer?.cancel();
}

/// Builds a [LanguageDetectionRequest] from the given values and forwards it
/// to [get].
///
/// Note the positional order: [userL2] comes before [userL1].
Future<LanguageDetectionResponse> detectLanguage(
  String fullText,
  String? userL2,
  String? userL1,
) async =>
    get(
      LanguageDetectionRequest(
        fullText: fullText,
        userL1: userL1,
        userL2: userL2,
      ),
    );

Future<LanguageDetectionResponse> get(
LanguageDetectionRequest params,
) async {
Expand Down
41 changes: 39 additions & 2 deletions lib/pangea/controllers/message_data_controller.dart
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import 'dart:async';
import 'dart:convert';

import 'package:fluffychat/pangea/config/environment.dart';
import 'package:fluffychat/pangea/controllers/base_controller.dart';
import 'package:fluffychat/pangea/controllers/pangea_controller.dart';
import 'package:fluffychat/pangea/extensions/pangea_room_extension/pangea_room_extension.dart';
import 'package:fluffychat/pangea/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/models/representation_content_model.dart';
import 'package:fluffychat/pangea/models/token_api_models.dart';
import 'package:fluffychat/pangea/models/tokens_event_content_model.dart';
import 'package:fluffychat/pangea/repo/tokens_repo.dart';
import 'package:fluffychat/pangea/network/requests.dart';
import 'package:fluffychat/pangea/network/urls.dart';
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:http/http.dart';
import 'package:matrix/matrix.dart';

import '../constants/pangea_event_types.dart';
Expand Down Expand Up @@ -49,14 +54,46 @@ class MessageDataController extends BaseController {
super.dispose();
}

/// Posts [request] to the tokenize endpoint and parses the reply.
///
/// The request is sent with [accessToken] and the configured choreo API key.
/// The response body is decoded as UTF-8 JSON into a [TokensResponseModel].
/// An empty token list is logged through [ErrorHandler] but is still
/// returned to the caller.
static Future<TokensResponseModel> _fetchTokens(
  String accessToken,
  TokensRequestModel request,
) async {
  final Response httpResponse = await Requests(
    choreoApiKey: Environment.choreoApiKey,
    accessToken: accessToken,
  ).post(
    url: PApiUrls.tokenize,
    body: request.toJson(),
  );

  // Decode from raw bytes so the payload is always interpreted as UTF-8.
  final String decodedBody = utf8.decode(httpResponse.bodyBytes);
  final TokensResponseModel parsed =
      TokensResponseModel.fromJson(jsonDecode(decodedBody));

  // Report (but do not fail on) a response that carries no tokens.
  if (parsed.tokens.isEmpty) {
    ErrorHandler.logError(
      e: Exception("empty tokens in tokenize response return"),
    );
  }

  return parsed;
}

/// get tokens from the server
/// if repEventId is not null, send the tokens to the room
Future<List<PangeaToken>> _getTokens({
required String? repEventId,
required TokensRequestModel req,
required Room? room,
}) async {
final TokensResponseModel res = await TokensRepo.tokenize(
final TokensResponseModel res = await _fetchTokens(
_pangeaController.userController.accessToken,
req,
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import 'dart:developer';
import 'package:fluffychat/pangea/extensions/pangea_event_extension.dart';
import 'package:fluffychat/pangea/matrix_event_wrappers/pangea_choreo_event.dart';
import 'package:fluffychat/pangea/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/models/token_api_models.dart';
import 'package:fluffychat/pangea/models/tokens_event_content_model.dart';
import 'package:fluffychat/pangea/repo/tokens_repo.dart';
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:matrix/matrix.dart';
Expand Down Expand Up @@ -135,13 +135,17 @@ class RepresentationEvent {
await MatrixState.pangeaController.messageData.getTokens(
repEventId: _event?.eventId,
room: _event?.room ?? parentMessageEvent.room,
// Jordan - for just tokens, it's not clear which languages to pass
req: TokensRequestModel(
fullText: text,
userL1:
langCode: langCode,
senderL1:
MatrixState.pangeaController.languageController.userL1?.langCode ??
LanguageKeys.unknownLanguage,
userL2: langCode,
// since langCode is known, senderL2 will be used to determine whether these tokens
// need pos/morph tags and whether lemmas are eligible to be marked as "save_vocab=true"
senderL2:
MatrixState.pangeaController.languageController.userL2?.langCode ??
LanguageKeys.unknownLanguage,
),
);

Expand Down
72 changes: 72 additions & 0 deletions lib/pangea/models/token_api_models.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import 'package:fluffychat/pangea/constants/model_keys.dart';

import 'pangea_token_model.dart';

/// Request payload sent to the tokenize endpoint.
///
/// Two requests are equal only when [fullText], [langCode], [senderL1] and
/// [senderL2] all match, so the same text requested for different languages
/// is never treated as the same request.
class TokensRequestModel {
  /// The text to be tokenized.
  String fullText;

  /// The language of [fullText], if known.
  ///
  /// It is used to determine which model to use in tokenizing.
  String? langCode;

  /// The base language of the sender.
  ///
  /// If [langCode] is not known, [senderL1] and [senderL2] are used to help
  /// determine the language of the text. If [langCode] is known, they are
  /// used to determine whether the tokens need pos/morph tags and whether
  /// lemmas are eligible to be marked as "save_vocab=true".
  String senderL1;

  /// The target language of the sender.
  ///
  /// If [langCode] is not known, [senderL1] and [senderL2] are used to help
  /// determine the language of the text. If [langCode] is known, they are
  /// used to determine whether the tokens need pos/morph tags and whether
  /// lemmas are eligible to be marked as "save_vocab=true".
  String senderL2;

  TokensRequestModel({
    required this.fullText,
    required this.langCode,
    required this.senderL1,
    required this.senderL2,
  });

  /// Serializes this request for the API.
  ///
  /// The sender languages are written under [ModelKey.userL1] and
  /// [ModelKey.userL2].
  // NOTE(review): the sender_* fields are sent under the user_* ModelKey
  // names; confirm this matches what the tokenize endpoint expects.
  Map<String, dynamic> toJson() => {
        ModelKey.fullText: fullText,
        ModelKey.userL1: senderL1,
        ModelKey.userL2: senderL2,
        ModelKey.langCode: langCode,
      };

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is TokensRequestModel &&
        other.fullText == fullText &&
        // langCode changes what is requested, so it must be part of equality.
        other.langCode == langCode &&
        other.senderL1 == senderL1 &&
        other.senderL2 == senderL2;
  }

  // Object.hash is order-sensitive, unlike XOR, which cancels out whenever
  // senderL1 and senderL2 are the same value.
  @override
  int get hashCode => Object.hash(fullText, langCode, senderL1, senderL2);
}

/// Parsed response from the tokenize endpoint.
class TokensResponseModel {
  /// The tokens parsed from the [ModelKey.tokens] entry of the response.
  List<PangeaToken> tokens;

  /// The value of the [ModelKey.lang] entry of the response.
  String lang;

  TokensResponseModel({required this.tokens, required this.lang});

  /// Builds a response from decoded JSON.
  ///
  /// Expects [json] to hold an iterable of token maps under
  /// [ModelKey.tokens] and a string under [ModelKey.lang]; a value of any
  /// other type makes the corresponding cast throw.
  factory TokensResponseModel.fromJson(
    Map<String, dynamic> json,
  ) =>
      TokensResponseModel(
        // The list literal is already a List<PangeaToken>, so no trailing
        // `.cast<PangeaToken>()` wrapper is needed.
        tokens: <PangeaToken>[
          for (final entry in json[ModelKey.tokens] as Iterable)
            PangeaToken.fromJson(entry as Map<String, dynamic>),
        ],
        lang: json[ModelKey.lang] as String,
      );
}
95 changes: 0 additions & 95 deletions lib/pangea/repo/tokens_repo.dart

This file was deleted.

0 comments on commit dd29817

Please sign in to comment.