From 952aa3fb9b34f365821899ab171621c89f9fa537 Mon Sep 17 00:00:00 2001 From: sigoden Date: Tue, 31 Oct 2023 18:34:38 +0800 Subject: [PATCH] chore: move split_line_* from repl/cmd.rs to utils/ (#190) --- README.md | 2 +- config.example.yaml | 2 +- src/cli.rs | 2 +- src/config/mod.rs | 2 +- src/render/cmd.rs | 218 +--------------------------------------- src/render/repl.rs | 3 +- src/utils/mod.rs | 2 + src/utils/split_line.rs | 216 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 225 insertions(+), 222 deletions(-) create mode 100644 src/utils/split_line.rs diff --git a/README.md b/README.md index 27317844..04142e65 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ temperature: 1.0 # GPT temperature, between 0 and 2 save: true # Whether to save the message highlight: true # Set false to turn highlight light_theme: false # Whether to use a light theme -wrap: no # Specify the text-wrapping mode (*no*, auto, ) +wrap: no # Specify the text-wrapping mode (no, auto, ) wrap_code: false # Whether wrap code block auto_copy: false # Automatically copy the last output to the clipboard keybindings: emacs # REPL keybindings. values: emacs, vi diff --git a/config.example.yaml b/config.example.yaml index 3b3a6ac7..52c147c5 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -3,7 +3,7 @@ temperature: 1.0 # GPT temperature, between 0 and 2 save: true # Whether to save the message highlight: true # Set false to turn highlight light_theme: false # Whether to use a light theme -wrap: no # Specify the text-wrapping mode (no*, auto, ) +wrap: no # Specify the text-wrapping mode (no, auto, ) wrap_code: false # Whether wrap code block auto_copy: false # Automatically copy the last output to the clipboard keybindings: emacs # REPL keybindings. 
values: emacs, vi diff --git a/src/cli.rs b/src/cli.rs index 08141102..629266c4 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -18,7 +18,7 @@ pub struct Cli { /// No stream output #[clap(short = 'S', long)] pub no_stream: bool, - /// Specify the text-wrapping mode (no*, auto, ) + /// Specify the text-wrapping mode (no, auto, ) #[clap(short = 'w', long)] pub wrap: Option, /// Use light theme diff --git a/src/config/mod.rs b/src/config/mod.rs index c05d2e90..956bea01 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -65,7 +65,7 @@ pub struct Config { pub dry_run: bool, /// Whether to use a light theme pub light_theme: bool, - /// Specify the text-wrapping mode (*no*, auto, ) + /// Specify the text-wrapping mode (no, auto, ) pub wrap: Option, /// Whether wrap code block pub wrap_code: bool, diff --git a/src/render/cmd.rs b/src/render/cmd.rs index 57b62568..115170b7 100644 --- a/src/render/cmd.rs +++ b/src/render/cmd.rs @@ -2,6 +2,7 @@ use super::MarkdownRender; use crate::print_now; use crate::repl::{ReplyStreamEvent, SharedAbortSignal}; +use crate::utils::{spaces, split_line_sematic, split_line_tail}; use anyhow::Result; use crossbeam::channel::Receiver; @@ -65,220 +66,3 @@ pub fn cmd_render_stream( } Ok(()) } - -fn split_line_sematic(text: &str) -> Option<(String, String)> { - let mut balance: Vec = Vec::new(); - let chars: Vec = text.chars().collect(); - let mut index = 0; - let len = chars.len(); - while index < len - 1 { - let ch = chars[index]; - if balance.is_empty() - && ((matches!(ch, ',' | '.' 
| ';') && chars[index + 1].is_whitespace()) - || matches!(ch, ',' | '。' | ';')) - { - let (output, remain) = chars.split_at(index + 1); - return Some((output.iter().collect(), remain.iter().collect())); - } - if index + 2 < len && do_balance(&mut balance, &chars[index..=index + 2]) { - index += 3; - continue; - } - if do_balance(&mut balance, &chars[index..=index + 1]) { - index += 2; - continue; - } - do_balance(&mut balance, &chars[index..=index]); - index += 1; - } - - None -} - -pub(crate) fn split_line_tail(text: &str) -> (&str, &str) { - if let Some((head, tail)) = text.rsplit_once('\n') { - (head, tail) - } else { - ("", text) - } -} - -fn spaces(n: usize) -> String { - " ".repeat(n) -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -enum Kind { - ParentheseStart, - ParentheseEnd, - BracketStart, - BracketEnd, - Asterisk, - Asterisk2, - SingleQuota, - DoubleQuota, - Tilde, - Tilde2, - Backtick, - Backtick3, -} - -impl Kind { - fn from_chars(chars: &[char]) -> Option { - let kind = match chars.len() { - 1 => match chars[0] { - '(' => Self::ParentheseStart, - ')' => Self::ParentheseEnd, - '[' => Self::BracketStart, - ']' => Self::BracketEnd, - '*' => Self::Asterisk, - '\'' => Self::SingleQuota, - '"' => Self::DoubleQuota, - '~' => Self::Tilde, - '`' => Self::Backtick, - _ => return None, - }, - 2 if chars[0] == chars[1] => match chars[0] { - '*' => Self::Asterisk2, - '~' => Self::Tilde2, - _ => return None, - }, - 3 => { - if chars == ['`', '`', '`'] { - Self::Backtick3 - } else { - return None; - } - } - _ => return None, - }; - Some(kind) - } -} - -fn do_balance(balance: &mut Vec, chars: &[char]) -> bool { - Kind::from_chars(chars).map_or(false, |kind| { - let last = balance.last(); - match (kind, last) { - (Kind::ParentheseEnd, Some(&Kind::ParentheseStart)) - | (Kind::BracketEnd, Some(&Kind::BracketStart)) - | (Kind::Asterisk, Some(&Kind::Asterisk)) - | (Kind::Asterisk2, Some(&Kind::Asterisk2)) - | (Kind::SingleQuota, Some(&Kind::SingleQuota)) - | 
(Kind::DoubleQuota, Some(&Kind::DoubleQuota)) - | (Kind::Tilde, Some(&Kind::Tilde)) - | (Kind::Tilde2, Some(&Kind::Tilde2)) - | (Kind::Backtick, Some(&Kind::Backtick)) - | (Kind::Backtick3, Some(&Kind::Backtick3)) => { - balance.pop(); - true - } - ( - Kind::ParentheseStart - | Kind::BracketStart - | Kind::Asterisk - | Kind::Asterisk2 - | Kind::SingleQuota - | Kind::DoubleQuota - | Kind::Tilde - | Kind::Tilde2 - | Kind::Backtick - | Kind::Backtick3, - _, - ) => { - balance.push(kind); - true - } - _ => false, - } - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - macro_rules! assert_split_line { - ($a:literal, $b:literal, true) => { - assert_eq!( - split_line_sematic(&format!("{}{}", $a, $b)), - Some(($a.into(), $b.into())) - ); - }; - ($a:literal, $b:literal, false) => { - assert_eq!(split_line_sematic(&format!("{}{}", $a, $b)), None); - }; - } - - #[test] - fn test_split_line() { - assert_split_line!( - "Lorem ipsum dolor sit amet,", - " consectetur adipiscing elit.", - true - ); - assert_split_line!( - "Lorem ipsum dolor sit amet.", - " consectetur adipiscing elit.", - true - ); - assert_split_line!("黃更室幼許刀知,", "波食小午足田世根候法。", true); - assert_split_line!("黃更室幼許刀知。", "波食小午足田世根候法。", true); - assert_split_line!("黃更室幼許刀知;", "波食小午足田世根候法。", true); - assert_split_line!( - "Lorem ipsum (dolor sit amet).", - " consectetur adipiscing elit.", - true - ); - assert_split_line!( - "Lorem ipsum dolor sit `amet,", - " consectetur` adipiscing elit.", - false - ); - assert_split_line!( - "Lorem ipsum dolor sit ```amet,", - " consectetur``` adipiscing elit.", - false - ); - assert_split_line!( - "Lorem ipsum dolor sit *amet,", - " consectetur* adipiscing elit.", - false - ); - assert_split_line!( - "Lorem ipsum dolor sit **amet,", - " consectetur** adipiscing elit.", - false - ); - assert_split_line!( - "Lorem ipsum dolor sit ~amet,", - " consectetur~ adipiscing elit.", - false - ); - assert_split_line!( - "Lorem ipsum dolor sit ~~amet,", - " consectetur~~ adipiscing elit.", 
// Line-splitting helpers imported from `crate::utils` by `src/render/cmd.rs` and
// `src/render/repl.rs` (contents of the new file `src/utils/split_line.rs`).
//
// NOTE(review): in the collapsed patch text, the lines replaced here also carried the tail of
// the tests removed from `src/render/cmd.rs`, the `src/render/repl.rs` hunk (import
// `split_line_tail` from `crate::utils` instead of `super::cmd`) and the `src/utils/mod.rs`
// hunk (`mod split_line;` plus `pub use self::split_line::*;`).

/// Splits `text` right after the first sentence-level punctuation mark that is not inside an
/// open delimiter span.
///
/// A split point is either
/// * an ASCII `,`, `.` or `;` that is immediately followed by whitespace, or
/// * one of the full-width marks `,`, `。` or `;` (no following whitespace required).
///
/// Delimiters tracked while scanning are `(`/`)`, `[`/`]`, `*`, `**`, `'`, `"`, `~`, `~~`,
/// `` ` `` and ```` ``` ````; punctuation seen while any of them is still open is skipped.
///
/// Returns `Some((head, rest))` where `head` ends with the punctuation mark and
/// `head + rest == text`. Returns `None` when no split point exists. The last character of
/// `text` is never treated as a split point, so empty and single-character inputs yield `None`.
pub fn split_line_sematic(text: &str) -> Option<(String, String)> {
    let chars: Vec<char> = text.chars().collect();
    let len = chars.len();
    let mut balance: Vec<Kind> = Vec::new();
    let mut index = 0;
    // Written as `index + 1 < len` instead of `index < len - 1`: for empty input `len - 1`
    // underflows `usize`, which panics when overflow checks are on and otherwise wraps and
    // indexes out of bounds.
    while index + 1 < len {
        let ch = chars[index];
        let ascii_break = matches!(ch, ',' | '.' | ';') && chars[index + 1].is_whitespace();
        let wide_break = matches!(ch, ',' | '。' | ';');
        if balance.is_empty() && (ascii_break || wide_break) {
            let (head, rest) = chars.split_at(index + 1);
            return Some((head.iter().collect(), rest.iter().collect()));
        }
        // Try the longest delimiter first so "```" and "**" are not read as runs of single
        // markers. The two-character window is always in bounds because `index + 1 < len`.
        if index + 2 < len && do_balance(&mut balance, &chars[index..index + 3]) {
            index += 3;
        } else if do_balance(&mut balance, &chars[index..index + 2]) {
            index += 2;
        } else {
            // Not a delimiter (or an unmatched closer): still advance by one character.
            do_balance(&mut balance, &chars[index..=index]);
            index += 1;
        }
    }

    None
}

/// Splits `text` at its last `'\n'`.
///
/// Returns `(head, tail)` where `tail` is everything after the last newline and `head` is
/// everything before it (the newline itself is dropped). When `text` has no newline the result
/// is `("", text)`.
pub fn split_line_tail(text: &str) -> (&str, &str) {
    text.rsplit_once('\n').unwrap_or(("", text))
}

/// Returns a string made of `n` ASCII space characters.
pub fn spaces(n: usize) -> String {
    " ".repeat(n)
}

/// Delimiter tokens recognised by [`do_balance`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Kind {
    /// `(`
    ParentheseStart,
    /// `)`
    ParentheseEnd,
    /// `[`
    BracketStart,
    /// `]`
    BracketEnd,
    /// `*`
    Asterisk,
    /// `**`
    Asterisk2,
    /// `'`
    SingleQuota,
    /// `"`
    DoubleQuota,
    /// `~`
    Tilde,
    /// `~~`
    Tilde2,
    /// A single backtick.
    Backtick,
    /// Three consecutive backticks.
    Backtick3,
}

impl Kind {
    /// Maps a window of one to three characters to the delimiter it spells, if any.
    ///
    /// Only exact matches count: a two-character window must be `**` or `~~`, and a
    /// three-character window must be three backticks.
    fn from_chars(chars: &[char]) -> Option<Self> {
        let kind = match chars {
            ['('] => Self::ParentheseStart,
            [')'] => Self::ParentheseEnd,
            ['['] => Self::BracketStart,
            [']'] => Self::BracketEnd,
            ['*'] => Self::Asterisk,
            ['\''] => Self::SingleQuota,
            ['"'] => Self::DoubleQuota,
            ['~'] => Self::Tilde,
            ['`'] => Self::Backtick,
            ['*', '*'] => Self::Asterisk2,
            ['~', '~'] => Self::Tilde2,
            ['`', '`', '`'] => Self::Backtick3,
            _ => return None,
        };
        Some(kind)
    }

    /// Whether `self` terminates the span started by `open`.
    fn closes(self, open: Self) -> bool {
        match (self, open) {
            (Self::ParentheseEnd, Self::ParentheseStart)
            | (Self::BracketEnd, Self::BracketStart) => true,
            // Every other delimiter is symmetric: the same token opens and closes the span.
            _ => self == open && !matches!(self, Self::ParentheseStart | Self::BracketStart),
        }
    }

    /// Whether `self` may start a new span (everything except the two dedicated closers).
    fn is_opener(self) -> bool {
        !matches!(self, Self::ParentheseEnd | Self::BracketEnd)
    }
}

/// Feeds one candidate delimiter window into the `balance` stack.
///
/// * If `chars` spells a delimiter that closes the span on top of the stack, that span is
///   popped.
/// * Otherwise, if it spells a delimiter that can open a span, it is pushed.
///
/// Returns `true` when the window was consumed as a delimiter, and `false` when it is not a
/// delimiter at all or is a `)` / `]` with no matching opener on top of the stack.
fn do_balance(balance: &mut Vec<Kind>, chars: &[char]) -> bool {
    let kind = match Kind::from_chars(chars) {
        Some(kind) => kind,
        None => return false,
    };
    match balance.last() {
        Some(&open) if kind.closes(open) => {
            balance.pop();
            true
        }
        _ if kind.is_opener() => {
            balance.push(kind);
            true
        }
        _ => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    macro_rules! assert_split_line {
        ($a:literal, $b:literal, true) => {
            assert_eq!(
                split_line_sematic(&format!("{}{}", $a, $b)),
                Some(($a.into(), $b.into()))
            );
        };
        ($a:literal, $b:literal, false) => {
            assert_eq!(split_line_sematic(&format!("{}{}", $a, $b)), None);
        };
    }

    #[test]
    fn test_split_line_empty() {
        assert_eq!(split_line_sematic(""), None);
    }

    #[test]
    fn test_split_line() {
        assert_split_line!(
            "Wikipedia is a free online encyclopedia,",
            " that anyone can edit,",
            true
        );
        assert_split_line!(
            "Wikipedia is a free online encyclopedia.",
            " that anyone can edit,",
            true
        );
        assert_split_line!("床前明月光,", "疑是地上霜。", true);
        assert_split_line!("床前明月光。", "疑是地上霜。", true);
        assert_split_line!("床前明月光;", "疑是地上霜。", true);
        assert_split_line!(
            "Wikipedia is (a free online encyclopedia).",
            " that anyone can edit.",
            true
        );
        assert_split_line!(
            "Wikipedia is a free online `encyclopedia,",
            " that` anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online ```encyclopedia,",
            " that``` anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online *encyclopedia,",
            " that* anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online **encyclopedia,",
            " that** anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online ~encyclopedia,",
            " that~ anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online ~~encyclopedia,",
            " that~~ anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online ``encyclopedia,",
            " that`` anyone can edit.",
            true
        );
        assert_split_line!(
            "Wikipedia is a free online \"encyclopedia,",
            " that\" anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online 'encyclopedia,",
            " that' anyone can edit.",
            false
        );
        assert_split_line!(
            "Wikipedia is a free online encyclopedia.",
            "that anyone can edit.",
            false
        );
    }
}