forked from llelf/text-icu-translit
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from wireapp/mangoiv/upgrade-to-94
[feat] upgrade to text 2.0
- Loading branch information
Showing
19 changed files
with
511 additions
and
207 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
use flake . -Lv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
name: "Check Flake" | ||
on: | ||
workflow_dispatch: | ||
pull_request: | ||
push: | ||
jobs: | ||
check-flake: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: cachix/install-nix-action@v22 | ||
with: | ||
github_access_token: ${{ secrets.GITHUB_TOKEN }} | ||
- name: "flake check" | ||
run: nix flake check -Lv --allow-import-from-derivation --fallback --accept-flake-config | ||
- name: GHC 9.4 | ||
run: nix build .#ghc94-text-icu-translit -Lv --fallback --accept-flake-config | ||
- name: GHC 9.6 | ||
run: nix build .#ghc96-text-icu-translit -Lv --fallback --accept-flake-config |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
name: update-flake-lock | ||
on: | ||
workflow_dispatch: # allows manual triggering | ||
schedule: | ||
- cron: '0 0 * * 0' # runs weekly on Sunday at 00:00 | ||
|
||
jobs: | ||
lockfile: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v3 | ||
- name: Install Nix | ||
uses: DeterminateSystems/nix-installer-action@main | ||
- name: Update flake.lock | ||
uses: DeterminateSystems/update-flake-lock@main | ||
with: | ||
pr-title: "Update flake.lock" # Title of PR to be created | ||
pr-labels: | # Labels to be set on the PR | ||
dependencies | ||
automated |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.direnv | ||
result* | ||
dist-newstyle | ||
.pre-commit-config.yaml |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,39 +2,32 @@ | |
-- Module: Data.Text.ICU.Translit | ||
-- License: BSD-style | ||
-- Maintainer: [email protected] | ||
-- | ||
-- | ||
-- This module provides the bindings to the transliteration features | ||
-- by the ICU (International Components for Unicode) library. | ||
-- | ||
-- | ||
-- >>> IO.putStrLn $ transliterate (trans "name-any; ru") "\\N{RABBIT FACE} Nu pogodi!" | ||
-- 🐰 Ну погоди! | ||
-- | ||
-- | ||
-- >>> IO.putStrLn $ transliterate (trans "nl-title") "gelderse ijssel" | ||
-- Gelderse IJssel | ||
-- | ||
-- | ||
-- >>> IO.putStrLn $ transliterate (trans "ja") "Amsterdam" | ||
-- アムステルダム | ||
-- | ||
-- | ||
-- More information about the rules is | ||
-- <http://userguide.icu-project.org/transforms/general here>. | ||
module Data.Text.ICU.Translit (IO.Transliterator, trans, transliterate) where | ||
|
||
|
||
|
||
module Data.Text.ICU.Translit | ||
(IO.Transliterator, trans, transliterate) where | ||
|
||
import qualified Data.Text.ICU.Translit.IO as IO | ||
import System.IO.Unsafe | ||
import Data.Text | ||
|
||
import Data.Text.ICU.Translit.IO qualified as IO | ||
import System.IO.Unsafe | ||
|
||
-- | Construct new transliterator by name. Will throw an error if | ||
-- there is no such transliterator | ||
trans :: Text -> IO.Transliterator | ||
trans t = unsafePerformIO $ IO.transliterator t | ||
|
||
|
||
-- | Transliterate the text using the transliterator | ||
transliterate :: IO.Transliterator -> Text -> Text | ||
transliterate tr txt = unsafePerformIO $ IO.transliterate tr txt | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,69 @@ | ||
module Data.Text.ICU.Translit.IO | ||
(Transliterator, | ||
transliterator, | ||
transliterate) where | ||
( Transliterator, | ||
transliterator, | ||
transliterate, | ||
) | ||
where | ||
|
||
import Foreign | ||
import Data.Text | ||
import Data.Text.Foreign | ||
import Data.ByteString (ByteString) | ||
import Data.ByteString qualified as BS | ||
import Data.Text (Text) | ||
import Data.Text.Encoding qualified as T | ||
import Data.Text.ICU.Translit.ICUHelper | ||
( UChar, | ||
UErrorCode, | ||
handleError, | ||
handleFilledOverflowError, | ||
) | ||
import Foreign | ||
|
||
data UTransliterator | ||
|
||
foreign import ccall "trans.h __hs_translit_open_trans" openTrans | ||
:: Ptr UChar -> Int -> Ptr UErrorCode -> IO (Ptr UTransliterator) | ||
foreign import ccall "trans.h &__hs_translit_close_trans" closeTrans | ||
:: FunPtr (Ptr UTransliterator -> IO ()) | ||
foreign import ccall "trans.h __hs_translit_do_trans" doTrans | ||
:: Ptr UTransliterator -> Ptr UChar -> Int32 -> Int32 | ||
-> Ptr UErrorCode -> IO Int32 | ||
foreign import ccall "trans.h __hs_translit_open_trans" | ||
openTrans :: | ||
Ptr UChar -> Int -> Ptr UErrorCode -> IO (Ptr UTransliterator) | ||
|
||
foreign import ccall "trans.h &__hs_translit_close_trans" | ||
closeTrans :: | ||
FunPtr (Ptr UTransliterator -> IO ()) | ||
|
||
foreign import ccall "trans.h __hs_translit_do_trans" | ||
doTrans :: | ||
Ptr UTransliterator -> | ||
Ptr UChar -> | ||
Int32 -> | ||
Int32 -> | ||
Ptr UErrorCode -> | ||
IO Int32 | ||
|
||
data Transliterator = Transliterator { | ||
transPtr :: ForeignPtr UTransliterator, | ||
transSpec :: Text | ||
} | ||
|
||
data Transliterator = Transliterator | ||
{ transPtr :: ForeignPtr UTransliterator, | ||
transSpec :: Text | ||
} | ||
|
||
instance Show Transliterator where | ||
show tr = "Transliterator " ++ show (transSpec tr) | ||
|
||
|
||
show tr = "Transliterator " ++ show (transSpec tr) | ||
|
||
-- we just assume little endian | ||
transliterator :: Text -> IO Transliterator | ||
transliterator spec = | ||
useAsPtr spec $ \ptr len -> do | ||
q <- handleError $ openTrans ptr (fromIntegral len) | ||
ref <- newForeignPtr closeTrans q | ||
touchForeignPtr ref | ||
return $ Transliterator ref spec | ||
|
||
transliterator spec = do | ||
let specStr :: ByteString = T.encodeUtf16LE spec | ||
BS.useAsCStringLen specStr $ \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> do | ||
q <- handleError $ openTrans ptr (fromIntegral len) | ||
ref <- newForeignPtr closeTrans q | ||
return $ Transliterator ref spec | ||
|
||
transliterate :: Transliterator -> Text -> IO Text | ||
transliterate tr txt = do | ||
(fptr, len) <- asForeignPtr txt | ||
withForeignPtr fptr $ \ptr -> | ||
withForeignPtr (transPtr tr) $ \tr_ptr -> do | ||
handleFilledOverflowError ptr (fromIntegral len) | ||
(\dptr dlen -> | ||
doTrans tr_ptr dptr (fromIntegral len) (fromIntegral dlen)) | ||
(\dptr dlen -> | ||
fromPtr (castPtr dptr) (fromIntegral dlen)) | ||
|
||
|
||
let txtAsBs :: ByteString = T.encodeUtf16LE txt | ||
BS.useAsCStringLen txtAsBs \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> | ||
withForeignPtr (transPtr tr) $ \tr_ptr -> do | ||
handleFilledOverflowError | ||
ptr | ||
len | ||
( \dptr dlen -> | ||
doTrans tr_ptr dptr (fromIntegral len) dlen | ||
) | ||
( \dptr dlen -> | ||
T.decodeUtf16LE <$> BS.packCStringLen (castPtr dptr, dlen * 2) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,2 @@ | ||
|
||
import Data.Text.ICU.Translit | ||
import Data.Text.IO as IO | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
packages: | ||
. | ||
|
||
test-show-details: direct |
Oops, something went wrong.