From 785155d861fa9e5dbc67e1e6ccfd000d67da3240 Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Thu, 26 Oct 2023 11:57:11 +0200 Subject: [PATCH 1/6] [feat] upgrade to text 2.0 - convert from ut8 based text to utf16 based CString - use modern test framework to replace old one - add nix setup - remove some unnecessary files - add github actions setup - setup cabal project to adhere to conventions --- .DUMMY | 0 .envrc | 1 + .github/workflows/test-flake.yml | 21 +++ .github/workflows/update-flake-lock.yml | 21 +++ .gitignore | 4 + .travis.yml | 8 - Data/Text/ICU/Translit.hs | 23 +-- Data/Text/ICU/Translit/IO.hs | 90 ++++++----- Data/Text/ICU/Translit/Play.hs | 4 - Setup.hs | 2 - app/Main.hs | 28 ++++ cabal.project | 4 + flake.lock | 205 ++++++++++++++++++++++++ flake.nix | 58 +++++++ fourmolu.yaml | 12 ++ hie.yaml | 10 ++ test/Main.hs | 57 +++++++ tests/Test.hs | 64 -------- text-icu-translit.cabal | 109 ++++++++----- translit.hs | 36 ----- 20 files changed, 552 insertions(+), 205 deletions(-) delete mode 100644 .DUMMY create mode 100644 .envrc create mode 100644 .github/workflows/test-flake.yml create mode 100644 .github/workflows/update-flake-lock.yml create mode 100644 .gitignore delete mode 100644 .travis.yml delete mode 100644 Setup.hs create mode 100644 app/Main.hs create mode 100644 cabal.project create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 fourmolu.yaml create mode 100644 hie.yaml create mode 100644 test/Main.hs delete mode 100644 tests/Test.hs delete mode 100644 translit.hs diff --git a/.DUMMY b/.DUMMY deleted file mode 100644 index e69de29..0000000 diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..80377dd --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake -Lv diff --git a/.github/workflows/test-flake.yml b/.github/workflows/test-flake.yml new file mode 100644 index 0000000..b0e0717 --- /dev/null +++ b/.github/workflows/test-flake.yml @@ -0,0 +1,21 @@ +name: "Check Flake" +on: + workflow_dispatch: + pull_request: + push: +jobs: + check-flake: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: cachix/install-nix-action@v22 + with: + github_access_token: ${{ secrets.GITHUB_TOKEN }} + - name: "flake check" + run: nix flake check -Lv --allow-import-from-derivation --fallback --accept-flake-config + - name: GHC 9.2 + run: nix build .#ghc92-text-icu-translit -Lv --fallback --accept-flake-config + - name: GHC 9.4 + run: nix build .#ghc94-text-icu-translit -Lv --fallback --accept-flake-config + - name: GHC 9.6 + run: nix build .#ghc96-text-icu-translit -Lv --fallback --accept-flake-config diff --git a/.github/workflows/update-flake-lock.yml b/.github/workflows/update-flake-lock.yml new file mode 100644 index 0000000..9dfc825 --- /dev/null +++ b/.github/workflows/update-flake-lock.yml @@ -0,0 +1,21 @@ +name: update-flake-lock +on: + workflow_dispatch: # allows manual triggering + schedule: + - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00 + +jobs: + lockfile: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@main + - name: Update flake.lock + uses: DeterminateSystems/update-flake-lock@main + with: + pr-title: "Update flake.lock" # Title of PR to be created + pr-labels: | # Labels to be set on the PR + dependencies + automated diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7c45ea7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.direnv +result* +dist-newstyle +.pre-commit-config.yaml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0404594..0000000 --- a/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: haskell -ghc: - - 7.6 - - 7.8 - -install: - - sudo apt-get install libicu-dev - - cabal install --enable-tests --only-dependencies diff --git a/Data/Text/ICU/Translit.hs b/Data/Text/ICU/Translit.hs index c82d92f..c68b214 100644 --- a/Data/Text/ICU/Translit.hs +++ b/Data/Text/ICU/Translit.hs @@ -2,39 +2,32 @@ -- Module: Data.Text.ICU.Translit -- License: BSD-style -- Maintainer: me@lelf.lu --- +-- -- This module provides the bindings to the transliteration features -- by the ICU (International Components for Unicode) library. --- +-- -- >>> IO.putStrLn $ transliterate (trans "name-any; ru") "\\N{RABBIT FACE} Nu pogodi!" -- 🐰 Ну погоди! --- +-- -- >>> IO.putStrLn $ transliterate (trans "nl-title") "gelderse ijssel" -- Gelderse IJssel --- +-- -- >>> IO.putStrLn $ transliterate (trans "ja") "Amsterdam" -- アムステルダム --- +-- -- More information about the rules is -- . +module Data.Text.ICU.Translit (IO.Transliterator, trans, transliterate) where - - -module Data.Text.ICU.Translit - (IO.Transliterator, trans, transliterate) where - -import qualified Data.Text.ICU.Translit.IO as IO -import System.IO.Unsafe import Data.Text - +import Data.Text.ICU.Translit.IO qualified as IO +import System.IO.Unsafe -- | Construct new transliterator by name. Will throw an error if -- there is no such transliterator trans :: Text -> IO.Transliterator trans t = unsafePerformIO $ IO.transliterator t - -- | Transliterate the text using the transliterator transliterate :: IO.Transliterator -> Text -> Text transliterate tr txt = unsafePerformIO $ IO.transliterate tr txt - diff --git a/Data/Text/ICU/Translit/IO.hs b/Data/Text/ICU/Translit/IO.hs index 7c617d8..257966e 100644 --- a/Data/Text/ICU/Translit/IO.hs +++ b/Data/Text/ICU/Translit/IO.hs @@ -1,54 +1,70 @@ module Data.Text.ICU.Translit.IO - (Transliterator, - transliterator, - transliterate) where + ( Transliterator + , transliterator + , transliterate + ) +where -import Foreign -import Data.Text -import Data.Text.Foreign +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.Text (Text) +import Data.Text.Encoding qualified as T import Data.Text.ICU.Translit.ICUHelper + ( UChar + , UErrorCode + , handleError + , handleFilledOverflowError + ) +import Foreign data UTransliterator -foreign import ccall "trans.h __hs_translit_open_trans" openTrans +foreign import ccall "trans.h __hs_translit_open_trans" + openTrans :: Ptr UChar -> Int -> Ptr UErrorCode -> IO (Ptr UTransliterator) -foreign import ccall "trans.h &__hs_translit_close_trans" closeTrans - :: FunPtr (Ptr UTransliterator -> IO ()) -foreign import ccall "trans.h __hs_translit_do_trans" doTrans - :: Ptr UTransliterator -> Ptr UChar -> Int32 -> Int32 - -> Ptr UErrorCode -> IO Int32 - +foreign import ccall "trans.h &__hs_translit_close_trans" + closeTrans + :: FunPtr (Ptr UTransliterator -> IO ()) -data Transliterator = Transliterator { - transPtr :: ForeignPtr UTransliterator, - transSpec :: Text - } +foreign import ccall "trans.h __hs_translit_do_trans" + doTrans + :: Ptr UTransliterator + -> Ptr UChar + -> Int32 + -> Int32 + -> Ptr UErrorCode + -> IO Int32 +data Transliterator = Transliterator + { transPtr :: ForeignPtr UTransliterator + , transSpec :: Text + } instance Show Transliterator where - show tr = "Transliterator " ++ show (transSpec tr) - - + show tr = "Transliterator " ++ show (transSpec tr) +-- we just assume little endian transliterator :: Text -> IO Transliterator -transliterator spec = - useAsPtr spec $ \ptr len -> do - q <- handleError $ openTrans ptr (fromIntegral len) - ref <- newForeignPtr closeTrans q - touchForeignPtr ref - return $ Transliterator ref spec - +transliterator spec = do + let specStr :: ByteString = T.encodeUtf16LE spec + BS.useAsCStringLen specStr $ \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> do + q <- handleError $ openTrans ptr (fromIntegral len) + ref <- newForeignPtr closeTrans q + touchForeignPtr ref + return $ Transliterator ref spec transliterate :: Transliterator -> Text -> IO Text transliterate tr txt = do - (fptr, len) <- asForeignPtr txt - withForeignPtr fptr $ \ptr -> - withForeignPtr (transPtr tr) $ \tr_ptr -> do - handleFilledOverflowError ptr (fromIntegral len) - (\dptr dlen -> - doTrans tr_ptr dptr (fromIntegral len) (fromIntegral dlen)) - (\dptr dlen -> - fromPtr (castPtr dptr) (fromIntegral dlen)) - - + let txtAsBs :: ByteString = T.encodeUtf16LE txt + BS.useAsCStringLen txtAsBs \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> + withForeignPtr (transPtr tr) $ \tr_ptr -> do + handleFilledOverflowError + ptr + len + ( \dptr dlen -> + doTrans tr_ptr dptr (fromIntegral len) dlen + ) + ( \dptr dlen -> + T.decodeUtf16LE <$> BS.packCStringLen (castPtr dptr, dlen * 2) + ) diff --git a/Data/Text/ICU/Translit/Play.hs b/Data/Text/ICU/Translit/Play.hs index 0f20dd9..7355789 100644 --- a/Data/Text/ICU/Translit/Play.hs +++ b/Data/Text/ICU/Translit/Play.hs @@ -1,6 +1,2 @@ - import Data.Text.ICU.Translit import Data.Text.IO as IO - - - diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000..a9bcf08 --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,28 @@ +module Main (main) where + +import Data.Text qualified as T +import Data.Text.ICU.Translit (trans, transliterate) +import Lens.Family2 (over) +import Pipes (runEffect, (>->)) +import Pipes.Group as PG (maps) +import Pipes.Prelude as Pipes (map) +import Pipes.Text as PT (lines) +import Pipes.Text.IO as PT (stdin, stdout) +import System.Environment (getArgs) + +main :: IO () +main = do + args <- getArgs + case args of + [rule] -> + go' $ fun (T.pack rule) + _ -> + error "Usage" + where + fun :: T.Text -> T.Text -> T.Text + fun rule = transliterate (trans rule) + + go' f = + runEffect $ + over PT.lines (PG.maps (>-> Pipes.map f)) PT.stdin + >-> PT.stdout diff --git a/cabal.project b/cabal.project new file mode 100644 index 0000000..60f966e --- /dev/null +++ b/cabal.project @@ -0,0 +1,4 @@ +packages: + . + +test-show-details: direct diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..3672c35 --- /dev/null +++ b/flake.lock @@ -0,0 +1,205 @@ +{ + "nodes": { + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1673956053, + "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1685518550, + "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "gitignore": { + "inputs": { + "nixpkgs": [ + "pre-commit-hooks", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1660459072, + "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=", + "owner": "hercules-ci", + "repo": "gitignore.nix", + "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "gitignore.nix", + "type": "github" + } + }, + "haskell-flake": { + "locked": { + "lastModified": 1698155710, + "narHash": "sha256-WuAVmR0EP/DJDpx0p2wrmijL4sYYhqTixHPDPFXMOXI=", + "owner": "srid", + "repo": "haskell-flake", + "rev": "4f6a2cbe4ee9024ffebb834473592db6c6b2c943", + "type": "github" + }, + "original": { + "owner": "srid", + "repo": "haskell-flake", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1698266953, + "narHash": "sha256-jf72t7pC8+8h8fUslUYbWTX5rKsRwOzRMX8jJsGqDXA=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "75a52265bda7fd25e06e3a67dee3f0354e73243c", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1696019113, + "narHash": "sha256-X3+DKYWJm93DRSdC5M6K5hLqzSya9BjibtBsuARoPco=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "f5892ddac112a1e9b3612c39af1b72987ee5783a", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-stable": { + "locked": { + "lastModified": 1685801374, + "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "c37ca420157f4abc31e26f436c1145f8951ff373", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1689261696, + "narHash": "sha256-LzfUtFs9MQRvIoQ3MfgSuipBVMXslMPH/vZ+nM40LkA=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "df1eee2aa65052a18121ed4971081576b25d6b5c", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1696343447, + "narHash": "sha256-B2xAZKLkkeRFG5XcHHSXXcP7To9Xzr59KXeZiRf4vdQ=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "c9afaba3dfa4085dbd2ccb38dfade5141e33d9d4", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "pre-commit-hooks": { + "inputs": { + "flake-compat": "flake-compat", + "flake-utils": "flake-utils", + "gitignore": "gitignore", + "nixpkgs": "nixpkgs_2", + "nixpkgs-stable": "nixpkgs-stable" + }, + "locked": { + "lastModified": 1698227354, + "narHash": "sha256-Fi5H9jbaQLmLw9qBi/mkR33CoFjNbobo5xWdX4tKz1Q=", + "owner": "cachix", + "repo": "pre-commit-hooks.nix", + "rev": "bd38df3d508dfcdff52cd243d297f218ed2257bf", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "pre-commit-hooks.nix", + "type": "github" + } + }, + "root": { + "inputs": { + "haskell-flake": "haskell-flake", + "nixpkgs": "nixpkgs", + "parts": "parts", + "pre-commit-hooks": "pre-commit-hooks" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..3269026 --- /dev/null +++ b/flake.nix @@ -0,0 +1,58 @@ +{ + nixConfig.allow-import-from-derivation = true; + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + parts.url = "github:hercules-ci/flake-parts"; + haskell-flake.url = "github:srid/haskell-flake"; + pre-commit-hooks.url = "github:cachix/pre-commit-hooks.nix"; + }; + outputs = inputs: + inputs.parts.lib.mkFlake {inherit inputs;} { + systems = ["x86_64-linux"]; + imports = [ + inputs.haskell-flake.flakeModule + inputs.pre-commit-hooks.flakeModule + ]; + + perSystem = { + config, + pkgs, + ... + }: { + pre-commit = { + check.enable = true; + settings.hooks = { + cabal-fmt.enable = true; + fourmolu.enable = true; + hlint.enable = true; + + alejandra.enable = true; + statix.enable = true; + deadnix.enable = true; + }; + }; + packages.default = config.packages.ghc94-text-icu-translit; + devShells.default = config.devShells.ghc94; + haskellProjects = { + ghc92 = { + packages = {}; + settings = {}; + basePackages = pkgs.haskell.packages.ghc92; + devShell.mkShellArgs.shellHook = config.pre-commit.installationScript; + }; + ghc94 = { + packages = {}; + settings = {}; + basePackages = pkgs.haskell.packages.ghc94; + devShell.mkShellArgs.shellHook = config.pre-commit.installationScript; + }; + ghc96 = { + packages = {}; + settings = {}; + basePackages = pkgs.haskell.packages.ghc96; + devShell.mkShellArgs.shellHook = config.pre-commit.installationScript; + }; + }; + }; + }; +} diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..b6ae562 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,12 @@ +indentation: 2 +function-arrows: leading +comma-style: leading +import-export-style: leading +indent-wheres: false +record-brace-space: true +newlines-between-decls: 1 +haddock-style: single-line +let-style: inline +in-style: right-align +respectful: false +single-constraint-parens: never diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 0000000..488b90e --- /dev/null +++ b/hie.yaml @@ -0,0 +1,10 @@ +cradle: + cabal: + - path: "./" + component: "lib:text-icu-translit" + + - path: "app/Main.hs" + component: "text-icu-translit:exe:translit" + + - path: "test" + component: "text-icu-translit:test:text-icu-translit-test" diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..2dc2d0f --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Main (main) where + +import Data.Char (ord) +import Data.Text (Text) +import Data.Text qualified as T +import Data.Text.ICU qualified as U +import Data.Text.ICU.Translit (trans, transliterate) +import Test.Hspec +import Test.Hspec.QuickCheck +import Test.QuickCheck (Arbitrary (arbitrary, shrink), Property, elements, (===)) +import Text.Printf (printf) + +instance Arbitrary Text where + arbitrary = fmap T.pack arbitrary + shrink = fmap T.pack . shrink . T.unpack + +newtype IdempTr = IdempTr Text deriving (Show) + +instance Arbitrary IdempTr where + arbitrary = elements transes0 + where + transes0 = map IdempTr ["ru-en", "en-ru"] + +hexUnicode :: Text -> Text +hexUnicode txt = T.pack $ concat [fmt c | c <- T.unpack txt] + where + fmt c = printf (if ord c < 0x10000 then "U+%04X" else "U+%X") (ord c) + +prop_idemp :: (IdempTr, Text) -> Property +prop_idemp (IdempTr t, s) = transliterate tr (transliterate tr s) === transliterate tr s + where + tr = trans t + +prop_toLower :: Text -> Property +prop_toLower t = U.toLower U.Root t === transliterate (trans "Lower") t + +{- +NOTE: these two do not disagree and I think they're not supposed to +prop_toLower :: Text -> Property +prop_toLower t = T.toLower t === transliterate (trans "Lower") t +-} + +prop_hexUnicode :: Text -> Property +prop_hexUnicode t = hexUnicode t === transliterate (trans "hex/unicode") t + +main :: IO () +main = hspec spec + +spec :: Spec +spec = + describe "properties" $ modifyMaxSuccess (const 10000) do + prop "idempotence" prop_idemp + prop "toLower" prop_toLower + prop "hexUnicode" prop_hexUnicode diff --git a/tests/Test.hs b/tests/Test.hs deleted file mode 100644 index 6e52055..0000000 --- a/tests/Test.hs +++ /dev/null @@ -1,64 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - -import Test.QuickCheck -import Data.Text (Text) -import qualified Data.Text as T -import qualified Data.Text.ICU as U -import Data.Text.ICU.Translit -import Text.Printf -import Data.Char - -import Test.Framework -import Test.Framework.Providers.QuickCheck2 - --- newtype Random a = Random { unRandom :: a } deriving Show - - -instance Arbitrary Text where - arbitrary = fmap T.pack arbitrary - shrink = fmap T.pack . shrink . T.unpack - --- instance Arbitrary a => Arbitrary (Random a) where --- arbitrary = fmap Random arbitrary - - -samples :: [Text] -samples = ["hello","текст","维基百科:海納百川,有容乃大"] - - -newtype IdempTr = IdempTr Text deriving Show - - - -instance Arbitrary IdempTr where - arbitrary = elements transes0 - where transes0 = map IdempTr ["ru-en", "en-ru"] - -hexUnicode :: Text -> Text -hexUnicode txt = T.pack $ concat [ fmt c | c <- T.unpack txt ] - where fmt c = printf (if ord c < 0x10000 then "U+%04X" else "U+%X") (ord c) - - -prop_idemp (IdempTr t,s) = transliterate tr (transliterate tr s) == transliterate tr s - where tr = trans t -prop_toLower' t = U.toLower U.Root t == transliterate (trans "Lower") t -prop_toLower t = T.toLower t == transliterate (trans "Lower") t -prop_NFC t = U.normalize U.NFC t == transliterate (trans "NFC") t -prop_hexUnicode t = hexUnicode t == transliterate (trans "hex/unicode") t - - - -main = defaultMain [tests] - -tests :: Test -tests = testGroup "props" [ - testProperty "idemp" prop_idemp, - testProperty "toLower'" prop_toLower', - testProperty "toLower" prop_toLower, - testProperty "NFC" prop_NFC, - testProperty "hexUnicode" prop_hexUnicode - ] - - - - diff --git a/text-icu-translit.cabal b/text-icu-translit.cabal index 625dc2b..14c3a8d 100644 --- a/text-icu-translit.cabal +++ b/text-icu-translit.cabal @@ -1,55 +1,87 @@ - -name: text-icu-translit -version: 0.1.0.7 -synopsis: ICU transliteration +name: text-icu-translit +version: 0.1.0.7 +synopsis: ICU transliteration description: Bindings to the transliteration features by the International Components for Unicode (ICU) library -license: BSD3 -license-file: LICENSE -author: Antonio Nikishaev -maintainer: Antonio Nikishaev -copyright: 2014 Antonio Nikishaev -bug-reports: https://github.com/llelf/text-icu-translit/issues -category: Text -build-type: Simple -cabal-version: >=1.10 -extra-source-files: include/trans.h +license: BSD3 +license-file: LICENSE +author: Antonio Nikishaev +maintainer: Antonio Nikishaev +copyright: 2014 Antonio Nikishaev +bug-reports: https://github.com/llelf/text-icu-translit/issues +category: Text +build-type: Simple +cabal-version: >=1.10 +extra-source-files: include/trans.h library - exposed-modules: Data.Text.ICU.Translit, - Data.Text.ICU.Translit.IO - other-modules: Data.Text.ICU.Translit.ICUHelper + exposed-modules: + Data.Text.ICU.Translit + Data.Text.ICU.Translit.IO - ghc-options: -Wall + other-modules: Data.Text.ICU.Translit.ICUHelper + ghc-options: -Wall + c-sources: cbits/trans.c + include-dirs: include + extra-libraries: icuuc - c-sources: cbits/trans.c - include-dirs: include + if os(windows) + extra-libraries: + icuin + icudt - extra-libraries: icuuc - if os(mingw32) - extra-libraries: icuin icudt else - extra-libraries: icui18n icudata + extra-libraries: + icui18n + icudata + + default-extensions: + BlockArguments + ViewPatterns + + build-depends: + base >=4 && <5 + , bytestring + , text >=2.0 - -- other-extensions: - build-depends: base >=4 && <5, text >= 1.0 -- hs-source-dirs: - default-language: Haskell2010 + default-language: GHC2021 -test-suite tests - type: exitcode-stdio-1.0 - default-language: Haskell2010 - hs-source-dirs: tests - main-is: Test.hs - build-depends: base, text, - text-icu >= 0.6.3, - text-icu-translit, - QuickCheck >= 2.4, - test-framework >= 0.8, - test-framework-quickcheck2 >= 0.3 +executable translit + default-language: GHC2021 + hs-source-dirs: app + ghc-options: -Wall + main-is: Main.hs + build-depends: + base + , lens + , lens-family + , pipes + , pipes-group + , pipes-text + , text + , text-icu + , text-icu-translit +test-suite text-icu-translit-test + type: exitcode-stdio-1.0 + default-language: GHC2021 + hs-source-dirs: test + main-is: Main.hs + ghc-options: -Wall + default-extensions: + BlockArguments + ViewPatterns + + build-depends: + base + , hspec + , QuickCheck + , text + , text-icu + , text-icu-translit source-repository head type: darcs @@ -58,4 +90,3 @@ source-repository head source-repository head type: git location: https://github.com/llelf/text-icu-translit - diff --git a/translit.hs b/translit.hs deleted file mode 100644 index 76132fc..0000000 --- a/translit.hs +++ /dev/null @@ -1,36 +0,0 @@ - -import Data.Text.ICU.Translit -import System.Environment -import qualified Data.Text.IO as T -import qualified Data.Text as T -import System.IO -import Control.Monad - -import Pipes -import Pipes.Group as PG -import Pipes.Prelude as Pipes -import Pipes.Text.IO as PT -import Pipes.Text as PT -import Lens.Family2 - -main = do args <- getArgs - case args of - [rule] -> - go' $ fun (T.pack rule) - _ -> - error "Usage" - where - fun :: T.Text -> T.Text -> T.Text - fun rule = transliterate (trans rule) - - go' f = runEffect $ - over PT.lines (PG.maps (>-> Pipes.map f)) PT.stdin >-> PT.stdout - - go :: (T.Text -> T.Text) -> IO () - go f = do e <- isEOF - unless e $ do s <- T.getLine - T.putStrLn (f s) - go f - - - From 753e547a1fcb02c77f64f025fb2bd3bc7650d1b0 Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Thu, 26 Oct 2023 16:25:59 +0200 Subject: [PATCH 2/6] [fix] check that we're building on either x86_64 or aarch64, don't build on ghc 9.2 --- .github/workflows/test-flake.yml | 2 -- flake.nix | 8 +------- text-icu-translit.cabal | 4 ++++ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-flake.yml b/.github/workflows/test-flake.yml index b0e0717..6d2ae65 100644 --- a/.github/workflows/test-flake.yml +++ b/.github/workflows/test-flake.yml @@ -13,8 +13,6 @@ jobs: github_access_token: ${{ secrets.GITHUB_TOKEN }} - name: "flake check" run: nix flake check -Lv --allow-import-from-derivation --fallback --accept-flake-config - - name: GHC 9.2 - run: nix build .#ghc92-text-icu-translit -Lv --fallback --accept-flake-config - name: GHC 9.4 run: nix build .#ghc94-text-icu-translit -Lv --fallback --accept-flake-config - name: GHC 9.6 diff --git a/flake.nix b/flake.nix index 3269026..0976eaf 100644 --- a/flake.nix +++ b/flake.nix @@ -8,7 +8,7 @@ }; outputs = inputs: inputs.parts.lib.mkFlake {inherit inputs;} { - systems = ["x86_64-linux"]; + systems = ["x86_64-linux" "aarch64-linux" "aarch64-darwin"]; imports = [ inputs.haskell-flake.flakeModule inputs.pre-commit-hooks.flakeModule @@ -34,12 +34,6 @@ packages.default = config.packages.ghc94-text-icu-translit; devShells.default = config.devShells.ghc94; haskellProjects = { - ghc92 = { - packages = {}; - settings = {}; - basePackages = pkgs.haskell.packages.ghc92; - devShell.mkShellArgs.shellHook = config.pre-commit.installationScript; - }; ghc94 = { packages = {}; settings = {}; diff --git a/text-icu-translit.cabal b/text-icu-translit.cabal index 14c3a8d..bf2a903 100644 --- a/text-icu-translit.cabal +++ b/text-icu-translit.cabal @@ -54,6 +54,10 @@ executable translit hs-source-dirs: app ghc-options: -Wall main-is: Main.hs + + if !(arch(x86_64) || arch(aarch64)) + buildable: False + build-depends: base , lens From 92e9ce8dc5d4fc987d12c68799efa869df2b9933 Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Thu, 26 Oct 2023 16:41:41 +0200 Subject: [PATCH 3/6] [fix] remove executable that adds unnecessary, very large dependencies --- app/Main.hs | 28 ---------------------------- text-icu-translit.cabal | 23 +++-------------------- 2 files changed, 3 insertions(+), 48 deletions(-) delete mode 100644 app/Main.hs diff --git a/app/Main.hs b/app/Main.hs deleted file mode 100644 index a9bcf08..0000000 --- a/app/Main.hs +++ /dev/null @@ -1,28 +0,0 @@ -module Main (main) where - -import Data.Text qualified as T -import Data.Text.ICU.Translit (trans, transliterate) -import Lens.Family2 (over) -import Pipes (runEffect, (>->)) -import Pipes.Group as PG (maps) -import Pipes.Prelude as Pipes (map) -import Pipes.Text as PT (lines) -import Pipes.Text.IO as PT (stdin, stdout) -import System.Environment (getArgs) - -main :: IO () -main = do - args <- getArgs - case args of - [rule] -> - go' $ fun (T.pack rule) - _ -> - error "Usage" - where - fun :: T.Text -> T.Text -> T.Text - fun rule = transliterate (trans rule) - - go' f = - runEffect $ - over PT.lines (PG.maps (>-> Pipes.map f)) PT.stdin - >-> PT.stdout diff --git a/text-icu-translit.cabal b/text-icu-translit.cabal index bf2a903..8caf10a 100644 --- a/text-icu-translit.cabal +++ b/text-icu-translit.cabal @@ -27,6 +27,9 @@ library include-dirs: include extra-libraries: icuuc + if !(arch(x86_64) || arch(aarch64)) + buildable: False + if os(windows) extra-libraries: icuin @@ -49,26 +52,6 @@ library -- hs-source-dirs: default-language: GHC2021 -executable translit - default-language: GHC2021 - hs-source-dirs: app - ghc-options: -Wall - main-is: Main.hs - - if !(arch(x86_64) || arch(aarch64)) - buildable: False - - build-depends: - base - , lens - , lens-family - , pipes - , pipes-group - , pipes-text - , text - , text-icu - , text-icu-translit - test-suite text-icu-translit-test type: exitcode-stdio-1.0 default-language: GHC2021 From cf9ad8fd42d656f66b51f856be077e79bdfc1904 Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Wed, 1 Nov 2023 12:04:02 +0100 Subject: [PATCH 4/6] [chore] switch from fourmolu to ormolu, apply fixes requested by Paolo - remove touchForeignPtr (because that's kinda rude) - change from fourmolu to ormolu and reformat with the latter --- .envrc | 2 +- Data/Text/ICU/Translit/IO.hs | 42 ++++++++++++++++++------------------ flake.nix | 2 +- fourmolu.yaml | 12 ----------- test/Main.hs | 12 +++++------ 5 files changed, 29 insertions(+), 41 deletions(-) delete mode 100644 fourmolu.yaml diff --git a/.envrc b/.envrc index 80377dd..f7fb220 100644 --- a/.envrc +++ b/.envrc @@ -1 +1 @@ -use flake -Lv +use flake . -Lv diff --git a/Data/Text/ICU/Translit/IO.hs b/Data/Text/ICU/Translit/IO.hs index 257966e..8cc0101 100644 --- a/Data/Text/ICU/Translit/IO.hs +++ b/Data/Text/ICU/Translit/IO.hs @@ -1,7 +1,7 @@ module Data.Text.ICU.Translit.IO - ( Transliterator - , transliterator - , transliterate + ( Transliterator, + transliterator, + transliterate, ) where @@ -10,35 +10,35 @@ import Data.ByteString qualified as BS import Data.Text (Text) import Data.Text.Encoding qualified as T import Data.Text.ICU.Translit.ICUHelper - ( UChar - , UErrorCode - , handleError - , handleFilledOverflowError + ( UChar, + UErrorCode, + handleError, + handleFilledOverflowError, ) import Foreign data UTransliterator foreign import ccall "trans.h __hs_translit_open_trans" - openTrans - :: Ptr UChar -> Int -> Ptr UErrorCode -> IO (Ptr UTransliterator) + openTrans :: + Ptr UChar -> Int -> Ptr UErrorCode -> IO (Ptr UTransliterator) foreign import ccall "trans.h &__hs_translit_close_trans" - closeTrans - :: FunPtr (Ptr UTransliterator -> IO ()) + closeTrans :: + FunPtr (Ptr UTransliterator -> IO ()) foreign import ccall "trans.h __hs_translit_do_trans" - doTrans - :: Ptr UTransliterator - -> Ptr UChar - -> Int32 - -> Int32 - -> Ptr UErrorCode - -> IO Int32 + doTrans :: + Ptr UTransliterator -> + Ptr UChar -> + Int32 -> + Int32 -> + Ptr UErrorCode -> + IO Int32 data Transliterator = Transliterator - { transPtr :: ForeignPtr UTransliterator - , transSpec :: Text + { transPtr :: ForeignPtr UTransliterator, + transSpec :: Text } instance Show Transliterator where @@ -51,7 +51,7 @@ transliterator spec = do BS.useAsCStringLen specStr $ \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> do q <- handleError $ openTrans ptr (fromIntegral len) ref <- newForeignPtr closeTrans q - touchForeignPtr ref + -- touchForeignPtr ref return $ Transliterator ref spec transliterate :: Transliterator -> Text -> IO Text diff --git a/flake.nix b/flake.nix index 0976eaf..efdde78 100644 --- a/flake.nix +++ b/flake.nix @@ -23,7 +23,7 @@ check.enable = true; settings.hooks = { cabal-fmt.enable = true; - fourmolu.enable = true; + ormolu.enable = true; hlint.enable = true; alejandra.enable = true; diff --git a/fourmolu.yaml b/fourmolu.yaml deleted file mode 100644 index b6ae562..0000000 --- a/fourmolu.yaml +++ /dev/null @@ -1,12 +0,0 @@ -indentation: 2 -function-arrows: leading -comma-style: leading -import-export-style: leading -indent-wheres: false -record-brace-space: true -newlines-between-decls: 1 -haddock-style: single-line -let-style: inline -in-style: right-align -respectful: false -single-constraint-parens: never diff --git a/test/Main.hs b/test/Main.hs index 2dc2d0f..de1647c 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -21,18 +21,18 @@ newtype IdempTr = IdempTr Text deriving (Show) instance Arbitrary IdempTr where arbitrary = elements transes0 - where - transes0 = map IdempTr ["ru-en", "en-ru"] + where + transes0 = map IdempTr ["ru-en", "en-ru"] hexUnicode :: Text -> Text hexUnicode txt = T.pack $ concat [fmt c | c <- T.unpack txt] - where - fmt c = printf (if ord c < 0x10000 then "U+%04X" else "U+%X") (ord c) + where + fmt c = printf (if ord c < 0x10000 then "U+%04X" else "U+%X") (ord c) prop_idemp :: (IdempTr, Text) -> Property prop_idemp (IdempTr t, s) = transliterate tr (transliterate tr s) === transliterate tr s - where - tr = trans t + where + tr = trans t prop_toLower :: Text -> Property prop_toLower t = U.toLower U.Root t === transliterate (trans "Lower") t From 1d717389352dc3f527457f4317e63b402bb684a4 Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Wed, 1 Nov 2023 12:08:27 +0100 Subject: [PATCH 5/6] [chore] document the devshell setup --- README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6234003..39c0c7c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -ICU transliteration for Haskell +# ICU transliteration for Haskell >>> IO.putStrLn $ transliterate (trans "name-any; ru") "\\N{RABBIT FACE} Nu pogodi!" 🐰 Ну погоди! @@ -9,3 +9,22 @@ ICU transliteration for Haskell >>> IO.putStrLn $ transliterate (trans "ja") "Amsterdam" アムステルダム +## Developing + +- to see available outputs (targets), run + ```sh + nix flake show --allow-import-from-derivation + ``` +- with `flakes` and `nix command` enabled, run + ```sh + nix develop -Lv + ``` + to be dropped into a `devShell` or, alternatively, if you use `direnv`, run + ```sh + direnv allow + ``` +- to build and run the tests, run + ```sh + nix build -Lv + ``` +- refer to the [flake parts](https://flake.parts) and the [haskell flake](https://zero-to-flakes.com/haskell-flake/) documentations if you want to change the flake configs From 2889086194b945942e81f433d9a9a42b76fab57a Mon Sep 17 00:00:00 2001 From: Magnus Viernickel Date: Wed, 1 Nov 2023 12:09:39 +0100 Subject: [PATCH 6/6] [fix] remove touchForeignPtr --- Data/Text/ICU/Translit/IO.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/Data/Text/ICU/Translit/IO.hs b/Data/Text/ICU/Translit/IO.hs index 8cc0101..2a487e0 100644 --- a/Data/Text/ICU/Translit/IO.hs +++ b/Data/Text/ICU/Translit/IO.hs @@ -51,7 +51,6 @@ transliterator spec = do BS.useAsCStringLen specStr $ \((castPtr @_ @Word16) -> ptr, (`div` 2) -> len) -> do q <- handleError $ openTrans ptr (fromIntegral len) ref <- newForeignPtr closeTrans q - -- touchForeignPtr ref return $ Transliterator ref spec transliterate :: Transliterator -> Text -> IO Text