From 5343002a4c89fd08c78a1255882f51178aa0a98f Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Wed, 3 Jul 2024 14:39:10 -0500 Subject: [PATCH 01/14] Amassed changes --- .gitignore | 3 ++ .npmignore | 4 ++ README.md | 4 ++ binding.gyp | 75 ++++++++++++++++++++++++++++- index.js | 2 + script/adjust-install-name.sh | 23 +++++++++ script/fetch-libiconv-61.sh | 86 +++++++++++++++++++++++++++++++++ script/find-gnu-libiconv.sh | 90 +++++++++++++++++++++++++++++++++++ 8 files changed, 285 insertions(+), 2 deletions(-) create mode 100755 script/adjust-install-name.sh create mode 100644 script/fetch-libiconv-61.sh create mode 100755 script/find-gnu-libiconv.sh diff --git a/.gitignore b/.gitignore index 51f142db..5592bbdf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,10 @@ node_modules build .DS_Store .clang_complete +ext /browser.js emsdk-portable package-lock.json + +vendor/libiconv diff --git a/.npmignore b/.npmignore index 97d77444..edb606e7 100644 --- a/.npmignore +++ b/.npmignore @@ -8,6 +8,10 @@ !src/bindings/*.h !src/bindings/*.cc +!script/fetch-libiconv-61.sh +!script/find-gnu-libiconv.sh +!script/adjust-install-name.sh + !vendor/libcxx/* !vendor/pcre/pcre.gyp diff --git a/README.md b/README.md index 6c294cb6..ebd0ca55 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,10 @@ Native library at the core of Atom's text editor. +## Installation notes: + +On macOS 13 and greater, the OS no longer offers GNU `libiconv`. We handle this by downloading it from Apple’s OSS GitHub page and building it as a pre-compilation step. 
+ ## Components: ### Patch diff --git a/binding.gyp b/binding.gyp index 249e9e82..32c9c751 100644 --- a/binding.gyp +++ b/binding.gyp @@ -21,12 +21,40 @@ "src/core", " { const callback = (error, result) => { if (error) { diff --git a/script/adjust-install-name.sh b/script/adjust-install-name.sh new file mode 100755 index 00000000..e68226c7 --- /dev/null +++ b/script/adjust-install-name.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# This script can be used if we find it necessary, for code signing reasons, +# not to alter the install name of `libiconv.2.dylib` during compilation. macOS +# complains when we do it, saying that the code signature has been invalidated, +# but we haven't noticed any ill effects… yet. +# +# But this script would allow us to point `superstring.node` at the correct +# library by figuring out `libiconv.2.dylib`’s existing install name, rather +# than setting it to a known value in an earlier step. + +product_dir=$1 + +# Ask for the current install name expected by `superstring.node`. We need to +# know this in order to change it in the next step. +current_install_name=$(otool -L "$product_dir/superstring.node" | awk 'BEGIN{FS=OFS=" "};NR==2{print $1}') + +# Now use `install_name_tool` to tell `superstring.node` to instead look for +# `libiconv.2.dylib` at a path relative to itself. +install_name_tool -change \ + "$current_install_name" \ + "@loader_path/../../vendor/libiconv/lib/libiconv.2.dylib" \ + "$product_dir/superstring.node" diff --git a/script/fetch-libiconv-61.sh b/script/fetch-libiconv-61.sh new file mode 100644 index 00000000..2385bc3d --- /dev/null +++ b/script/fetch-libiconv-61.sh @@ -0,0 +1,86 @@ + +#!/bin/bash + +# The purpose of this script is to provide a copy of GNU `libiconv` on this +# macOS machine. Since newer versions of macOS include a FreeBSD `libiconv`, we +# no longer assume it's safe to use any ambient `libiconv.dylib` we find.
+# +# For this reason, we clone `libiconv-61` from Apple's OSS distributions on +# GitHub into a scratch directory, build it with `configure` and `make`, and +# install it into this package's `ext` directory. +# +# If `ext/lib/libiconv.2.dylib` already exists, we assume an earlier run +# already fetched and compiled it, and we skip the build. The scratch +# directory is removed when the script exits. Building on the user's machine +# has the advantage of very likely matching the system's architecture. + +echoerr() { echo "$@\n" >&2; } + + +usage() { + echoerr "superstring requires the GNU libiconv library, which macOS no longer bundles in recent versions. This package attempts to compile it from GitHub. If you're seeing this message, something has gone wrong; check the README for information and consider filing an issue." +} + +# Identify the directory of this script. +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +ROOT="$SCRIPT_DIR/.." +SCRATCH="$ROOT/scratch" +EXT="$ROOT/ext" + +cleanup() { + if [ -d "$SCRATCH" ]; then + rm -rf "$SCRATCH" + fi +} +trap cleanup SIGINT EXIT + +create-if-missing() { + if [ -z "$1" ]; then + echoerr "Error: $1 is a file." + usage + exit 1 + fi + if [ ! -d "$1" ]; then + mkdir "$1" + fi +} + +create-if-missing "$EXT" +create-if-missing "$SCRATCH" + +dylib_path="$EXT/lib/libiconv.2.dylib" + +# If this path already exists, we'll assume libiconv has already been fetched +# and compiled. Otherwise we'll do it now. +if [ ! -L "$dylib_path" ]; then + cd $SCRATCH + git clone -b libiconv-61 "https://github.com/apple-oss-distributions/libiconv.git" + cd libiconv/libiconv + ./configure --prefix="$EXT" --libdir="$EXT/lib" + make + make install +fi + +cd $ROOT + +# We expect this path to exist and be a symbolic link that points to a file. +if [ ! -L "$dylib_path" ]; then + echoerr "Error: expected $dylib_path to be present, but it was not. Cannot proceed."
+ usage + exit 1 +fi + +# Set the install name of this library to something neutral and predictable to +# make a later step easier. +# +# NOTE: macOS complains about this action invalidating the library's code +# signature. This has not been observed to have any negative effects for +# Pulsar, possibly because we sign and notarize the entire app at a later stage +# of the build process. But if it _did_ have negative effects, we could switch +# to a different approach and skip this step. See the `binding.gyp` file for +# further details. + +install_name_tool -id "libiconv.2.dylib" "${dylib_path}" + +cleanup diff --git a/script/find-gnu-libiconv.sh b/script/find-gnu-libiconv.sh new file mode 100755 index 00000000..4565db5d --- /dev/null +++ b/script/find-gnu-libiconv.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# The purpose of this script is to find a copy of GNU `libiconv` on this macOS +# machine. Since newer versions of macOS include a FreeBSD `libiconv`, we no +# longer assume it's safe to use any ambient `libiconv.dylib` we find. +# +# For this reason, we try to detect a Homebrew installation of `libiconv`; we +# also allow the user to install GNU `libiconv` manually and specify the path +# via an environment variable. +# +# We might eventually replace this approach with an explicit vendorization of +# the specific files needed, but that would require a universal build of +# `libiconv.2.dylib`. For now, letting the user provide their `libiconv` has +# the advantage of very likely matching the system's architecture. + +echoerr() { echo "$@\n" >&2; } + +usage() { + echoerr "superstring requires the GNU libiconv library. You can install it with Homebrew (\`brew install libiconv\`) and we'll be able to detect its presence. You may also define a SUPERSTRING_LIBICONV_PATH variable set to the absolute path of your libiconv installation. (This path should have \`lib\` and \`include\` as child directories.)" +} + +# Identify the directory of this script. 
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# Find this package's `vendor` directory; make sure it exists. +VENDOR="$SCRIPT_DIR/../vendor" +if [ ! -d "$VENDOR" ]; then + echoerr "Aborting; expected $VENDOR to be a directory, but it was not." + exit 1 +fi + +TARGET="$VENDOR/libiconv" + +# Make a `libiconv` directory for us to vendorize into. +if [ ! -d "$TARGET" ]; then + mkdir "$TARGET" +fi + +if [[ ! -z "${SUPERSTRING_LIBICONV_PATH}" ]]; then + # First, we allow the user to specify a path and override our heuristics. + # This should propagate even if the user ran `yarn install` from a project + # that has `superstring` as a dependency. + source="${SUPERSTRING_LIBICONV_PATH}" +elif command -v brew &> /dev/null; then + # If that variable isn't set, then we check if this machine has Homebrew + # installed. If so, we'll opt into Homebrew's version of `libiconv`. This is + # the safest option because we can reasonably conclude that this `libiconv` + # is the right flavor and matches the system's architecture. + source="$(brew --prefix)/opt/libiconv" +else + # If neither of these things is true, we won't try to add an entry to + # `library_dirs`. + usage + exit 1 +fi + +if [ ! -d "$source" ]; then + echoerr "Expected $source to be the path to GNU libiconv, but it is not a directory. " + usage + exit 1 +fi + +# We expect the `dylib` we need to be at this exact path. +dylib_path="${source}/lib/libiconv.2.dylib" + +if [ ! -f "$dylib_path" ]; then + echoerr "Invalid location for libiconv. Expected to find: ${dylib_path} but it was not present." + usage + exit 1 +fi + +# We need the `include` directory for compilation, plus the `libiconv.2.dylib` +# file. We'll also copy over the README and license files for compliance. 
+cp -R "${source}/include" "$TARGET/" +cp "${dylib_path}" "$TARGET/lib/" +cp "${source}/COPYING.LIB" "$TARGET/" +cp "${source}/README" "$TARGET/" + + +# Set the install name of this library to something neutral and predictable to +# make a later step easier. +# +# NOTE: macOS complains about this action invalidating the library's code +# signature. This has not been observed to have any negative effects for +# Pulsar, possibly because we sign and notarize the entire app at a later stage +# of the build process. But if it _did_ have negative effects, we could switch +# to a different approach and skip this step. See the `binding.gyp` file for +# further details. + +install_name_tool -id "libiconv.2.dylib" "${dylib_path}" From be89af1052bfe94a31b7cecd0122801ff7afd269 Mon Sep 17 00:00:00 2001 From: Sadick Date: Wed, 16 Jun 2021 17:07:54 +0300 Subject: [PATCH 02/14] Merge pull request #88 from aminya/github-actions GitHub Actions --- .github/workflows/ci.yml | 55 ++++++++++++++++++++++++++++++++++++++++ .travis.yml | 35 ------------------------- appveyor.yml | 25 ++---------------- package.json | 2 +- 4 files changed, 58 insertions(+), 59 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..11737170 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,55 @@ +name: ci +on: + - pull_request + - push + +jobs: + Test: + if: "!contains(github.event.head_commit.message, '[skip ci]')" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + node_version: + - 10 + - 12 + - 14 + name: Node ${{ matrix.node_version }} on ${{ matrix.os }} + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Cache + uses: actions/cache@v2 + with: + path: | + 'node_modules' + key: ${{ runner.os }}-${{ matrix.node_version }}-${{ hashFiles('package.json') }} + + - 
name: Setup node + uses: actions/setup-node@v2-beta + with: + node-version: ${{ matrix.node_version }} + + - name: Install dependencies + run: npm install + + - name: Lint + run: npm run standard + + - name: Run tests + run: | + npm run test:node + npm run test:native + + Skip: + if: contains(github.event.head_commit.message, '[skip ci]') + runs-on: ubuntu-latest + steps: + - name: Skip CI 🚫 + run: echo skip ci diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1831c829..00000000 --- a/.travis.yml +++ /dev/null @@ -1,35 +0,0 @@ -language: node_js -sudo: false - -notifications: - email: - on_success: never - on_failure: change - -node_js: - - "8" - - "10" - - "12.14.1" - -before_install: - - export CXX="g++-4.9" CC="gcc-4.9" - -script: - - npm run standard - - npm run test:node - - npm run test:native - -git: - depth: 10 - -branches: - only: - - master - -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-4.9 - - g++-4.9 diff --git a/appveyor.yml b/appveyor.yml index aa43e41e..795da41c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,27 +1,6 @@ -image: Visual Studio 2015 - -environment: - matrix: - - nodejs_version: "8" - - nodejs_version: "12.14.1" - -platform: - - x86 - - x64 - -install: - - ps: Install-Product node $env:nodejs_version - - git submodule update --init - - node --version - - npm --version - - npm install - -test_script: - - npm run standard - - npm run test:node - +# empty appveyor build: off branches: only: - - master + - non-existing diff --git a/package.json b/package.json index 9ea8aa1b..d5df9424 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "build:node": "node-gyp rebuild", "build:browser": "script/build-browser-version.sh", "build": "npm run build:node && npm run build:browser", - "test:native": "script/test-native.js", + "test:native": "node ./script/test-native.js", "test:node": "mocha test/js/*.js", "test:browser": "SUPERSTRING_USE_BROWSER_VERSION=1 mocha test/js/*.js", "test": "npm 
run test:node && npm run test:browser", From 9cc3b0b2fdecf2cdf75e8fee314ce9630fce4c5f Mon Sep 17 00:00:00 2001 From: sadick254 Date: Wed, 16 Jun 2021 17:10:22 +0300 Subject: [PATCH 03/14] Remove other ci configs and update badge --- .github/workflows/ci.yml | 6 +----- README.md | 7 +------ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11737170..906ec387 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,10 +14,6 @@ jobs: - ubuntu-latest - macos-latest - windows-latest - node_version: - - 10 - - 12 - - 14 name: Node ${{ matrix.node_version }} on ${{ matrix.os }} steps: @@ -34,7 +30,7 @@ jobs: - name: Setup node uses: actions/setup-node@v2-beta with: - node-version: ${{ matrix.node_version }} + node-version: 14 - name: Install dependencies run: npm install diff --git a/README.md b/README.md index ebd0ca55..a22948f8 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,5 @@ # Superstring -[![macOS Build Status](https://circleci.com/gh/atom/superstring/tree/master.svg?style=shield)](https://circleci.com/gh/atom/superstring) -[![linux Build Status](https://travis-ci.org/atom/superstring.svg?branch=master)](https://travis-ci.org/atom/superstring) -[![Windows Build Status](https://ci.appveyor.com/api/projects/status/n5pack4yk7w80fso/branch/master?svg=true)](https://ci.appveyor.com/project/Atom/superstring/branch/master) -[![Dependency Status](https://david-dm.org/atom/superstring.svg)](https://david-dm.org/atom/superstring) - - +[![ci](https://github.com/atom/superstring/actions/workflows/ci.yml/badge.svg)](https://github.com/atom/superstring/actions/workflows/ci.yml) Native library at the core of Atom's text editor. 
From 78c838786c315637811bb7ee3403f94ec2d60ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maur=C3=ADcio=20Szabo?= Date: Wed, 27 Jul 2022 11:23:46 -0300 Subject: [PATCH 04/14] Trying a new matrix of Node versions --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 906ec387..79c442ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,10 @@ jobs: - ubuntu-latest - macos-latest - windows-latest + node_version: + - 14 + - 16 + - 18 name: Node ${{ matrix.node_version }} on ${{ matrix.os }} steps: @@ -30,7 +34,7 @@ jobs: - name: Setup node uses: actions/setup-node@v2-beta with: - node-version: 14 + node-version: ${{ matrix.node_version }} - name: Install dependencies run: npm install From c861218d0141c79663f8dfad7e049cb7eaf5bd6d Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 24 Oct 2023 13:30:57 -0400 Subject: [PATCH 05/14] Delete win-iconv submodule We're about to commit it as regular files, so first we need to get rid of the submodule. --- .gitmodules | 3 --- vendor/win-iconv | 1 - 2 files changed, 4 deletions(-) delete mode 100644 .gitmodules delete mode 160000 vendor/win-iconv diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 07506fdd..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "vendor/win-iconv"] - path = vendor/win-iconv - url = https://github.com/win-iconv/win-iconv diff --git a/vendor/win-iconv b/vendor/win-iconv deleted file mode 160000 index 9f98392d..00000000 --- a/vendor/win-iconv +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9f98392dfecadffd62572e73e9aba878e03496c4 From 2041d9484c31592c23cb47a3ae6064213d065cc7 Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 24 Oct 2023 13:32:43 -0400 Subject: [PATCH 06/14] Add win-iconv files using 'git subtree' This replaces what used to be a *submodule* of the win-iconv repo.
Using git subtree preserves revision history, author info (including blame view), and so on. win-iconv is a very small repo, it's public domain, and it has few revisions. So, I think it's worth it to do all that, for giving proper attribution of author info, if for no other reason. Not using a submodule is more compatible with newer npm (npm 7.x+), and apparently with Yarn (v1.x?) as well. It's overall less complicated for end users of the superstring package/repo. --- Add 'vendor/win-iconv/' from commit '9f98392dfecadffd62572e73e9aba878e03496c4' git-subtree-dir: vendor/win-iconv git-subtree-mainline: 5b2f39d3319ab9a568b1afff4a23be9ece458cc1 git-subtree-split: 9f98392dfecadffd62572e73e9aba878e03496c4 --- vendor/win-iconv/CMakeLists.txt | 82 + vendor/win-iconv/ChangeLog | 166 ++ vendor/win-iconv/FindWcecompat.cmake | 33 + vendor/win-iconv/Makefile | 110 ++ vendor/win-iconv/iconv.def | 23 + vendor/win-iconv/iconv.h | 21 + vendor/win-iconv/mlang.def | 11 + vendor/win-iconv/mlang.h | 54 + vendor/win-iconv/readme.txt | 20 + vendor/win-iconv/win_iconv.c | 2079 ++++++++++++++++++++++++++ vendor/win-iconv/win_iconv_test.c | 286 ++++ 11 files changed, 2885 insertions(+) create mode 100644 vendor/win-iconv/CMakeLists.txt create mode 100644 vendor/win-iconv/ChangeLog create mode 100644 vendor/win-iconv/FindWcecompat.cmake create mode 100644 vendor/win-iconv/Makefile create mode 100644 vendor/win-iconv/iconv.def create mode 100644 vendor/win-iconv/iconv.h create mode 100644 vendor/win-iconv/mlang.def create mode 100644 vendor/win-iconv/mlang.h create mode 100644 vendor/win-iconv/readme.txt create mode 100644 vendor/win-iconv/win_iconv.c create mode 100644 vendor/win-iconv/win_iconv_test.c diff --git a/vendor/win-iconv/CMakeLists.txt b/vendor/win-iconv/CMakeLists.txt new file mode 100644 index 00000000..c8a1aa75 --- /dev/null +++ b/vendor/win-iconv/CMakeLists.txt @@ -0,0 +1,82 @@ +project(win_iconv) + +cmake_minimum_required(VERSION 2.6) +set(CMAKE_MODULE_PATH 
${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) + +set(CPACK_GENERATOR "TBZ2") +include(CPack) + +option(BUILD_TEST "build test executable" OFF) +if(WINCE) + option(BUILD_STATIC "build the static library" ON) + option(BUILD_SHARED "build the shared library" OFF) + option(BUILD_EXECUTABLE "build the win_iconv executable" OFF) +else(WINCE) + option(BUILD_STATIC "build the static library" OFF) + option(BUILD_SHARED "build the shared library" ON) + option(BUILD_EXECUTABLE "build the win_iconv executable" ON) +endif(WINCE) + +if(BUILD_TEST) + enable_testing() +endif(BUILD_TEST) + +if(MSVC) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +endif(MSVC) + +if(NOT WINCE) + add_definitions(-DUSE_LIBICONV_DLL) + if(DEFAULT_LIBICONV_DLL) + add_definitions(-DDEFAULT_LIBICONV_DLL=${DEFAULT_LIBICONV_DLL}) + endif(DEFAULT_LIBICONV_DLL) +else(NOT WINCE) + find_package(Wcecompat REQUIRED) + include_directories(${WCECOMPAT_INCLUDE_DIR}) +endif(NOT WINCE) + +if(BUILD_SHARED) + add_library(iconv SHARED win_iconv.c iconv.def) + set_target_properties(iconv PROPERTIES COMPILE_FLAGS "-DMAKE_DLL" + PREFIX "") + if(WINCE) + target_link_libraries(iconv ${WCECOMPAT_LIBRARIES}) + endif(WINCE) + install(TARGETS iconv RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) +endif(BUILD_SHARED) + +if(BUILD_EXECUTABLE) + add_executable(win_iconv win_iconv.c) + set_target_properties(win_iconv PROPERTIES COMPILE_FLAGS "-DMAKE_EXE") + if(WINCE) + target_link_libraries(win_iconv ${WCECOMPAT_LIBRARIES}) + endif(WINCE) + install(TARGETS win_iconv RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) +endif(BUILD_EXECUTABLE) + +if(BUILD_STATIC) + add_library(iconv-static STATIC win_iconv.c) + set_target_properties(iconv-static PROPERTIES OUTPUT_NAME "iconv") + if(WINCE) + target_link_libraries(iconv-static ${WCECOMPAT_LIBRARIES}) + endif(WINCE) + install(TARGETS iconv-static RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) 
+endif(BUILD_STATIC) + + +install(FILES iconv.h DESTINATION include) + +if(BUILD_TEST) + # tests: + add_executable(win_iconv_test win_iconv_test.c) + add_test(win_iconv_test win_iconv_test) + if(WINCE) + target_link_libraries(win_iconv_test ${WCECOMPAT_LIBRARIES}) + endif(WINCE) +endif(BUILD_TEST) diff --git a/vendor/win-iconv/ChangeLog b/vendor/win-iconv/ChangeLog new file mode 100644 index 00000000..26048fbc --- /dev/null +++ b/vendor/win-iconv/ChangeLog @@ -0,0 +1,166 @@ +2016-01-12 Yukihiro Nakadaira + + * win_iconv.c, win_iconv_test.c: Make >=0x80 byte illegal in ascii. + * iconv.h: Add WINICONV_CONST macro. + +2014-02-05 Yukihiro Nakadaira + + * win_iconv.c: Added alias. ISO_8859-* ISO_8859_* + * win_iconv.c, win_iconv_test.c: Fixed for compiler warning. + +2013-09-15 Yukihiro Nakadaira + + * iconv.h: Fixed c++ style comment. (Issue 21) (Thanks to bgilbert) + +2012-11-22 Yukihiro Nakadaira + + * win_iconv.c: Fix warnings. + (Issue 19) (Thanks to yselkowitz) + +2012-10-21 Yukihiro Nakadaira + + * win_iconv.c, win_iconv_test.c: Add //ignore and -c flag. + +2012-10-15 Yukihiro Nakadaira + + * win_iconv.c, win_iconv_test.c: cosmetic change. + +2012-09-19 Yukihiro Nakadaira + + * iconv.h, win_iconv.c, win_iconv_test.c: Change iconv(3) prototype. + "const char **inbuf" -> "char **inbuf" + (Issue 8) + + * win_iconv.c: Change to not use TEXT macro for GetProcAddress. + (Issue 17) (Thanks to EPienkowskia) + + * win_iconv_test.c: Fix for -DUNICODE. Use GetModuleFileNameA. + +2011-10-28 Yukihiro Nakadaira + + * win_iconv.c: Add UCS-2. + (Issue 14) (Thanks to j.g.rennison) + +2011-10-24 Yukihiro Nakadaira + + * win_iconv.c: Add Big5-HKSCS alias. + (Issue 13) (Thanks to timothy.ty.lee) + +2011-09-06 Yukihiro Nakadaira + + * Makefile: Improvement of the creation of the DLL. + (Issue 10) (Thanks to vincent.torri) + +2011-08-19 Yukihiro Nakadaira + + * win_iconv.c: Fixed a bug that assumption that + sizeof(DWORD)==sizeof(void*) in find_imported_module_by_funcname. 
+ (Issue 7) (Thanks to j.g.rennison) + +2011-08-13 Yukihiro Nakadaira + + * win_iconv.c, win_iconv_test.c: Fixed a bug that //translit + flag does not work when transliterating to the default + character. + (Issue 9) (Thanks to j.g.rennison) + +2011-07-26 Yukihiro Nakadaira + + * CMakeLists.txt: fix dll name with mingw. + (Issue 6) (Thanks to kalevlember) + + +2011-05-19 Yukihiro Nakadaira + + * win_iconv.c: Add some more UCS aliases. + Merge from Tor Lillqvist version. + (Issue 4) (Thanks to mkbosmans) + +2011-05-15 Yukihiro Nakadaira + + * Makefile: use variable for tools in Makefile + (Issue 3) (Thanks to mkbosmans) + +2011-01-13 Yukihiro Nakadaira + + * win_iconv_test.c: Removed unused variable. + + * win_iconv_test.c: Added USE_ICONV_H flag to compile with -liconv. + (Issue 2) (Thanks to amorilia.gamebox) + +2010-04-14 Patrick von Reth + + * added c++ support + +2010-03-28 Patrick Spendrin + + * CMakeLists.txt, win_iconv.c: add CMake buildsystem, fix bug from issue tracker + +2009-07-25 Yukihiro Nakadaira + + * win_iconv.c, readme.txt: doc fix + +2009-07-06 Yukihiro Nakadaira + + * win_iconv.c, Makefile, readme.txt: doc fix + +2009-06-19 Yukihiro Nakadaira + + * win_iconv.c: cosmetic change + * win_iconv.c: Change Unicode BOM behavior + 1. Remove the BOM when "fromcode" is utf-16 or utf-32. + 2. Add the BOM when "tocode" is utf-16 or utf-32. + +2009-06-18 Yukihiro Nakadaira + + * win_iconv.c: Fixed a bug that invalid input may cause an + endless loop + +2009-06-18 Yukihiro Nakadaira + + * win_iconv.c: Fixed a bug that libiconv_iconv_open() doesn't + work (Christophe Benoit) + +2008-04-01 Yukihiro Nakadaira + + * win_iconv.c: Added //TRANSLIT option. 
+ http://bugzilla.gnome.org/show_bug.cgi?id=524314 + +2008-03-20 Yukihiro Nakadaira + + * win_iconv.c: The dwFlags parameter to MultiByteToWideChars() + must be zero for some code pages (Tor Lillqvist) + +2008-03-19 Yukihiro Nakadaira + + * win_iconv.c: Added support for UCS-2 and GB18030 (Tor Lillqvist) + +2007-12-03 Yukihiro Nakadaira + + * iconv.h: #include to use size_t + +2007-11-28 Yukihiro Nakadaira + + * win_iconv.c: bug fix for two things (Tor Lillqvist) + 1) This is probably not important: Add a function + must_use_null_useddefaultchar() that checks for those + codepages for which the docs for WideCharToMultiByte() say + one has to use a NULL lpDefaultChar pointer. Don't know if + this is actually needed, but better to be safe than sorry. + 2) This is essential: In kernel_wctomb(), the code should first + check if bufsize is zero, and return the E2BIG error in that + case. + +2007-11-26 Yukihiro Nakadaira + + * win_iconv.c: ISO-8859-1 should be CP28591, not CP1252 (Tor + Lillqvist) + +2007-11-26 Yukihiro Nakadaira + + * win_iconv.c: patch from Tor Lillqvist (with alteration) + +2007-09-04 Yukihiro Nakadaira + + * : Initial import + diff --git a/vendor/win-iconv/FindWcecompat.cmake b/vendor/win-iconv/FindWcecompat.cmake new file mode 100644 index 00000000..a3f7373f --- /dev/null +++ b/vendor/win-iconv/FindWcecompat.cmake @@ -0,0 +1,33 @@ +# Try to find Wcecompat functionality +# Once done this will define +# +# WCECOMPAT_FOUND - system has Wcecompat +# WCECOMPAT_INCLUDE_DIR - Wcecompat include directory +# WCECOMPAT_LIBRARIES - Libraries needed to use Wcecompat +# +# Copyright (c) 2010, Andreas Holzammer, +# +# Redistribution and use is allowed according to the terms of the BSD license. 
+ +if(WCECOMPAT_INCLUDE_DIR AND WCECOMPAT_LIB_FOUND) + set(Wcecompat_FIND_QUIETLY TRUE) +endif(WCECOMPAT_INCLUDE_DIR AND WCECOMPAT_LIB_FOUND) + +find_path(WCECOMPAT_INCLUDE_DIR errno.h PATH_SUFFIXES wcecompat) + +set(WCECOMPAT_LIB_FOUND FALSE) + +if(WCECOMPAT_INCLUDE_DIR) + find_library(WCECOMPAT_LIBRARIES NAMES wcecompat wcecompatex ) + if(WCECOMPAT_LIBRARIES) + set(WCECOMPAT_LIB_FOUND TRUE) + endif(WCECOMPAT_LIBRARIES) +endif(WCECOMPAT_INCLUDE_DIR) + +# I have no idea what this is about, but it seems to be used quite often, so I add this here +set(WCECOMPAT_CONST const) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Wcecompat DEFAULT_MSG WCECOMPAT_LIBRARIES WCECOMPAT_LIB_FOUND) + +mark_as_advanced(WCECOMPAT_INCLUDE_DIR WCECOMPAT_LIBRARIES WCECOMPAT_CONST WCECOMPAT_LIB_FOUND) diff --git a/vendor/win-iconv/Makefile b/vendor/win-iconv/Makefile new file mode 100644 index 00000000..5937584f --- /dev/null +++ b/vendor/win-iconv/Makefile @@ -0,0 +1,110 @@ +# Makefile for win-iconv +# +# Variables that can be overridden: +# +# CC, AR, RANLIB, DLLTOOL +# MKDIR_P, INSTALL, RM +# prefix, BINARY_PATH, INCLUDE_PATH, LIBRARY_PATH + +CC ?= gcc +AR ?= ar +RANLIB ?= ranlib +DLLTOOL ?= dlltool + +MKDIR_P = mkdir -p +INSTALL = install -c +RM = rm -f + +# comma separated list (e.g. 
"iconv.dll,libiconv.dll") +DEFAULT_LIBICONV_DLL ?= \"\" + +CFLAGS += -pedantic -Wall +CFLAGS += -DUSE_LIBICONV_DLL +CFLAGS += -DDEFAULT_LIBICONV_DLL=$(DEFAULT_LIBICONV_DLL) + +prefix ?= /usr/local +BINARY_PATH = $(prefix)/bin +INCLUDE_PATH = $(prefix)/include +LIBRARY_PATH = $(prefix)/lib + +all: iconv.dll libiconv.a win_iconv.exe + +dist: test win_iconv.zip + +iconv.dll: win_iconv.c + $(CC) $(CFLAGS) -c win_iconv.c -DMAKE_DLL + $(CC) -shared -o iconv.dll -Wl,-s -Wl,--out-implib=libiconv.dll.a -Wl,--export-all-symbols win_iconv.o $(SPECS_FLAGS) + +libiconv.a: win_iconv.c + $(CC) $(CFLAGS) -c win_iconv.c + $(AR) rcs libiconv.a win_iconv.o + $(RANLIB) libiconv.a + +win_iconv.exe: win_iconv.c + $(CC) $(CFLAGS) -s -o win_iconv.exe win_iconv.c -DMAKE_EXE + +libmlang.a: mlang.def + $(DLLTOOL) --kill-at --input-def mlang.def --output-lib libmlang.a + +test: + $(CC) $(CFLAGS) -s -o win_iconv_test.exe win_iconv_test.c + ./win_iconv_test.exe + +win_iconv.zip: msvcrt msvcr70 msvcr71 + rm -rf win_iconv + svn export . win_iconv + cp msvcrt/iconv.dll msvcrt/win_iconv.exe win_iconv/ + mkdir win_iconv/msvcr70 + cp msvcr70/iconv.dll win_iconv/msvcr70/ + mkdir win_iconv/msvcr71 + cp msvcr71/iconv.dll win_iconv/msvcr71/ + zip -r win_iconv.zip win_iconv + +msvcrt: + svn export . msvcrt; \ + cd msvcrt; \ + $(MAKE); + +msvcr70: + svn export . msvcr70; \ + cd msvcr70; \ + gcc -dumpspecs | sed s/-lmsvcrt/-lmsvcr70/ > specs; \ + $(MAKE) "SPECS_FLAGS=-specs=$$PWD/specs"; + +msvcr71: + svn export . 
msvcr71; \ + cd msvcr71; \ + gcc -dumpspecs | sed s/-lmsvcrt/-lmsvcr71/ > specs; \ + $(MAKE) "SPECS_FLAGS=-specs=$$PWD/specs"; + +install: iconv.dll libiconv.a win_iconv.exe + -@$(MKDIR_P) '$(DESTDIR)$(BINARY_PATH)' + -@$(MKDIR_P) '$(DESTDIR)$(INCLUDE_PATH)' + -@$(MKDIR_P) '$(DESTDIR)$(LIBRARY_PATH)' + -$(INSTALL) iconv.dll '$(DESTDIR)$(BINARY_PATH)' + -$(INSTALL) win_iconv.exe '$(DESTDIR)$(BINARY_PATH)' + -$(INSTALL) -m 0644 iconv.h '$(DESTDIR)$(INCLUDE_PATH)' + -$(INSTALL) -m 0644 libiconv.dll.a '$(DESTDIR)$(LIBRARY_PATH)' + -$(INSTALL) -m 0644 libiconv.a '$(DESTDIR)$(LIBRARY_PATH)' + +uninstall: + -$(RM) '$(DESTDIR)$(LIBRARY_PATH)'/libiconv.a + -$(RM) '$(DESTDIR)$(LIBRARY_PATH)'/libiconv.dll.a + -$(RM) '$(DESTDIR)$(INCLUDE_PATH)'/iconv.h + -$(RM) '$(DESTDIR)$(BINARY_PATH)'/win_iconv.exe + -$(RM) '$(DESTDIR)$(BINARY_PATH)'/iconv.dll + +clean: + rm -f win_iconv.exe + rm -f win_iconv.o + rm -f iconv.dll* + rm -f libiconv.a + rm -f libiconv.dll + rm -f win_iconv_test.exe + rm -f libmlang.a + rm -rf win_iconv + rm -rf win_iconv.zip + rm -rf msvcrt + rm -rf msvcr70 + rm -rf msvcr71 + diff --git a/vendor/win-iconv/iconv.def b/vendor/win-iconv/iconv.def new file mode 100644 index 00000000..d3da9ae2 --- /dev/null +++ b/vendor/win-iconv/iconv.def @@ -0,0 +1,23 @@ +EXPORTS + iconv + iconv_open + iconv_close + iconvctl + libiconv=iconv + libiconv_open=iconv_open + libiconv_close=iconv_close + libiconvctl=iconvctl +;; libiconv-1.11.dll +;; TODO for binary compatibility +; _libiconv_version @1 +; aliases2_lookup @2 +; aliases_lookup @3 +; iconv_canonicalize @4 +; libiconv @5 +; libiconv_close @6 +; libiconv_open @7 +; libiconv_relocate @8 +; libiconv_set_relocation_prefix @9 +; libiconvctl @10 +; libiconvlist @11 +; locale_charset @12 diff --git a/vendor/win-iconv/iconv.h b/vendor/win-iconv/iconv.h new file mode 100644 index 00000000..1a9532b7 --- /dev/null +++ b/vendor/win-iconv/iconv.h @@ -0,0 +1,21 @@ +#ifndef _LIBICONV_H +#define _LIBICONV_H +#include +#ifndef 
WINICONV_CONST +# ifdef ICONV_CONST +# define WINICONV_CONST ICONV_CONST +# else +# define WINICONV_CONST const +# endif +#endif +#ifdef __cplusplus +extern "C" { +#endif +typedef void* iconv_t; +iconv_t iconv_open(const char *tocode, const char *fromcode); +int iconv_close(iconv_t cd); +size_t iconv(iconv_t cd, WINICONV_CONST char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +#ifdef __cplusplus +} +#endif +#endif diff --git a/vendor/win-iconv/mlang.def b/vendor/win-iconv/mlang.def new file mode 100644 index 00000000..cb475aee --- /dev/null +++ b/vendor/win-iconv/mlang.def @@ -0,0 +1,11 @@ +LIBRARY MLANG.DLL +EXPORTS + ConvertINetMultiByteToUnicode@24 + ;; ConvertINetReset (not documented) + ConvertINetString@28 + ConvertINetUnicodeToMultiByte@24 + IsConvertINetStringAvailable@8 + LcidToRfc1766A@12 + LcidToRfc1766W@12 + Rfc1766ToLcidA@8 + Rfc1766ToLcidW@8 diff --git a/vendor/win-iconv/mlang.h b/vendor/win-iconv/mlang.h new file mode 100644 index 00000000..525f484e --- /dev/null +++ b/vendor/win-iconv/mlang.h @@ -0,0 +1,54 @@ +HRESULT WINAPI ConvertINetString( + LPDWORD lpdwMode, + DWORD dwSrcEncoding, + DWORD dwDstEncoding, + LPCSTR lpSrcStr, + LPINT lpnSrcSize, + LPBYTE lpDstStr, + LPINT lpnDstSize +); + +HRESULT WINAPI ConvertINetMultiByteToUnicode( + LPDWORD lpdwMode, + DWORD dwSrcEncoding, + LPCSTR lpSrcStr, + LPINT lpnMultiCharCount, + LPWSTR lpDstStr, + LPINT lpnWideCharCount +); + +HRESULT WINAPI ConvertINetUnicodeToMultiByte( + LPDWORD lpdwMode, + DWORD dwEncoding, + LPCWSTR lpSrcStr, + LPINT lpnWideCharCount, + LPSTR lpDstStr, + LPINT lpnMultiCharCount +); + +HRESULT WINAPI IsConvertINetStringAvailable( + DWORD dwSrcEncoding, + DWORD dwDstEncoding +); + +HRESULT WINAPI LcidToRfc1766A( + LCID Locale, + LPSTR pszRfc1766, + int nChar +); + +HRESULT WINAPI LcidToRfc1766W( + LCID Locale, + LPWSTR pszRfc1766, + int nChar +); + +HRESULT WINAPI Rfc1766ToLcidA( + LCID *pLocale, + LPSTR pszRfc1766 +); + +HRESULT WINAPI Rfc1766ToLcidW( + LCID 
*pLocale, + LPWSTR pszRfc1766 +); diff --git a/vendor/win-iconv/readme.txt b/vendor/win-iconv/readme.txt new file mode 100644 index 00000000..f2de525e --- /dev/null +++ b/vendor/win-iconv/readme.txt @@ -0,0 +1,20 @@ +win_iconv is an iconv implementation that uses the Win32 API to convert. + +win_iconv is placed in the public domain. + +ENVIRONMENT VARIABLE: + WINICONV_LIBICONV_DLL + If $WINICONV_LIBICONV_DLL is set, win_iconv uses the DLL. If + loading the DLL or iconv_open() fails, it falls back to internal + conversion. If several DLLs are given as a comma-separated list, + the first loadable DLL is used. The DLL should export + iconv_open(), iconv_close() and iconv(), or libiconv_open(), + libiconv_close() and libiconv(). + (only available when USE_LIBICONV_DLL is defined at compile time) + +The Win32 API does not support strict encoding conversion for some codepages. +The MLang functions also drop or replace invalid bytes and do not return a +useful error status as iconv does. This implementation cannot be used for +encoding validation purposes. + +Yukihiro Nakadaira diff --git a/vendor/win-iconv/win_iconv.c b/vendor/win-iconv/win_iconv.c new file mode 100644 index 00000000..72f80686 --- /dev/null +++ b/vendor/win-iconv/win_iconv.c @@ -0,0 +1,2079 @@ +/* + * iconv implementation using Win32 API to convert. + * + * This file is placed in the public domain. 
+ */ + +/* for WC_NO_BEST_FIT_CHARS */ +#ifndef WINVER +# define WINVER 0x0500 +#endif + +#define STRICT +#include +#include +#include +#include + +#ifdef __GNUC__ +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif + +/* WORKAROUND: */ +#ifndef UNDER_CE +#define GetProcAddressA GetProcAddress +#endif + +#if 0 +# define MAKE_EXE +# define MAKE_DLL +# define USE_LIBICONV_DLL +#endif + +#if !defined(DEFAULT_LIBICONV_DLL) +# define DEFAULT_LIBICONV_DLL "" +#endif + +#define MB_CHAR_MAX 16 + +#define UNICODE_MODE_BOM_DONE 1 +#define UNICODE_MODE_SWAPPED 2 + +#define FLAG_USE_BOM 1 +#define FLAG_TRANSLIT 2 /* //TRANSLIT */ +#define FLAG_IGNORE 4 /* //IGNORE */ + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +typedef void* iconv_t; + +iconv_t iconv_open(const char *tocode, const char *fromcode); +int iconv_close(iconv_t cd); +size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); + +/* libiconv interface for vim */ +#if defined(MAKE_DLL) +int +iconvctl (iconv_t cd, int request, void* argument) +{ + /* not supported */ + return 0; +} +#endif + +typedef struct compat_t compat_t; +typedef struct csconv_t csconv_t; +typedef struct rec_iconv_t rec_iconv_t; + +typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode); +typedef int (*f_iconv_close)(iconv_t cd); +typedef size_t (*f_iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +typedef int* (*f_errno)(void); +typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); +typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); + +#define COMPAT_IN 1 +#define COMPAT_OUT 2 + +/* unicode mapping for compatibility with other conversion table. 
*/ +struct compat_t { + uint in; + uint out; + uint flag; +}; + +struct csconv_t { + int codepage; + int flags; + f_mbtowc mbtowc; + f_wctomb wctomb; + f_mblen mblen; + f_flush flush; + DWORD mode; + compat_t *compat; +}; + +struct rec_iconv_t { + iconv_t cd; + f_iconv_close iconv_close; + f_iconv iconv; + f_errno _errno; + csconv_t from; + csconv_t to; +#if defined(USE_LIBICONV_DLL) + HMODULE hlibiconv; +#endif +}; + +static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); +static int win_iconv_close(iconv_t cd); +static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); + +static int load_mlang(void); +static int make_csconv(const char *name, csconv_t *cv); +static int name_to_codepage(const char *name); +static uint utf16_to_ucs4(const ushort *wbuf); +static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); +static int mbtowc_flags(int codepage); +static int must_use_null_useddefaultchar(int codepage); +static char *strrstr(const char *str, const char *token); +static char *xstrndup(const char *s, size_t n); +static int seterror(int err); + +#if defined(USE_LIBICONV_DLL) +static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); +static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size); +static FARPROC find_imported_function(HMODULE hModule, const char *funcname); + +static HMODULE hwiniconv; +#endif + +static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); +static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); +static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); +static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); +static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); + +static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +static int kernel_wctomb(csconv_t *cv, 
ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); +static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); +static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); + +static struct { + int codepage; + const char *name; +} codepage_alias[] = { + {65001, "CP65001"}, + {65001, "UTF8"}, + {65001, "UTF-8"}, + + {1200, "CP1200"}, + {1200, "UTF16LE"}, + {1200, "UTF-16LE"}, + {1200, "UCS2LE"}, + {1200, "UCS-2LE"}, + {1200, "UCS-2-INTERNAL"}, + + {1201, "CP1201"}, + {1201, "UTF16BE"}, + {1201, "UTF-16BE"}, + {1201, "UCS2BE"}, + {1201, "UCS-2BE"}, + {1201, "unicodeFFFE"}, + + {12000, "CP12000"}, + {12000, "UTF32LE"}, + {12000, "UTF-32LE"}, + {12000, "UCS4LE"}, + {12000, "UCS-4LE"}, + + {12001, "CP12001"}, + {12001, "UTF32BE"}, + {12001, "UTF-32BE"}, + {12001, "UCS4BE"}, + {12001, "UCS-4BE"}, + +#ifndef GLIB_COMPILATION + /* + * Default is big endian. + * See rfc2781 4.3 Interpreting text labelled as UTF-16. 
+ */ + {1201, "UTF16"}, + {1201, "UTF-16"}, + {1201, "UCS2"}, + {1201, "UCS-2"}, + {12001, "UTF32"}, + {12001, "UTF-32"}, + {12001, "UCS-4"}, + {12001, "UCS4"}, +#else + /* Default is little endian, because the platform is */ + {1200, "UTF16"}, + {1200, "UTF-16"}, + {1200, "UCS2"}, + {1200, "UCS-2"}, + {12000, "UTF32"}, + {12000, "UTF-32"}, + {12000, "UCS4"}, + {12000, "UCS-4"}, +#endif + + /* copy from libiconv `iconv -l` */ + /* !IsValidCodePage(367) */ + {20127, "ANSI_X3.4-1968"}, + {20127, "ANSI_X3.4-1986"}, + {20127, "ASCII"}, + {20127, "CP367"}, + {20127, "IBM367"}, + {20127, "ISO-IR-6"}, + {20127, "ISO646-US"}, + {20127, "ISO_646.IRV:1991"}, + {20127, "US"}, + {20127, "US-ASCII"}, + {20127, "CSASCII"}, + + /* !IsValidCodePage(819) */ + {1252, "CP819"}, + {1252, "IBM819"}, + {28591, "ISO-8859-1"}, + {28591, "ISO-IR-100"}, + {28591, "ISO8859-1"}, + {28591, "ISO_8859-1"}, + {28591, "ISO_8859-1:1987"}, + {28591, "L1"}, + {28591, "LATIN1"}, + {28591, "CSISOLATIN1"}, + + {1250, "CP1250"}, + {1250, "MS-EE"}, + {1250, "WINDOWS-1250"}, + + {1251, "CP1251"}, + {1251, "MS-CYRL"}, + {1251, "WINDOWS-1251"}, + + {1252, "CP1252"}, + {1252, "MS-ANSI"}, + {1252, "WINDOWS-1252"}, + + {1253, "CP1253"}, + {1253, "MS-GREEK"}, + {1253, "WINDOWS-1253"}, + + {1254, "CP1254"}, + {1254, "MS-TURK"}, + {1254, "WINDOWS-1254"}, + + {1255, "CP1255"}, + {1255, "MS-HEBR"}, + {1255, "WINDOWS-1255"}, + + {1256, "CP1256"}, + {1256, "MS-ARAB"}, + {1256, "WINDOWS-1256"}, + + {1257, "CP1257"}, + {1257, "WINBALTRIM"}, + {1257, "WINDOWS-1257"}, + + {1258, "CP1258"}, + {1258, "WINDOWS-1258"}, + + {850, "850"}, + {850, "CP850"}, + {850, "IBM850"}, + {850, "CSPC850MULTILINGUAL"}, + + /* !IsValidCodePage(862) */ + {862, "862"}, + {862, "CP862"}, + {862, "IBM862"}, + {862, "CSPC862LATINHEBREW"}, + + {866, "866"}, + {866, "CP866"}, + {866, "IBM866"}, + {866, "CSIBM866"}, + + /* !IsValidCodePage(154) */ + {154, "CP154"}, + {154, "CYRILLIC-ASIAN"}, + {154, "PT154"}, + {154, "PTCP154"}, + {154, 
"CSPTCP154"}, + + /* !IsValidCodePage(1133) */ + {1133, "CP1133"}, + {1133, "IBM-CP1133"}, + + {874, "CP874"}, + {874, "WINDOWS-874"}, + + /* !IsValidCodePage(51932) */ + {51932, "CP51932"}, + {51932, "MS51932"}, + {51932, "WINDOWS-51932"}, + {51932, "EUC-JP"}, + + {932, "CP932"}, + {932, "MS932"}, + {932, "SHIFFT_JIS"}, /* NOTE(review): SHIFFT looks like a typo for SHIFT -- confirm against upstream before renaming */ + {932, "SHIFFT_JIS-MS"}, + {932, "SJIS"}, + {932, "SJIS-MS"}, + {932, "SJIS-OPEN"}, + {932, "SJIS-WIN"}, + {932, "WINDOWS-31J"}, + {932, "WINDOWS-932"}, + {932, "CSWINDOWS31J"}, + + {50221, "CP50221"}, + {50221, "ISO-2022-JP"}, + {50221, "ISO-2022-JP-MS"}, + {50221, "ISO2022-JP"}, + {50221, "ISO2022-JP-MS"}, + {50221, "MS50221"}, + {50221, "WINDOWS-50221"}, + + {936, "CP936"}, + {936, "GBK"}, + {936, "MS936"}, + {936, "WINDOWS-936"}, + + {950, "CP950"}, + {950, "BIG5"}, + {950, "BIG5HKSCS"}, + {950, "BIG5-HKSCS"}, + + {949, "CP949"}, + {949, "UHC"}, + {949, "EUC-KR"}, + + {1361, "CP1361"}, + {1361, "JOHAB"}, + + {437, "437"}, + {437, "CP437"}, + {437, "IBM437"}, + {437, "CSPC8CODEPAGE437"}, + + {737, "CP737"}, + + {775, "CP775"}, + {775, "IBM775"}, + {775, "CSPC775BALTIC"}, + + {852, "852"}, + {852, "CP852"}, + {852, "IBM852"}, + {852, "CSPCP852"}, + + /* !IsValidCodePage(853) */ + {853, "CP853"}, + + {855, "855"}, + {855, "CP855"}, + {855, "IBM855"}, + {855, "CSIBM855"}, + + {857, "857"}, + {857, "CP857"}, + {857, "IBM857"}, + {857, "CSIBM857"}, + + /* !IsValidCodePage(858) */ + {858, "CP858"}, + + {860, "860"}, + {860, "CP860"}, + {860, "IBM860"}, + {860, "CSIBM860"}, + + {861, "861"}, + {861, "CP-IS"}, + {861, "CP861"}, + {861, "IBM861"}, + {861, "CSIBM861"}, + + {863, "863"}, + {863, "CP863"}, + {863, "IBM863"}, + {863, "CSIBM863"}, + + {864, "CP864"}, + {864, "IBM864"}, + {864, "CSIBM864"}, + + {865, "865"}, + {865, "CP865"}, + {865, "IBM865"}, + {865, "CSIBM865"}, + + {869, "869"}, + {869, "CP-GR"}, + {869, "CP869"}, + {869, "IBM869"}, + {869, "CSIBM869"}, 
+ + /* !IsValidCodePage(1125) */ + {1125, "CP1125"}, + + /* + * Code Page Identifiers 
+ * http://msdn2.microsoft.com/en-us/library/ms776446.aspx + */ + {37, "IBM037"}, /* IBM EBCDIC US-Canada */ + {437, "IBM437"}, /* OEM United States */ + {500, "IBM500"}, /* IBM EBCDIC International */ + {708, "ASMO-708"}, /* Arabic (ASMO 708) */ + /* 709 Arabic (ASMO-449+, BCON V4) */ + /* 710 Arabic - Transparent Arabic */ + {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */ + {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */ + {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */ + {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */ + {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */ + {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */ + {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */ + {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */ + {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */ + {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */ + {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */ + {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */ + {864, "IBM864"}, /* OEM Arabic; Arabic (864) */ + {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */ + {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */ + {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */ + {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */ + {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */ + {875, "cp875"}, /* IBM EBCDIC Greek Modern */ + {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */ + {932, "shift-jis"}, /* alternative name for it */ + {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */ + {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */ + {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */ + {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional 
(Big5-HKSCS) */ + {950, "big5-hkscs"}, /* alternative name for it */ + {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */ + {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */ + {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */ + {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */ + {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */ + {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */ + {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */ + {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */ + {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */ + {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */ + {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */ + {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */ + {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */ + {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */ + {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */ + {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */ + {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */ + {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */ + {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */ + {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */ + {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */ + {1361, "Johab"}, /* Korean (Johab) */ + {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */ + {10001, "x-mac-japanese"}, /* Japanese (Mac) */ + {10002, "x-mac-chinesetrad"}, /* MAC 
Traditional Chinese (Big5); Chinese Traditional (Mac) */ + {10003, "x-mac-korean"}, /* Korean (Mac) */ + {10004, "x-mac-arabic"}, /* Arabic (Mac) */ + {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */ + {10006, "x-mac-greek"}, /* Greek (Mac) */ + {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */ + {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */ + {10010, "x-mac-romanian"}, /* Romanian (Mac) */ + {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */ + {10021, "x-mac-thai"}, /* Thai (Mac) */ + {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */ + {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */ + {10081, "x-mac-turkish"}, /* Turkish (Mac) */ + {10082, "x-mac-croatian"}, /* Croatian (Mac) */ + {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */ + {20001, "x-cp20001"}, /* TCA Taiwan */ + {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */ + {20003, "x-cp20003"}, /* IBM5550 Taiwan */ + {20004, "x-cp20004"}, /* TeleText Taiwan */ + {20005, "x-cp20005"}, /* Wang Taiwan */ + {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 
5, 7-bit); Western European (IA5) */ + {20106, "x-IA5-German"}, /* IA5 German (7-bit) */ + {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */ + {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */ + {20127, "us-ascii"}, /* US-ASCII (7-bit) */ + {20261, "x-cp20261"}, /* T.61 */ + {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */ + {20273, "IBM273"}, /* IBM EBCDIC Germany */ + {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */ + {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */ + {20280, "IBM280"}, /* IBM EBCDIC Italy */ + {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */ + {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */ + {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */ + {20297, "IBM297"}, /* IBM EBCDIC France */ + {20420, "IBM420"}, /* IBM EBCDIC Arabic */ + {20423, "IBM423"}, /* IBM EBCDIC Greek */ + {20424, "IBM424"}, /* IBM EBCDIC Hebrew */ + {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */ + {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */ + {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */ + {20871, "IBM871"}, /* IBM EBCDIC Icelandic */ + {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */ + {20905, "IBM905"}, /* IBM EBCDIC Turkish */ + {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */ + {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */ + {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */ + {20949, "x-cp20949"}, /* Korean Wansung */ + {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */ + /* 21027 (deprecated) */ + {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */ + {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ + {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ + {28591, "iso_8859-1"}, + {28591, "iso_8859_1"}, + {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ + {28592, "iso8859-2"}, /* ISO 8859-2 Central European; 
Central European (ISO) */ + {28592, "iso_8859-2"}, + {28592, "iso_8859_2"}, + {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */ + {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */ + {28593, "iso_8859-3"}, + {28593, "iso_8859_3"}, + {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */ + {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */ + {28594, "iso_8859-4"}, + {28594, "iso_8859_4"}, + {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */ + {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */ + {28595, "iso_8859-5"}, + {28595, "iso_8859_5"}, + {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */ + {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */ + {28596, "iso_8859-6"}, + {28596, "iso_8859_6"}, + {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */ + {28597, "iso8859-7"}, /* ISO 8859-7 Greek */ + {28597, "iso_8859-7"}, + {28597, "iso_8859_7"}, + {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ + {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ + {28598, "iso_8859-8"}, + {28598, "iso_8859_8"}, + {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */ + {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */ + {28599, "iso_8859-9"}, + {28599, "iso_8859_9"}, + {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */ + {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */ + {28603, "iso_8859-13"}, + {28603, "iso_8859_13"}, + {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */ + {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */ + {28605, "iso_8859-15"}, + {28605, "iso_8859_15"}, + {29001, "x-Europa"}, /* Europa 3 */ + {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ + {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ + {38598, "iso_8859-8-i"}, + {38598, "iso_8859_8-i"}, + {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */ + {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */ + {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana 
- SO/SI) */ + {50225, "iso-2022-kr"}, /* ISO 2022 Korean */ + {50225, "iso2022-kr"}, /* ISO 2022 Korean */ + {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */ + /* 50229 ISO 2022 Traditional Chinese */ + /* 50930 EBCDIC Japanese (Katakana) Extended */ + /* 50931 EBCDIC US-Canada and Japanese */ + /* 50933 EBCDIC Korean Extended and Korean */ + /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */ + /* 50936 EBCDIC Simplified Chinese */ + /* 50937 EBCDIC US-Canada and Traditional Chinese */ + /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */ + {51932, "euc-jp"}, /* EUC Japanese */ + {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */ + {51949, "euc-kr"}, /* EUC Korean */ + /* 51950 EUC Traditional Chinese */ + {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */ + {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */ + {57002, "x-iscii-de"}, /* ISCII Devanagari */ + {57003, "x-iscii-be"}, /* ISCII Bengali */ + {57004, "x-iscii-ta"}, /* ISCII Tamil */ + {57005, "x-iscii-te"}, /* ISCII Telugu */ + {57006, "x-iscii-as"}, /* ISCII Assamese */ + {57007, "x-iscii-or"}, /* ISCII Oriya */ + {57008, "x-iscii-ka"}, /* ISCII Kannada */ + {57009, "x-iscii-ma"}, /* ISCII Malayalam */ + {57010, "x-iscii-gu"}, /* ISCII Gujarati */ + {57011, "x-iscii-pa"}, /* ISCII Punjabi */ + + {0, NULL} +}; + +/* + * SJIS SHIFTJIS table CP932 table + * ---- --------------------------- -------------------------------- + * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS + * 7E U+203E OVERLINE U+007E TILDE + * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR + * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS + * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE + * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO + * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS + * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN + * 8192 
U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN + * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN + * + * EUC-JP and ISO-2022-JP should be compatible with CP932. + * + * Kernel and MLang have different Unicode mapping table. Make sure + * which API is used. + */ +static compat_t cp932_compat[] = { + {0x00A5, 0x005C, COMPAT_OUT}, + {0x203E, 0x007E, COMPAT_OUT}, + {0x2014, 0x2015, COMPAT_OUT}, + {0x301C, 0xFF5E, COMPAT_OUT}, + {0x2016, 0x2225, COMPAT_OUT}, + {0x2212, 0xFF0D, COMPAT_OUT}, + {0x00A2, 0xFFE0, COMPAT_OUT}, + {0x00A3, 0xFFE1, COMPAT_OUT}, + {0x00AC, 0xFFE2, COMPAT_OUT}, + {0, 0, 0} +}; + +static compat_t cp20932_compat[] = { + {0x00A5, 0x005C, COMPAT_OUT}, + {0x203E, 0x007E, COMPAT_OUT}, + {0x2014, 0x2015, COMPAT_OUT}, + {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN}, + {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN}, + {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN}, + {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN}, + {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN}, + {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN}, + {0, 0, 0} +}; + +static compat_t *cp51932_compat = cp932_compat; + +/* cp20932_compat for kernel. cp932_compat for mlang. 
*/ +static compat_t *cp5022x_compat = cp932_compat; + +typedef HRESULT (WINAPI *CONVERTINETSTRING)( + LPDWORD lpdwMode, + DWORD dwSrcEncoding, + DWORD dwDstEncoding, + LPCSTR lpSrcStr, + LPINT lpnSrcSize, + LPBYTE lpDstStr, + LPINT lpnDstSize +); +typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)( + LPDWORD lpdwMode, + DWORD dwSrcEncoding, + LPCSTR lpSrcStr, + LPINT lpnMultiCharCount, + LPWSTR lpDstStr, + LPINT lpnWideCharCount +); +typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)( + LPDWORD lpdwMode, + DWORD dwEncoding, + LPCWSTR lpSrcStr, + LPINT lpnWideCharCount, + LPSTR lpDstStr, + LPINT lpnMultiCharCount +); +typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)( + DWORD dwSrcEncoding, + DWORD dwDstEncoding +); +typedef HRESULT (WINAPI *LCIDTORFC1766A)( + LCID Locale, + LPSTR pszRfc1766, + int nChar +); +typedef HRESULT (WINAPI *LCIDTORFC1766W)( + LCID Locale, + LPWSTR pszRfc1766, + int nChar +); +typedef HRESULT (WINAPI *RFC1766TOLCIDA)( + LCID *pLocale, + LPSTR pszRfc1766 +); +typedef HRESULT (WINAPI *RFC1766TOLCIDW)( + LCID *pLocale, + LPWSTR pszRfc1766 +); +static CONVERTINETSTRING ConvertINetString; +static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode; +static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte; +static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable; +static LCIDTORFC1766A LcidToRfc1766A; +static RFC1766TOLCIDA Rfc1766ToLcidA; + +static int +load_mlang(void) +{ + HMODULE h; + if (ConvertINetString != NULL) + return TRUE; + h = LoadLibrary(TEXT("mlang.dll")); + if (!h) + return FALSE; + ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString"); + ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); + ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); + IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, 
"IsConvertINetStringAvailable"); + LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A"); + Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA"); + return TRUE; +} + +iconv_t +iconv_open(const char *tocode, const char *fromcode) +{ + rec_iconv_t *cd; + + cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t)); + if (cd == NULL) + return (iconv_t)(-1); + +#if defined(USE_LIBICONV_DLL) + errno = 0; + if (libiconv_iconv_open(cd, tocode, fromcode)) + return (iconv_t)cd; +#endif + + /* reset the errno to prevent reporting wrong error code. + * 0 for unsorted error. */ + errno = 0; + if (win_iconv_open(cd, tocode, fromcode)) + return (iconv_t)cd; + + free(cd); + + return (iconv_t)(-1); +} + +int +iconv_close(iconv_t _cd) +{ + rec_iconv_t *cd = (rec_iconv_t *)_cd; + int r = cd->iconv_close(cd->cd); + int e = *(cd->_errno()); +#if defined(USE_LIBICONV_DLL) + if (cd->hlibiconv != NULL) + FreeLibrary(cd->hlibiconv); +#endif + free(cd); + errno = e; + return r; +} + +size_t +iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) +{ + rec_iconv_t *cd = (rec_iconv_t *)_cd; + size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); + errno = *(cd->_errno()); + return r; +} + +static int +win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) +{ + if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) + return FALSE; + cd->iconv_close = win_iconv_close; + cd->iconv = win_iconv; + cd->_errno = _errno; + cd->cd = (iconv_t)cd; + return TRUE; +} + +static int +win_iconv_close(iconv_t cd UNUSED) +{ + return 0; +} + +static size_t +win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) +{ + rec_iconv_t *cd = (rec_iconv_t *)_cd; + ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */ + int insize; + int outsize; + int wsize; + DWORD frommode; + DWORD tomode; + uint wc; + compat_t *cp; + int i; + + if (inbuf == NULL 
|| *inbuf == NULL) + { + if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) + { + tomode = cd->to.mode; + outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); + if (outsize == -1) + { + if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) + { + outsize = 0; + } + else + { + cd->to.mode = tomode; + return (size_t)(-1); + } + } + *outbuf += outsize; + *outbytesleft -= outsize; + } + cd->from.mode = 0; + cd->to.mode = 0; + return 0; + } + + while (*inbytesleft != 0) + { + frommode = cd->from.mode; + tomode = cd->to.mode; + wsize = MB_CHAR_MAX; + + insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); + if (insize == -1) + { + if (cd->to.flags & FLAG_IGNORE) + { + cd->from.mode = frommode; + insize = 1; + wsize = 0; + } + else + { + cd->from.mode = frommode; + return (size_t)(-1); + } + } + + if (wsize == 0) + { + *inbuf += insize; + *inbytesleft -= insize; + continue; + } + + if (cd->from.compat != NULL) + { + wc = utf16_to_ucs4(wbuf); + cp = cd->from.compat; + for (i = 0; cp[i].in != 0; ++i) + { + if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc) + { + ucs4_to_utf16(cp[i].in, wbuf, &wsize); + break; + } + } + } + + if (cd->to.compat != NULL) + { + wc = utf16_to_ucs4(wbuf); + cp = cd->to.compat; + for (i = 0; cp[i].in != 0; ++i) + { + if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc) + { + ucs4_to_utf16(cp[i].out, wbuf, &wsize); + break; + } + } + } + + outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft); + if (outsize == -1) + { + if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) + { + cd->to.mode = tomode; + outsize = 0; + } + else + { + cd->from.mode = frommode; + cd->to.mode = tomode; + return (size_t)(-1); + } + } + + *inbuf += insize; + *outbuf += outsize; + *inbytesleft -= insize; + *outbytesleft -= outsize; + } + + return 0; +} + +static int +make_csconv(const char *_name, csconv_t *cv) +{ + CPINFO cpinfo; + int use_compat = TRUE; + int flag = 0; + char *name; + char *p; + + 
name = xstrndup(_name, strlen(_name)); + if (name == NULL) + return FALSE; + + /* check for option "enc_name//opt1//opt2" */ + while ((p = strrstr(name, "//")) != NULL) + { + if (_stricmp(p + 2, "nocompat") == 0) + use_compat = FALSE; + else if (_stricmp(p + 2, "translit") == 0) + flag |= FLAG_TRANSLIT; + else if (_stricmp(p + 2, "ignore") == 0) + flag |= FLAG_IGNORE; + *p = 0; + } + + cv->mode = 0; + cv->flags = flag; + cv->mblen = NULL; + cv->flush = NULL; + cv->compat = NULL; + cv->codepage = name_to_codepage(name); + if (cv->codepage == 1200 || cv->codepage == 1201) + { + cv->mbtowc = utf16_mbtowc; + cv->wctomb = utf16_wctomb; + if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 || + _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 || + _stricmp(name,"UCS-2-INTERNAL") == 0) + cv->flags |= FLAG_USE_BOM; + } + else if (cv->codepage == 12000 || cv->codepage == 12001) + { + cv->mbtowc = utf32_mbtowc; + cv->wctomb = utf32_wctomb; + if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 || + _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0) + cv->flags |= FLAG_USE_BOM; + } + else if (cv->codepage == 65001) + { + cv->mbtowc = kernel_mbtowc; + cv->wctomb = kernel_wctomb; + cv->mblen = utf8_mblen; + } + else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) + { + cv->mbtowc = iso2022jp_mbtowc; + cv->wctomb = iso2022jp_wctomb; + cv->flush = iso2022jp_flush; + } + else if (cv->codepage == 51932 && load_mlang()) + { + cv->mbtowc = mlang_mbtowc; + cv->wctomb = mlang_wctomb; + cv->mblen = eucjp_mblen; + } + else if (IsValidCodePage(cv->codepage) + && GetCPInfo(cv->codepage, &cpinfo) != 0) + { + cv->mbtowc = kernel_mbtowc; + cv->wctomb = kernel_wctomb; + if (cpinfo.MaxCharSize == 1) + cv->mblen = sbcs_mblen; + else if (cpinfo.MaxCharSize == 2) + cv->mblen = dbcs_mblen; + else + cv->mblen = mbcs_mblen; + } + else + { + /* not supported */ + free(name); + errno = EINVAL; + return FALSE; 
+ } + + if (use_compat) + { + switch (cv->codepage) + { + case 932: cv->compat = cp932_compat; break; + case 20932: cv->compat = cp20932_compat; break; + case 51932: cv->compat = cp51932_compat; break; + case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; + } + } + + free(name); + + return TRUE; +} +/* Map an encoding name to a Windows code page: "" and "char" give GetACP(), "wchar_t" gives 1200, then codepage_alias[] is searched case-insensitively, then CPnnn / nnn / XXnnn are parsed with atoi(); returns -1 when nothing matches. */ +static int +name_to_codepage(const char *name) +{ + int i; + + if (*name == '\0' || + strcmp(name, "char") == 0) + return GetACP(); + else if (strcmp(name, "wchar_t") == 0) + return 1200; + /* Aliases first: the numeric "cp" prefix parse used to shadow table entries such as "CP-IS", "CP-GR", "CP367", "CP819" and "cp1025". */ + for (i = 0; codepage_alias[i].name != NULL; ++i) + if (_stricmp(name, codepage_alias[i].name) == 0) + return codepage_alias[i].codepage; + if (_strnicmp(name, "cp", 2) == 0) + return atoi(name + 2); /* CP123 */ + else if ('0' <= name[0] && name[0] <= '9') + return atoi(name); /* 123 */ + else if (_strnicmp(name, "xx", 2) == 0) + return atoi(name + 2); /* XX123 for debug */ + return -1; +} + +/* + * http://www.faqs.org/rfcs/rfc2781.html + */ +static uint +utf16_to_ucs4(const ushort *wbuf) +{ + uint wc = wbuf[0]; + if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) + wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000; + return wc; +} + +static void +ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize) +{ + if (wc < 0x10000) + { + wbuf[0] = wc; + *wbufsize = 1; + } + else + { + wc -= 0x10000; + wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF); + wbuf[1] = 0xDC00 | (wc & 0x3FF); + *wbufsize = 2; + } +} + +/* + * Check if codepage is one of those for which the dwFlags parameter + * to MultiByteToWideChar() must be zero. Return zero or + * MB_ERR_INVALID_CHARS. 
The docs in Platform SDK for Windows + * Server 2003 R2 claims that also codepage 65001 is one of these, but + * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave + * out 65001 (UTF-8), and that indeed seems to be the case on XP, it + * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting + * from UTF-8. 
+ */ +static int +mbtowc_flags(int codepage) +{ + return (codepage == 50220 || codepage == 50221 || + codepage == 50222 || codepage == 50225 || + codepage == 50227 || codepage == 50229 || + codepage == 52936 || codepage == 54936 || + (codepage >= 57002 && codepage <= 57011) || + codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; +} + +/* + * Check if codepage is one those for which the lpUsedDefaultChar + * parameter to WideCharToMultiByte() must be NULL. The docs in + * Platform SDK for Windows Server 2003 R2 claims that this is the + * list below, while the MSDN docs for MSVS2008 claim that it is only + * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform + * SDK seems to be correct, at least for XP. + */ +static int +must_use_null_useddefaultchar(int codepage) +{ + return (codepage == 65000 || codepage == 65001 || + codepage == 50220 || codepage == 50221 || + codepage == 50222 || codepage == 50225 || + codepage == 50227 || codepage == 50229 || + codepage == 52936 || codepage == 54936 || + (codepage >= 57002 && codepage <= 57011) || + codepage == 42); +} + +static char * +strrstr(const char *str, const char *token) +{ + int len = strlen(token); + const char *p = str + strlen(str); + + while (str <= --p) + if (p[0] == token[0] && strncmp(p, token, len) == 0) + return (char *)p; + return NULL; +} + +static char * +xstrndup(const char *s, size_t n) +{ + char *p; + + p = (char *)malloc(n + 1); + if (p == NULL) + return NULL; + memcpy(p, s, n); + p[n] = '\0'; + return p; +} + +static int +seterror(int err) +{ + errno = err; + return -1; +} + +#if defined(USE_LIBICONV_DLL) +static int +libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) +{ + HMODULE hlibiconv = NULL; + char *dllname; + const char *p; + const char *e; + f_iconv_open _iconv_open; + + /* + * always try to load dll, so that we can switch dll in runtime. 
+ */ + + /* XXX: getenv() can't get variable set by SetEnvironmentVariable() */ + p = getenv("WINICONV_LIBICONV_DLL"); + if (p == NULL) + p = DEFAULT_LIBICONV_DLL; + /* parse comma separated value */ + for ( ; *p != 0; p = (*e == ',') ? e + 1 : e) + { + e = strchr(p, ','); + if (p == e) + continue; + else if (e == NULL) + e = p + strlen(p); + dllname = xstrndup(p, e - p); + if (dllname == NULL) + return FALSE; + hlibiconv = LoadLibraryA(dllname); + free(dllname); + if (hlibiconv != NULL) + { + if (hlibiconv == hwiniconv) + { + FreeLibrary(hlibiconv); + hlibiconv = NULL; + continue; + } + break; + } + } + + if (hlibiconv == NULL) + goto failed; + + _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "libiconv_open"); + if (_iconv_open == NULL) + _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "iconv_open"); + cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "libiconv_close"); + if (cd->iconv_close == NULL) + cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "iconv_close"); + cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "libiconv"); + if (cd->iconv == NULL) + cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "iconv"); + cd->_errno = (f_errno)find_imported_function(hlibiconv, "_errno"); + if (_iconv_open == NULL || cd->iconv_close == NULL + || cd->iconv == NULL || cd->_errno == NULL) + goto failed; + + cd->cd = _iconv_open(tocode, fromcode); + if (cd->cd == (iconv_t)(-1)) + goto failed; + + cd->hlibiconv = hlibiconv; + return TRUE; + +failed: + if (hlibiconv != NULL) + FreeLibrary(hlibiconv); + return FALSE; +} + +/* + * Reference: + * http://forums.belution.com/ja/vc/000/234/78s.shtml + * http://nienie.com/~masapico/api_ImageDirectoryEntryToData.html + * + * The formal way is + * imagehlp.h or dbghelp.h + * imagehlp.lib or dbghelp.lib + * ImageDirectoryEntryToData() + */ +#define TO_DOS_HEADER(base) ((PIMAGE_DOS_HEADER)(base)) +#define TO_NT_HEADERS(base) ((PIMAGE_NT_HEADERS)((LPBYTE)(base) + TO_DOS_HEADER(base)->e_lfanew)) +static 
PVOID +MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size) +{ + /* TODO: MappedAsImage? */ + PIMAGE_DATA_DIRECTORY p; + p = TO_NT_HEADERS(Base)->OptionalHeader.DataDirectory + DirectoryEntry; + if (p->VirtualAddress == 0) { + *Size = 0; + return NULL; + } + *Size = p->Size; + return (PVOID)((LPBYTE)Base + p->VirtualAddress); +} + +static FARPROC +find_imported_function(HMODULE hModule, const char *funcname) +{ + DWORD_PTR Base; + ULONG Size; + PIMAGE_IMPORT_DESCRIPTOR Imp; + PIMAGE_THUNK_DATA Address; /* Import Address Table */ + PIMAGE_THUNK_DATA Name; /* Import Name Table */ + PIMAGE_IMPORT_BY_NAME ImpName; + + Base = (DWORD_PTR)hModule; + Imp = (PIMAGE_IMPORT_DESCRIPTOR)MyImageDirectoryEntryToData( + (LPVOID)Base, + TRUE, + IMAGE_DIRECTORY_ENTRY_IMPORT, + &Size); + if (Imp == NULL) + return NULL; + for ( ; Imp->OriginalFirstThunk != 0; ++Imp) + { + Address = (PIMAGE_THUNK_DATA)(Base + Imp->FirstThunk); + Name = (PIMAGE_THUNK_DATA)(Base + Imp->OriginalFirstThunk); + for ( ; Name->u1.Ordinal != 0; ++Name, ++Address) + { + if (!IMAGE_SNAP_BY_ORDINAL(Name->u1.Ordinal)) + { + ImpName = (PIMAGE_IMPORT_BY_NAME) + (Base + (DWORD_PTR)Name->u1.AddressOfData); + if (strcmp((char *)ImpName->Name, funcname) == 0) + return (FARPROC)Address->u1.Function; + } + } + } + return NULL; +} +#endif + +static int +sbcs_mblen(csconv_t *cv UNUSED, const uchar *buf UNUSED, int bufsize UNUSED) +{ + return 1; +} + +static int +dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) +{ + int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 
2 : 1; + if (bufsize < len) + return seterror(EINVAL); + return len; +} + +static int +mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) +{ + int len = 0; + + if (cv->codepage == 54936) { + if (buf[0] <= 0x7F) len = 1; + else if (buf[0] >= 0x81 && buf[0] <= 0xFE && + bufsize >= 2 && + ((buf[1] >= 0x40 && buf[1] <= 0x7E) || + (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2; + else if (buf[0] >= 0x81 && buf[0] <= 0xFE && + bufsize >= 4 && + buf[1] >= 0x30 && buf[1] <= 0x39) len = 4; + else + return seterror(EINVAL); + return len; + } + else + return seterror(EINVAL); +} + +static int +utf8_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) +{ + int len = 0; + + if (buf[0] < 0x80) len = 1; + else if ((buf[0] & 0xE0) == 0xC0) len = 2; + else if ((buf[0] & 0xF0) == 0xE0) len = 3; + else if ((buf[0] & 0xF8) == 0xF0) len = 4; + else if ((buf[0] & 0xFC) == 0xF8) len = 5; + else if ((buf[0] & 0xFE) == 0xFC) len = 6; + + if (len == 0) + return seterror(EILSEQ); + else if (bufsize < len) + return seterror(EINVAL); + return len; +} + +static int +eucjp_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) +{ + if (buf[0] < 0x80) /* ASCII */ + return 1; + else if (buf[0] == 0x8E) /* JIS X 0201 */ + { + if (bufsize < 2) + return seterror(EINVAL); + else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) + return seterror(EILSEQ); + return 2; + } + else if (buf[0] == 0x8F) /* JIS X 0212 */ + { + if (bufsize < 3) + return seterror(EINVAL); + else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) + || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) + return seterror(EILSEQ); + return 3; + } + else /* JIS X 0208 */ + { + if (bufsize < 2) + return seterror(EINVAL); + else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) + || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) + return seterror(EILSEQ); + return 2; + } +} + +static int +kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) +{ + int len; + + len = cv->mblen(cv, buf, bufsize); + if (len == -1) + return -1; + /* If converting 
from ASCII, reject 8bit + * chars. MultiByteToWideChar() doesn't. Note that for ASCII we + * know that the mblen function is sbcs_mblen() so len is 1. + */ + if (cv->codepage == 20127 && buf[0] >= 0x80) + return seterror(EILSEQ); + *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), + (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); + if (*wbufsize == 0) + return seterror(EILSEQ); + return len; +} + +static int +kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) +{ + BOOL usedDefaultChar = 0; + BOOL *p = NULL; + int flags = 0; + int len; + + if (bufsize == 0) + return seterror(E2BIG); + if (!must_use_null_useddefaultchar(cv->codepage)) + { + p = &usedDefaultChar; +#ifdef WC_NO_BEST_FIT_CHARS + if (!(cv->flags & FLAG_TRANSLIT)) + flags |= WC_NO_BEST_FIT_CHARS; +#endif + } + len = WideCharToMultiByte(cv->codepage, flags, + (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); + if (len == 0) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + return seterror(E2BIG); + return seterror(EILSEQ); + } + else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) + return seterror(EILSEQ); + else if (cv->mblen(cv, buf, len) != len) /* validate result */ + return seterror(EILSEQ); + return len; +} + +/* + * It seems that the mode (cv->mode) is fixnum. + * For example, when converting iso-2022-jp(cp50221) to unicode: + * in ascii sequence: mode=0xC42C0000 + * in jisx0208 sequence: mode=0xC42C0001 + * "C42C" is same for each convert session. 
+ * It should be: ((codepage-1)<<16)|state + */ +static int +mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) +{ + int len; + int insize; + HRESULT hr; + + len = cv->mblen(cv, buf, bufsize); + if (len == -1) + return -1; + insize = len; + hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, + (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); + if (hr != S_OK || insize != len) + return seterror(EILSEQ); + return len; +} + +static int +mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) +{ + char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ + int tmpsize = MB_CHAR_MAX; + int insize = wbufsize; + HRESULT hr; + + hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, + (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); + if (hr != S_OK || insize != wbufsize) + return seterror(EILSEQ); + else if (bufsize < tmpsize) + return seterror(E2BIG); + else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) + return seterror(EILSEQ); + memcpy(buf, tmpbuf, tmpsize); + return tmpsize; +} + +static int +utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) +{ + int codepage = cv->codepage; + + /* swap endian: 1200 <-> 1201 */ + if (cv->mode & UNICODE_MODE_SWAPPED) + codepage ^= 1; + + if (bufsize < 2) + return seterror(EINVAL); + if (codepage == 1200) /* little endian */ + wbuf[0] = (buf[1] << 8) | buf[0]; + else if (codepage == 1201) /* big endian */ + wbuf[0] = (buf[0] << 8) | buf[1]; + + if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) + { + cv->mode |= UNICODE_MODE_BOM_DONE; + if (wbuf[0] == 0xFFFE) + { + cv->mode |= UNICODE_MODE_SWAPPED; + *wbufsize = 0; + return 2; + } + else if (wbuf[0] == 0xFEFF) + { + *wbufsize = 0; + return 2; + } + } + + if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) + return seterror(EILSEQ); + if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) + { + if (bufsize < 4) + return seterror(EINVAL); + if (codepage 
== 1200) /* little endian */ + wbuf[1] = (buf[3] << 8) | buf[2]; + else if (codepage == 1201) /* big endian */ + wbuf[1] = (buf[2] << 8) | buf[3]; + if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) + return seterror(EILSEQ); + *wbufsize = 2; + return 4; + } + *wbufsize = 1; + return 2; +} + +static int +utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) +{ + if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) + { + int r; + + cv->mode |= UNICODE_MODE_BOM_DONE; + if (bufsize < 2) + return seterror(E2BIG); + if (cv->codepage == 1200) /* little endian */ + memcpy(buf, "\xFF\xFE", 2); + else if (cv->codepage == 1201) /* big endian */ + memcpy(buf, "\xFE\xFF", 2); + + r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); + if (r == -1) + return -1; + return r + 2; + } + + if (bufsize < 2) + return seterror(E2BIG); + if (cv->codepage == 1200) /* little endian */ + { + buf[0] = (wbuf[0] & 0x00FF); + buf[1] = (wbuf[0] & 0xFF00) >> 8; + } + else if (cv->codepage == 1201) /* big endian */ + { + buf[0] = (wbuf[0] & 0xFF00) >> 8; + buf[1] = (wbuf[0] & 0x00FF); + } + if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) + { + if (bufsize < 4) + return seterror(E2BIG); + if (cv->codepage == 1200) /* little endian */ + { + buf[2] = (wbuf[1] & 0x00FF); + buf[3] = (wbuf[1] & 0xFF00) >> 8; + } + else if (cv->codepage == 1201) /* big endian */ + { + buf[2] = (wbuf[1] & 0xFF00) >> 8; + buf[3] = (wbuf[1] & 0x00FF); + } + return 4; + } + return 2; +} + +static int +utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) +{ + int codepage = cv->codepage; + uint wc = 0xD800; + + /* swap endian: 12000 <-> 12001 */ + if (cv->mode & UNICODE_MODE_SWAPPED) + codepage ^= 1; + + if (bufsize < 4) + return seterror(EINVAL); + if (codepage == 12000) /* little endian */ + wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + else if (codepage == 12001) /* big endian */ + wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] 
<< 8) | buf[3]; + + if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) + { + cv->mode |= UNICODE_MODE_BOM_DONE; + if (wc == 0xFFFE0000) + { + cv->mode |= UNICODE_MODE_SWAPPED; + *wbufsize = 0; + return 4; + } + else if (wc == 0x0000FEFF) + { + *wbufsize = 0; + return 4; + } + } + + if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) + return seterror(EILSEQ); + ucs4_to_utf16(wc, wbuf, wbufsize); + return 4; +} + +static int +utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) +{ + uint wc; + + if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) + { + int r; + + cv->mode |= UNICODE_MODE_BOM_DONE; + if (bufsize < 4) + return seterror(E2BIG); + if (cv->codepage == 12000) /* little endian */ + memcpy(buf, "\xFF\xFE\x00\x00", 4); + else if (cv->codepage == 12001) /* big endian */ + memcpy(buf, "\x00\x00\xFE\xFF", 4); + + r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); + if (r == -1) + return -1; + return r + 4; + } + + if (bufsize < 4) + return seterror(E2BIG); + wc = utf16_to_ucs4(wbuf); + if (cv->codepage == 12000) /* little endian */ + { + buf[0] = wc & 0x000000FF; + buf[1] = (wc & 0x0000FF00) >> 8; + buf[2] = (wc & 0x00FF0000) >> 16; + buf[3] = (wc & 0xFF000000) >> 24; + } + else if (cv->codepage == 12001) /* big endian */ + { + buf[0] = (wc & 0xFF000000) >> 24; + buf[1] = (wc & 0x00FF0000) >> 16; + buf[2] = (wc & 0x0000FF00) >> 8; + buf[3] = wc & 0x000000FF; + } + return 4; +} + +/* + * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow + * 1 byte Kana) + * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte + * Kana - SO/SI) + * + * MultiByteToWideChar() and WideCharToMultiByte() behave differently + * depending on Windows version. On XP, WideCharToMultiByte() doesn't + * terminate result sequence with ascii escape. But Vista does. + * Use MLang instead. 
+ */ + +#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift)) +#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF) +#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF) + +#define ISO2022_SI 0 +#define ISO2022_SO 1 + +/* shift in */ +static const char iso2022_SI_seq[] = "\x0F"; +/* shift out */ +static const char iso2022_SO_seq[] = "\x0E"; + +typedef struct iso2022_esc_t iso2022_esc_t; +struct iso2022_esc_t { + const char *esc; + int esc_len; + int len; + int cs; +}; + +#define ISO2022JP_CS_ASCII 0 +#define ISO2022JP_CS_JISX0201_ROMAN 1 +#define ISO2022JP_CS_JISX0201_KANA 2 +#define ISO2022JP_CS_JISX0208_1978 3 +#define ISO2022JP_CS_JISX0208_1983 4 +#define ISO2022JP_CS_JISX0212 5 + +static iso2022_esc_t iso2022jp_esc[] = { + {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII}, + {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN}, + {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA}, + {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */ + {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983}, + {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212}, + {NULL, 0, 0, 0} +}; + +static int +iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) +{ + iso2022_esc_t *iesc = iso2022jp_esc; + char tmp[MB_CHAR_MAX]; + int insize; + HRESULT hr; + DWORD dummy = 0; + int len; + int esc_len; + int cs; + int shift; + int i; + + if (buf[0] == 0x1B) + { + for (i = 0; iesc[i].esc != NULL; ++i) + { + esc_len = iesc[i].esc_len; + if (bufsize < esc_len) + { + if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) + return seterror(EINVAL); + } + else + { + if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) + { + cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); + *wbufsize = 0; + return esc_len; + } + } + } + /* not supported escape sequence */ + return seterror(EILSEQ); + } + else if (buf[0] == iso2022_SO_seq[0]) + { + cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); + *wbufsize = 0; + return 1; + } + else if (buf[0] == 
iso2022_SI_seq[0]) + { + cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); + *wbufsize = 0; + return 1; + } + + cs = ISO2022_MODE_CS(cv->mode); + shift = ISO2022_MODE_SHIFT(cv->mode); + + /* reset the mode for informal sequence */ + if (buf[0] < 0x20) + { + cs = ISO2022JP_CS_ASCII; + shift = ISO2022_SI; + } + + len = iesc[cs].len; + if (bufsize < len) + return seterror(EINVAL); + for (i = 0; i < len; ++i) + if (!(buf[i] < 0x80)) + return seterror(EILSEQ); + esc_len = iesc[cs].esc_len; + memcpy(tmp, iesc[cs].esc, esc_len); + if (shift == ISO2022_SO) + { + memcpy(tmp + esc_len, iso2022_SO_seq, 1); + esc_len += 1; + } + memcpy(tmp + esc_len, buf, len); + + if ((cv->codepage == 50220 || cv->codepage == 50221 + || cv->codepage == 50222) && shift == ISO2022_SO) + { + /* XXX: shift-out cannot be used for mbtowc (both kernel and + * mlang) */ + esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; + memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); + memcpy(tmp + esc_len, buf, len); + } + + insize = len + esc_len; + hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, + (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); + if (hr != S_OK || insize != len + esc_len) + return seterror(EILSEQ); + + /* Check for conversion error. Assuming defaultChar is 0x3F. 
*/ + /* ascii should be converted from ascii */ + if (wbuf[0] == buf[0] + && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) + return seterror(EILSEQ); + + /* reset the mode for informal sequence */ + if (cv->mode != ISO2022_MODE(cs, shift)) + cv->mode = ISO2022_MODE(cs, shift); + + return len; +} + +static int +iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) +{ + iso2022_esc_t *iesc = iso2022jp_esc; + char tmp[MB_CHAR_MAX]; + int tmpsize = MB_CHAR_MAX; + int insize = wbufsize; + HRESULT hr; + DWORD dummy = 0; + int len; + int esc_len; + int cs; + int shift; + int i; + + /* + * MultiByte = [escape sequence] + character + [escape sequence] + * + * Whether trailing escape sequence is added depends on which API is + * used (kernel or MLang, and its version). + */ + hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, + (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); + if (hr != S_OK || insize != wbufsize) + return seterror(EILSEQ); + else if (bufsize < tmpsize) + return seterror(E2BIG); + + if (tmpsize == 1) + { + cs = ISO2022JP_CS_ASCII; + esc_len = 0; + } + else + { + for (i = 1; iesc[i].esc != NULL; ++i) + { + esc_len = iesc[i].esc_len; + if (strncmp(tmp, iesc[i].esc, esc_len) == 0) + { + cs = iesc[i].cs; + break; + } + } + if (iesc[i].esc == NULL) + /* not supported escape sequence */ + return seterror(EILSEQ); + } + + shift = ISO2022_SI; + if (tmp[esc_len] == iso2022_SO_seq[0]) + { + shift = ISO2022_SO; + esc_len += 1; + } + + len = iesc[cs].len; + + /* Check for converting error. Assuming defaultChar is 0x3F. 
*/ + /* ascii should be converted from ascii */ + if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) + return seterror(EILSEQ); + else if (tmpsize < esc_len + len) + return seterror(EILSEQ); + + if (cv->mode == ISO2022_MODE(cs, shift)) + { + /* remove escape sequence */ + if (esc_len != 0) + memmove(tmp, tmp + esc_len, len); + esc_len = 0; + } + else + { + if (cs == ISO2022JP_CS_ASCII) + { + esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; + memmove(tmp + esc_len, tmp, len); + memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); + } + if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) + { + /* shift-in before changing to other mode */ + memmove(tmp + 1, tmp, len + esc_len); + memcpy(tmp, iso2022_SI_seq, 1); + esc_len += 1; + } + } + + if (bufsize < len + esc_len) + return seterror(E2BIG); + memcpy(buf, tmp, len + esc_len); + cv->mode = ISO2022_MODE(cs, shift); + return len + esc_len; +} + +static int +iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) +{ + iso2022_esc_t *iesc = iso2022jp_esc; + int esc_len; + + if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) + { + esc_len = 0; + if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) + esc_len += 1; + if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) + esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; + if (bufsize < esc_len) + return seterror(E2BIG); + + esc_len = 0; + if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) + { + memcpy(buf, iso2022_SI_seq, 1); + esc_len += 1; + } + if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) + { + memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, + iesc[ISO2022JP_CS_ASCII].esc_len); + esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; + } + return esc_len; + } + return 0; +} + +#if defined(MAKE_DLL) && defined(USE_LIBICONV_DLL) +BOOL WINAPI +DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) +{ + switch( fdwReason ) + { + case DLL_PROCESS_ATTACH: + hwiniconv = (HMODULE)hinstDLL; + break; + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + 
break; + } + return TRUE; +} +#endif + +#if defined(MAKE_EXE) +#include +#include +#include +int +main(int argc, char **argv) +{ + char *fromcode = NULL; + char *tocode = NULL; + int i; + char inbuf[BUFSIZ]; + char outbuf[BUFSIZ]; + const char *pin; + char *pout; + size_t inbytesleft; + size_t outbytesleft; + size_t rest = 0; + iconv_t cd; + size_t r; + FILE *in = stdin; + FILE *out = stdout; + int ignore = 0; + char *p; + + _setmode(_fileno(stdin), _O_BINARY); + _setmode(_fileno(stdout), _O_BINARY); + + for (i = 1; i < argc; ++i) + { + if (strcmp(argv[i], "-l") == 0) + { + for (i = 0; codepage_alias[i].name != NULL; ++i) + printf("%s\n", codepage_alias[i].name); + return 0; + } + + if (strcmp(argv[i], "-f") == 0) + fromcode = argv[++i]; + else if (strcmp(argv[i], "-t") == 0) + tocode = argv[++i]; + else if (strcmp(argv[i], "-c") == 0) + ignore = 1; + else if (strcmp(argv[i], "--output") == 0) + { + out = fopen(argv[++i], "wb"); + if(out == NULL) + { + fprintf(stderr, "cannot open %s\n", argv[i]); + return 1; + } + } + else + { + in = fopen(argv[i], "rb"); + if (in == NULL) + { + fprintf(stderr, "cannot open %s\n", argv[i]); + return 1; + } + break; + } + } + + if (fromcode == NULL || tocode == NULL) + { + printf("usage: %s [-c] -f from-enc -t to-enc [file]\n", argv[0]); + return 0; + } + + if (ignore) + { + p = tocode; + tocode = (char *)malloc(strlen(p) + strlen("//IGNORE") + 1); + if (tocode == NULL) + { + perror("fatal error"); + return 1; + } + strcpy(tocode, p); + strcat(tocode, "//IGNORE"); + } + + cd = iconv_open(tocode, fromcode); + if (cd == (iconv_t)(-1)) + { + perror("iconv_open error"); + return 1; + } + + while ((inbytesleft = fread(inbuf + rest, 1, sizeof(inbuf) - rest, in)) != 0 + || rest != 0) + { + inbytesleft += rest; + pin = inbuf; + pout = outbuf; + outbytesleft = sizeof(outbuf); + r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); + fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); + if (r == (size_t)(-1) && errno != E2BIG && (errno 
!= EINVAL || feof(in))) + { + perror("conversion error"); + return 1; + } + memmove(inbuf, pin, inbytesleft); + rest = inbytesleft; + } + pout = outbuf; + outbytesleft = sizeof(outbuf); + r = iconv(cd, NULL, NULL, &pout, &outbytesleft); + fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); + if (r == (size_t)(-1)) + { + perror("conversion error"); + return 1; + } + + iconv_close(cd); + + return 0; +} +#endif + diff --git a/vendor/win-iconv/win_iconv_test.c b/vendor/win-iconv/win_iconv_test.c new file mode 100644 index 00000000..14b8b743 --- /dev/null +++ b/vendor/win-iconv/win_iconv_test.c @@ -0,0 +1,286 @@ + +#ifdef USE_ICONV_H +#include +#include +#include +#include +#include +#else +#include "win_iconv.c" +#endif + +#include + +const char * +tohex(const char *str, int size) +{ + static char buf[BUFSIZ]; + char *pbuf = buf; + int i; + buf[0] = 0; + for (i = 0; i < size; ++i) + pbuf += sprintf(pbuf, "%02X", str[i] & 0xFF); + return buf; +} + +const char * +errstr(int errcode) +{ + static char buf[BUFSIZ]; + switch (errcode) + { + case 0: return "NOERROR"; + case EINVAL: return "EINVAL"; + case EILSEQ: return "EILSEQ"; + case E2BIG: return "E2BIG"; + } + sprintf(buf, "%d\n", errcode); + return buf; +} + +#ifdef USE_LIBICONV_DLL +int use_dll; + +int +setdll(const char *dllpath) +{ + char buf[BUFSIZ]; + rec_iconv_t cd; + + sprintf(buf, "WINICONV_LIBICONV_DLL=%s", dllpath); + putenv(buf); + if (libiconv_iconv_open(&cd, "ascii", "ascii")) + { + FreeLibrary(cd.hlibiconv); + use_dll = TRUE; + return TRUE; + } + use_dll = FALSE; + return FALSE; +} +#endif + +/* + * We can test the codepage that is installed in the system. 
+ */ +int +check_enc(const char *encname, int codepage) +{ + iconv_t cd; + int cp; + cd = iconv_open("utf-8", encname); + if (cd == (iconv_t)(-1)) + { + printf("%s(%d) IS NOT SUPPORTED: SKIP THE TEST\n", encname, codepage); + return FALSE; + } +#ifndef USE_ICONV_H + cp = ((rec_iconv_t *)cd)->from.codepage; + if (cp != codepage) + { + printf("%s(%d) ALIAS IS MAPPED TO DIFFERENT CODEPAGE (%d)\n", encname, codepage, cp); + exit(1); + } +#endif + iconv_close(cd); + return TRUE; +} + +void +test(const char *from, const char *fromstr, int fromsize, const char *to, const char *tostr, int tosize, int errcode, int bufsize, int line) +{ + char outbuf[BUFSIZ]; + const char *pin; + char *pout; + size_t inbytesleft; + size_t outbytesleft; + iconv_t cd; + size_t r; +#ifdef USE_LIBICONV_DLL + char dllpath[_MAX_PATH]; +#endif + + cd = iconv_open(to, from); + if (cd == (iconv_t)(-1)) + { + printf("%s -> %s: NG: INVALID ENCODING NAME: line=%d\n", from, to, line); + exit(1); + } + +#ifdef USE_LIBICONV_DLL + if (((rec_iconv_t *)cd)->hlibiconv != NULL) + GetModuleFileNameA(((rec_iconv_t *)cd)->hlibiconv, dllpath, sizeof(dllpath)); + + if (use_dll && ((rec_iconv_t *)cd)->hlibiconv == NULL) + { + printf("%s: %s -> %s: NG: FAILED TO USE DLL: line=%d\n", dllpath, from, to, line); + exit(1); + } + else if (!use_dll && ((rec_iconv_t *)cd)->hlibiconv != NULL) + { + printf("%s: %s -> %s: NG: DLL IS LOADED UNEXPECTEDLY: line=%d\n", dllpath, from, to, line); + exit(1); + } +#endif + + errno = 0; + + pin = (char *)fromstr; + pout = outbuf; + inbytesleft = fromsize; + outbytesleft = bufsize; + r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); + if (r != (size_t)(-1)) + r = iconv(cd, NULL, NULL, &pout, &outbytesleft); + *pout = 0; + +#ifdef USE_LIBICONV_DLL + if (use_dll) + printf("%s: ", dllpath); +#endif + printf("%s(%s) -> ", from, tohex(fromstr, fromsize)); + printf("%s(%s%s%s): ", to, tohex(tostr, tosize), + errcode == 0 ? "" : ":", + errcode == 0 ? 
"" : errstr(errcode)); + if (strcmp(outbuf, tostr) == 0 && errno == errcode) + printf("OK\n"); + else + { + printf("RESULT(%s:%s): ", tohex(outbuf, sizeof(outbuf) - outbytesleft), + errstr(errno)); + printf("NG: line=%d\n", line); + exit(1); + } +} + +#define STATIC_STRLEN(arr) (sizeof(arr) - 1) + +#define success(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), 0, BUFSIZ, __LINE__) +#define einval(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EINVAL, BUFSIZ, __LINE__) +#define eilseq(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EILSEQ, BUFSIZ, __LINE__) +#define e2big(from, fromstr, to, tostr, bufsize) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), E2BIG, bufsize, __LINE__) + +int +main(int argc, char **argv) +{ +#ifdef USE_LIBICONV_DLL + /* test use of dll if $DEFAULT_LIBICONV_DLL was defined. */ + if (setdll("")) + { + success("ascii", "ABC", "ascii", "ABC"); + success("ascii", "ABC", "utf-16be", "\x00\x41\x00\x42\x00\x43"); + } + else + { + printf("\nDLL TEST IS SKIPPED\n\n"); + } + + setdll("none"); +#endif + + if (check_enc("ascii", 20127)) + { + success("ascii", "ABC", "ascii", "ABC"); + eilseq("ascii", "\x80", "ascii", ""); + eilseq("ascii", "\xFF", "ascii", ""); + } + + /* unicode (CP1200 CP1201 CP12000 CP12001 CP65001) */ + if (check_enc("utf-8", 65001) + && check_enc("utf-16be", 1201) && check_enc("utf-16le", 1200) + && check_enc("utf-32be", 12001) && check_enc("utf-32le", 12000) + ) + { + /* Test the BOM behavior + * 1. Remove the BOM when "fromcode" is utf-16 or utf-32. + * 2. Add the BOM when "tocode" is utf-16 or utf-32. 
*/ + success("utf-16", "\xFE\xFF\x01\x02", "utf-16be", "\x01\x02"); + success("utf-16", "\xFF\xFE\x02\x01", "utf-16be", "\x01\x02"); + success("utf-32", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\x01\x02"); + success("utf-32", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\x01\x02"); + success("utf-16", "\xFE\xFF\x00\x01", "utf-8", "\x01"); +#ifndef GLIB_COMPILATION + success("utf-8", "\x01", "utf-16", "\xFE\xFF\x00\x01"); + success("utf-8", "\x01", "utf-32", "\x00\x00\xFE\xFF\x00\x00\x00\x01"); +#else + success("utf-8", "\x01", "utf-16", "\xFF\xFE\x01\x00"); + success("utf-8", "\x01", "utf-32", "\xFF\xFE\x00\x00\x01\x00\x00\x00"); +#endif + + success("utf-16be", "\xFE\xFF\x01\x02", "utf-16be", "\xFE\xFF\x01\x02"); + success("utf-16le", "\xFF\xFE\x02\x01", "utf-16be", "\xFE\xFF\x01\x02"); + success("utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); + success("utf-32le", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); + success("utf-16be", "\xFE\xFF\x00\x01", "utf-8", "\xEF\xBB\xBF\x01"); + success("utf-8", "\xEF\xBB\xBF\x01", "utf-8", "\xEF\xBB\xBF\x01"); + + success("utf-16be", "\x01\x02", "utf-16le", "\x02\x01"); + success("utf-16le", "\x02\x01", "utf-16be", "\x01\x02"); + success("utf-16be", "\xFE\xFF", "utf-16le", "\xFF\xFE"); + success("utf-16le", "\xFF\xFE", "utf-16be", "\xFE\xFF"); + success("utf-32be", "\x00\x00\x03\x04", "utf-32le", "\x04\x03\x00\x00"); + success("utf-32le", "\x04\x03\x00\x00", "utf-32be", "\x00\x00\x03\x04"); + success("utf-32be", "\x00\x00\xFF\xFF", "utf-16be", "\xFF\xFF"); + success("utf-16be", "\xFF\xFF", "utf-32be", "\x00\x00\xFF\xFF"); + success("utf-32be", "\x00\x01\x00\x00", "utf-16be", "\xD8\x00\xDC\x00"); + success("utf-16be", "\xD8\x00\xDC\x00", "utf-32be", "\x00\x01\x00\x00"); + success("utf-32be", "\x00\x10\xFF\xFF", "utf-16be", "\xDB\xFF\xDF\xFF"); + success("utf-16be", "\xDB\xFF\xDF\xFF", "utf-32be", 
"\x00\x10\xFF\xFF"); + eilseq("utf-32be", "\x00\x11\x00\x00", "utf-16be", ""); + eilseq("utf-16be", "\xDB\xFF\xE0\x00", "utf-32be", ""); + success("utf-8", "\xE3\x81\x82", "utf-16be", "\x30\x42"); + einval("utf-8", "\xE3", "utf-16be", ""); + } + + /* Japanese (CP932 CP20932 CP50220 CP50221 CP50222 CP51932) */ + if (check_enc("cp932", 932) + && check_enc("cp20932", 20932) && check_enc("euc-jp", 51932) + && check_enc("cp50220", 50220) && check_enc("cp50221", 50221) + && check_enc("cp50222", 50222) && check_enc("iso-2022-jp", 50221)) + { + /* Test the compatibility for each other Japanese codepage. + * And validate the escape sequence handling for iso-2022-jp. */ + success("utf-16be", "\xFF\x5E", "cp932", "\x81\x60"); + success("utf-16be", "\x30\x1C", "cp932", "\x81\x60"); + success("utf-16be", "\xFF\x5E", "cp932//nocompat", "\x81\x60"); + eilseq("utf-16be", "\x30\x1C", "cp932//nocompat", ""); + success("euc-jp", "\xA4\xA2", "utf-16be", "\x30\x42"); + einval("euc-jp", "\xA4\xA2\xA4", "utf-16be", "\x30\x42"); + eilseq("euc-jp", "\xA4\xA2\xFF\xFF", "utf-16be", "\x30\x42"); + success("cp932", "\x81\x60", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); + success("UTF-16BE", "\xFF\x5E", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); + eilseq("UTF-16BE", "\x30\x1C", "iso-2022-jp//nocompat", ""); + success("UTF-16BE", "\x30\x42\x30\x44", "iso-2022-jp", "\x1B\x24\x42\x24\x22\x24\x24\x1B\x28\x42"); + success("iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42", "UTF-16BE", "\xFF\x5E"); + } + + /* + * test for //translit + * U+FF41 (FULLWIDTH LATIN SMALL LETTER A) <-> U+0062 (LATIN SMALL LETTER A) + */ + eilseq("UTF-16BE", "\xFF\x41", "iso-8859-1", ""); + success("UTF-16BE", "\xFF\x41", "iso-8859-1//translit", "a"); + + /* + * test for //translit + * Some character, not in "to" encoding -> DEFAULT CHARACTER (maybe "?") + */ + eilseq("UTF-16BE", "\x30\x42", "ascii", ""); + success("UTF-16BE", "\x30\x42", "ascii//translit", "?"); + + /* + * test for //ignore + */ + 
eilseq("UTF-8", "\xFF A \xFF B", "ascii//ignore", " A B"); + eilseq("UTF-8", "\xEF\xBC\xA1 A \xEF\xBC\xA2 B", "ascii//ignore", " A B"); + eilseq("UTF-8", "\xEF\x01 A \xEF\x02 B", "ascii//ignore", "\x01 A \x02 B"); + + /* + * TODO: + * Test for state after iconv() failed. + * Ensure iconv() error is safe and continuable. + */ + + return 0; +} + From 0d9ec7607000f9da7a9bb7393ab2cf2a3ecfd16d Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 24 Oct 2023 14:05:49 -0400 Subject: [PATCH 07/14] CI: Test Node 14 with windows-2019 image Node 14 is pretty old by now. Node 14 latest was Node v14.21.3, which came with npm v6.14.8, which came with node-gyp v5.1.1. node-gyp v5.1.1 doesn't support Visual Studio 2022, which is the most recent version and is included in the windows-latest GitHub-hosted runner for GitHub Actions. Using the windows-2019 GitHub-hosted runner for Node 14 on Windows should work, since old node-gyp v5.1.1 should work with Visual Studio 2019... Hopefully. --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79c442ee..decc870e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,12 @@ jobs: - 14 - 16 - 18 + exclude: + - os: windows-latest + node_version: 14 + include: + - os: windows-2019 + node_version: 14 name: Node ${{ matrix.node_version }} on ${{ matrix.os }} steps: From 8dcffae12598054c7b80b735c6cf4479e3c77771 Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 24 Oct 2023 21:45:33 -0400 Subject: [PATCH 08/14] src: Include cstdint in regex.h (fix building on g++ 13) I don't know anything about C or C++, other than this fixes the compilation error on g++ 13. Tested in an Ubuntu 23.10 Docker container. 
--- src/core/regex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/regex.h b/src/core/regex.h index 10970306..750a2362 100644 --- a/src/core/regex.h +++ b/src/core/regex.h @@ -1,6 +1,7 @@ #ifndef REGEX_H_ #define REGEX_H_ +#include <cstdint> #include "optional.h" #include From b63a2082bcb8a0332076cd0d6ed74f03e7661b3a Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 7 Nov 2023 12:34:18 -0500 Subject: [PATCH 09/14] CI: Update actions to latest semver major --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index decc870e..ae218868 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,18 +27,18 @@ jobs: name: Node ${{ matrix.node_version }} on ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: true - name: Cache - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | 'node_modules' key: ${{ runner.os }}-${{ matrix.node_version }}-${{ hashFiles('package.json') }} - name: Setup node - uses: actions/setup-node@v2-beta + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }} From 5949f8c93da2fc9889e2d0efc3ec02cd760feda1 Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 7 Nov 2023 12:50:31 -0500 Subject: [PATCH 10/14] CI: No quotation marks for cache path The quotation marks seem to be interpreted literally, breaking caching, since a dir named (literally, with quotes in the name) 'node_modules' doesn't exist? (Either that, or the cache actions or its glob dependency are just super broken.)
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ae218868..380ae205 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,7 @@ jobs: uses: actions/cache@v3 with: path: | - 'node_modules' + node_modules key: ${{ runner.os }}-${{ matrix.node_version }}-${{ hashFiles('package.json') }} - name: Setup node From 282ed57cc9ed5bb7bea7208259d0615d3ecd0ea5 Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 7 Nov 2023 12:53:46 -0500 Subject: [PATCH 11/14] CI: Install Python setuptools for Python 3.12+ This ensures compatibility with most versions of node-gyp. There is a fix for Python 3.12 compatibility in node-gyp 10, but older versions of npm come with older node-gyp, so here we are. --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 380ae205..0b05a929 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,12 @@ jobs: with: node-version: ${{ matrix.node_version }} + - name: Install Python setuptools + # This is needed for Python 3.12+, since many versions of node-gyp + # are incompatible with Python 3.12+, which no-longer ships 'distutils' + # out of the box. 'setuptools' package provides 'distutils'. + run: python3 -m pip install setuptools + - name: Install dependencies run: npm install From 037a69874f2107c401a324cc1051eac4be0deef4 Mon Sep 17 00:00:00 2001 From: DeeDeeG Date: Tue, 7 Nov 2023 13:16:52 -0500 Subject: [PATCH 12/14] CI: Install older Python (Python 3.10) on Node 14 runs For compatibility with old node-gyp, which is shipped with old npm, which is shipped with Node 14. Very old node-gyp isn't compatible with Python 3.11+. Install Python 3.10 instead. 
--- .github/workflows/ci.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b05a929..c2b69b39 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,16 @@ jobs: with: node-version: ${{ matrix.node_version }} + - name: Setup python + if: matrix.node_version == 14 + # Old versions of Node bundle old versions of npm. + # Old versions of npm bundle old versions of node-gyp. + # Old versions of node-gyp are incompatible with Python 3.11+. + # Install older Python (Python 3.10) as a workaround. + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install Python setuptools # This is needed for Python 3.12+, since many versions of node-gyp # are incompatible with Python 3.12+, which no-longer ships 'distutils' From ab97b546fc6064f406820a29b0208ede5dbfa664 Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Fri, 5 Jul 2024 14:22:06 -0500 Subject: [PATCH 13/14] Delete stuff we don't need from `ext` --- script/fetch-libiconv-61.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/script/fetch-libiconv-61.sh b/script/fetch-libiconv-61.sh index 2385bc3d..b9a6bce5 100644 --- a/script/fetch-libiconv-61.sh +++ b/script/fetch-libiconv-61.sh @@ -5,9 +5,8 @@ # machine. Since newer versions of macOS include a FreeBSD `libiconv`, we no # longer assume it's safe to use any ambient `libiconv.dylib` we find. # -# For this reason, we try to detect a Homebrew installation of `libiconv`; we -# also allow the user to install GNU `libiconv` manually and specify the path -# via an environment variable. +# For this reason, we download a known good version of `libiconv` from +# https://github.com/apple-oss-distributions/libiconv/tree/libiconv-61. 
# # We might eventually replace this approach with an explicit vendorization of # the specific files needed, but that would require a universal build of @@ -54,12 +53,25 @@ dylib_path="$EXT/lib/libiconv.2.dylib" # If this path already exists, we'll assume libiconv has already been fetched # and compiled. Otherwise we'll do it now. if [ ! -L "$dylib_path" ]; then + echo "Path $dylib_path is missing; fetching and installing libiconv." cd $SCRATCH git clone -b libiconv-61 "https://github.com/apple-oss-distributions/libiconv.git" cd libiconv/libiconv ./configure --prefix="$EXT" --libdir="$EXT/lib" make make install + + if [ ! -L "$dylib_path" ]; then + echoerr "Error: expected $dylib_path to be present, but it was not. Cannot proceed." + usage + exit 1 + else + # Remove the directories we don't need. + rm -rf "$EXT/bin" + rm -rf "$EXT/share" + fi +else + echo "Path $dylib_path is already present; skipping installation of libiconv." fi cd $ROOT From e660b4710bc069043542d015c0fdb549165fe857 Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Fri, 5 Jul 2024 19:59:34 -0500 Subject: [PATCH 14/14] =?UTF-8?q?Clean=20up=20scripts=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove scripts we don't need. * Report progress better on fetching/compiling `libiconv`. * Preserve the license and README from `libiconv`. 
--- .npmignore | 2 - script/adjust-install-name.sh | 23 --------- script/fetch-libiconv-61.sh | 51 +++++++++++--------- script/find-gnu-libiconv.sh | 90 ----------------------------------- 4 files changed, 29 insertions(+), 137 deletions(-) delete mode 100755 script/adjust-install-name.sh delete mode 100755 script/find-gnu-libiconv.sh diff --git a/.npmignore b/.npmignore index edb606e7..b57d7b9a 100644 --- a/.npmignore +++ b/.npmignore @@ -9,8 +9,6 @@ !src/bindings/*.cc !script/fetch-libiconv-61.sh -!script/find-gnu-libiconv.sh -!script/adjust-install-name.sh !vendor/libcxx/* diff --git a/script/adjust-install-name.sh b/script/adjust-install-name.sh deleted file mode 100755 index e68226c7..00000000 --- a/script/adjust-install-name.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# This script can be used if we find it necessary, for code signing reasons, -# not to alter the install name of `libiconv.2.dylib` during compilation. macOS -# complains when we do it, saying that the code signature has been invalidated, -# but we haven't noticed any ill effects… yet. -# -# But this script would allow us to point `superstring.node` at the correct -# library by figuring out `libicov.2.dylib`’s existing install name, rather -# than setting it to a known value in an earlier step. - -product_dir=$1 - -# Ask for the current install name expected by `superstring.node`. We need to -# know this in order to change it in the next step. -current_install_name=$(otool -L "$product_dir/superstring.node" | awk 'BEGIN{FS=OFS=" "};NR==2{print $1}') - -# Now use `install_name_tool` to tell `superstring.node` to instead look for -# `libiconv.2.dylib` at a path relative to itself. 
-install_name_tool -change \ - "$current_install_name" \ - "@loader_path/../../vendor/libiconv/lib/libiconv.2.dylib" \ - "$product_dir/superstring.node" diff --git a/script/fetch-libiconv-61.sh b/script/fetch-libiconv-61.sh index b9a6bce5..50bc244f 100644 --- a/script/fetch-libiconv-61.sh +++ b/script/fetch-libiconv-61.sh @@ -1,20 +1,29 @@ - #!/bin/bash -# The purpose of this script is to find a copy of GNU `libiconv` on this macOS -# machine. Since newer versions of macOS include a FreeBSD `libiconv`, we no -# longer assume it's safe to use any ambient `libiconv.dylib` we find. +# When compiling `superstring` on macOS, we used to be able to rely on the +# builtin version of `libiconv`. But newer versions of macOS include FreeBSD +# `libiconv`, rather than GNU `libiconv`; the two are not API-compatible. # # For this reason, we download a known good version of `libiconv` from # https://github.com/apple-oss-distributions/libiconv/tree/libiconv-61. # # We might eventually replace this approach with an explicit vendorization of # the specific files needed, but that would require a universal build of -# `libiconv.2.dylib`. For now, letting the user provide their `libiconv` has -# the advantage of very likely matching the system's architecture. +# `libiconv.2.dylib`. For now, letting the user compile their own `libiconv` +# has the advantage of very likely matching the system's architecture. echoerr() { echo "$@\n" >&2; } +create-if-missing() { + if [ -z "$1" ]; then + echoerr "Error: $1 is a file." + usage + exit 1 + fi + if [ ! -d "$1" ]; then + mkdir "$1" + fi +} usage() { echoerr "superstring requires the GNU libiconv library, which macOS no longer bundles in recent versions. This package attempts to compile it from GitHub. If you're seeing this message, something has gone wrong; check the README for information and consider filing an issue." 
@@ -34,17 +43,6 @@ cleanup() { } trap cleanup SIGINT EXIT -create-if-missing() { - if [ -z "$1" ]; then - echoerr "Error: $1 is a file." - usage - exit 1 - fi - if [ ! -d "$1" ]; then - mkdir "$1" - fi -} - create-if-missing "$EXT" create-if-missing "$SCRATCH" @@ -55,6 +53,11 @@ dylib_path="$EXT/lib/libiconv.2.dylib" if [ ! -L "$dylib_path" ]; then echo "Path $dylib_path is missing; fetching and installing libiconv." cd $SCRATCH + # TODO: Instead of downloading this each time, we can check this into source + # control via git subtree. That would allow someone to build this without + # needing internet connectivity. But we'd still need to do a `make install` — + # at least until we can produce a "universal" version of the `.dylib` and put + # _that_ in source control. git clone -b libiconv-61 "https://github.com/apple-oss-distributions/libiconv.git" cd libiconv/libiconv ./configure --prefix="$EXT" --libdir="$EXT/lib" @@ -62,14 +65,18 @@ if [ ! -L "$dylib_path" ]; then make install if [ ! -L "$dylib_path" ]; then - echoerr "Error: expected $dylib_path to be present, but it was not. Cannot proceed." + echoerr "Error: expected $dylib_path to be present, but it was not. Installation of libiconv failed. Cannot proceed." usage exit 1 - else - # Remove the directories we don't need. - rm -rf "$EXT/bin" - rm -rf "$EXT/share" fi + + # Remove the directories we don't need. + rm -rf "$EXT/bin" + rm -rf "$EXT/share" + + # Copy over the license and README from the scratch directory. + cp "COPYING.LIB" "$EXT" + cp "README" "$EXT" else echo "Path $dylib_path is already present; skipping installation of libiconv." fi diff --git a/script/find-gnu-libiconv.sh b/script/find-gnu-libiconv.sh deleted file mode 100755 index 4565db5d..00000000 --- a/script/find-gnu-libiconv.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -# The purpose of this script is to find a copy of GNU `libiconv` on this macOS -# machine. 
Since newer versions of macOS include a FreeBSD `libiconv`, we no -# longer assume it's safe to use any ambient `libiconv.dylib` we find. -# -# For this reason, we try to detect a Homebrew installation of `libiconv`; we -# also allow the user to install GNU `libiconv` manually and specify the path -# via an environment variable. -# -# We might eventually replace this approach with an explicit vendorization of -# the specific files needed, but that would require a universal build of -# `libiconv.2.dylib`. For now, letting the user provide their `libiconv` has -# the advantage of very likely matching the system's architecture. - -echoerr() { echo "$@\n" >&2; } - -usage() { - echoerr "superstring requires the GNU libiconv library. You can install it with Homebrew (\`brew install libiconv\`) and we'll be able to detect its presence. You may also define a SUPERSTRING_LIBICONV_PATH variable set to the absolute path of your libiconv installation. (This path should have \`lib\` and \`include\` as child directories.)" -} - -# Identify the directory of this script. -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -# Find this package's `vendor` directory; make sure it exists. -VENDOR="$SCRIPT_DIR/../vendor" -if [ ! -d "$VENDOR" ]; then - echoerr "Aborting; expected $VENDOR to be a directory, but it was not." - exit 1 -fi - -TARGET="$VENDOR/libiconv" - -# Make a `libiconv` directory for us to vendorize into. -if [ ! -d "$TARGET" ]; then - mkdir "$TARGET" -fi - -if [[ ! -z "${SUPERSTRING_LIBICONV_PATH}" ]]; then - # First, we allow the user to specify a path and override our heuristics. - # This should propagate even if the user ran `yarn install` from a project - # that has `superstring` as a dependency. - source="${SUPERSTRING_LIBICONV_PATH}" -elif command -v brew &> /dev/null; then - # If that variable isn't set, then we check if this machine has Homebrew - # installed. If so, we'll opt into Homebrew's version of `libiconv`. 
This is - # the safest option because we can reasonably conclude that this `libiconv` - # is the right flavor and matches the system's architecture. - source="$(brew --prefix)/opt/libiconv" -else - # If neither of these things is true, we won't try to add an entry to - # `library_dirs`. - usage - exit 1 -fi - -if [ ! -d "$source" ]; then - echoerr "Expected $source to be the path to GNU libiconv, but it is not a directory. " - usage - exit 1 -fi - -# We expect the `dylib` we need to be at this exact path. -dylib_path="${source}/lib/libiconv.2.dylib" - -if [ ! -f "$dylib_path" ]; then - echoerr "Invalid location for libiconv. Expected to find: ${dylib_path} but it was not present." - usage - exit 1 -fi - -# We need the `include` directory for compilation, plus the `libiconv.2.dylib` -# file. We'll also copy over the README and license files for compliance. -cp -R "${source}/include" "$TARGET/" -cp "${dylib_path}" "$TARGET/lib/" -cp "${source}/COPYING.LIB" "$TARGET/" -cp "${source}/README" "$TARGET/" - - -# Set the install name of this library to something neutral and predictable to -# make a later step easier. -# -# NOTE: macOS complains about this action invalidating the library's code -# signature. This has not been observed to have any negative effects for -# Pulsar, possibly because we sign and notarize the entire app at a later stage -# of the build process. But if it _did_ have negative effects, we could switch -# to a different approach and skip this step. See the `binding.gyp` file for -# further details. - -install_name_tool -id "libiconv.2.dylib" "${dylib_path}"