diff --git a/.gitignore b/.gitignore index fcc9ac911..4ff620b27 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,409 @@ hxcpp.n *.ilk .vscode + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudio +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudio + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap 
+ +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml + +### VisualStudio Patch ### +# Additional files 
built by Visual Studio + +# End of https://www.toptal.com/developers/gitignore/api/visualstudio diff --git a/include/cpp/encoding/Ascii.hpp b/include/cpp/encoding/Ascii.hpp new file mode 100644 index 000000000..0d89fa63c --- /dev/null +++ b/include/cpp/encoding/Ascii.hpp @@ -0,0 +1,24 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct Ascii final + { + static bool isEncoded(const String& string); + + /// + /// Encode the provided string to ASCII bytes and write them to the buffer. + /// If the provided string is UTF16 encoded an exception is raised and nothing is written to the buffer. + /// + /// Number of chars written to the buffer. + static int64_t encode(const String& string, cpp::marshal::View buffer); + + /// + /// Create a string from the provided ASCII bytes. + /// + static String decode(cpp::marshal::View string); + }; + } +} \ No newline at end of file diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp new file mode 100644 index 000000000..0e3d45c4b --- /dev/null +++ b/include/cpp/encoding/Utf16.hpp @@ -0,0 +1,24 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct Utf16 final + { + static bool isEncoded(const String& string); + + static int getByteCount(const char32_t& codepoint); + static int64_t getByteCount(const String& string); + + static int getCharCount(const char32_t& codepoint); + static int64_t getCharCount(const String& string); + + static int encode(const char32_t& codepoint, cpp::marshal::View buffer); + static int64_t encode(const String& string, cpp::marshal::View buffer); + + static char32_t codepoint(cpp::marshal::View buffer); + static String decode(cpp::marshal::View buffer); + }; + } +} \ No newline at end of file diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp new file mode 100644 index 000000000..809d90f68 --- /dev/null +++ b/include/cpp/encoding/Utf8.hpp @@ -0,0 +1,22 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct 
Utf8 final + { + static int getByteCount(const char32_t& codepoint); + static int64_t getByteCount(const String& string); + + static int getCharCount(const char32_t& codepoint); + static int64_t getCharCount(const String& string); + + static int encode(const char32_t& codepoint, cpp::marshal::View buffer); + static int64_t encode(const String& string, cpp::marshal::View buffer); + + static char32_t codepoint(cpp::marshal::View buffer); + static String decode(cpp::marshal::View buffer); + }; + } +} \ No newline at end of file diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index d4bdc0d00..fed1d91bd 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -202,6 +202,7 @@ namespace cpp bool isEmpty(); View slice(int64_t index); View slice(int64_t index, int64_t length); + void copyTo(const View& destination); bool tryCopyTo(const View& destination); template View reinterpret(); int compare(const View& inRHS); @@ -210,10 +211,6 @@ namespace cpp bool operator!=(const View& inRHS) const; T& operator[] (int64_t index); - - operator void* (); - operator T* (); - operator Pointer(); }; struct Marshal final @@ -224,17 +221,8 @@ namespace cpp static const bool isBigEndian = false; #endif - static View asView(const char* cstring); - static View asView(const char16_t* cstring); - - static View toCharView(const ::String& string); - static int toCharView(const ::String&, View buffer); - - static View toWideCharView(const ::String& string); - static int toWideCharView(const ::String& string, View buffer); - - static ::String toString(View buffer); - static ::String toString(View buffer); + static View asCharView(const ::String& string); + static View asWideCharView(const ::String& string); template static T read(View view); template static ::cpp::Pointer readPointer(View view); diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 997cf5091..4bb3076be 100644 --- 
a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -18,72 +18,38 @@ namespace } } -inline cpp::marshal::View cpp::marshal::Marshal::asView(const char* cstring) +inline cpp::marshal::View cpp::marshal::Marshal::asCharView(const ::String& string) { - return cpp::marshal::View(const_cast(cstring), static_cast(std::char_traits::length(cstring))); -} - -inline cpp::marshal::View cpp::marshal::Marshal::asView(const char16_t* cstring) -{ - return cpp::marshal::View(const_cast(cstring), static_cast(std::char_traits::length(cstring))); -} - -inline cpp::marshal::View cpp::marshal::Marshal::toCharView(const ::String& string) -{ - auto length = 0; - auto ptr = string.utf8_str(nullptr, true, &length); - - return View(const_cast(ptr), length + 1); -} - -inline int cpp::marshal::Marshal::toCharView(const ::String& string, View buffer) -{ - auto length = 0; - - if (string.utf8_str(buffer, &length)) + if (null() == string) { - return length; + hx::NullReference("string", false); } - else - { - hx::Throw(HX_CSTRING("Not enough space in the view to write the string")); - return 0; + if (false == string.isAsciiEncoded()) + { + hx::Throw(HX_CSTRING("String is not ASCII encoded")); } -} - -inline cpp::marshal::View cpp::marshal::Marshal::toWideCharView(const ::String& string) -{ - auto length = 0; - auto ptr = string.wc_str(nullptr, &length); - return View(const_cast(ptr), length + 1); + return View(const_cast(string.raw_ptr()), string.length); } -inline int cpp::marshal::Marshal::toWideCharView(const ::String& string, View buffer) +inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const ::String& string) { - auto length = 0; - - if (string.wc_str(buffer, &length)) +#if defined(HX_SMART_STRINGS) + if (null() == string) { - return length; + hx::NullReference("string", false); } - else - { - hx::Throw(HX_CSTRING("Not enough space in the view to write the string")); - return 0; + if (false == string.isUTF16Encoded()) + { + hx::Throw(HX_CSTRING("String 
is not UTF16 encoded")); } -} -inline ::String cpp::marshal::Marshal::toString(View buffer) -{ - return ::String::create(buffer); -} - -inline ::String cpp::marshal::Marshal::toString(View buffer) -{ - return ::String::create(buffer); + return View(const_cast(string.raw_wptr()), string.length); +#else + return hx::Throw(HX_CSTRING("HX_SMART_STRINGS not defined")); +#endif } template diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index 6b2ad2d1f..1e54d80b3 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -20,6 +20,15 @@ inline bool cpp::marshal::View::tryCopyTo(const View& destination) return true; } +template +inline void cpp::marshal::View::copyTo(const View& destination) +{ + if (tryCopyTo(destination) == false) + { + hx::Throw(HX_CSTRING("View OOB")); + } +} + template inline void cpp::marshal::View::clear() { @@ -67,7 +76,7 @@ template template inline cpp::marshal::View cpp::marshal::View::reinterpret() { - auto newPtr = ::cpp::Pointer{ ptr.reinterpret() }; + auto newPtr = ::cpp::Pointer(reinterpret_cast(ptr.ptr)); auto fromSize = sizeof(T); auto toSize = sizeof(K); @@ -121,22 +130,4 @@ inline T& cpp::marshal::View::operator[](int64_t index) } return ptr[index]; -} - -template -inline cpp::marshal::View::operator void* () -{ - return ptr.ptr; -} - -template -inline cpp::marshal::View::operator T* () -{ - return ptr.ptr; -} - -template -inline cpp::marshal::View::operator cpp::Pointer () -{ - return ptr; } \ No newline at end of file diff --git a/include/hxcpp.h b/include/hxcpp.h index 68824a682..71618c1b3 100755 --- a/include/hxcpp.h +++ b/include/hxcpp.h @@ -358,6 +358,9 @@ typedef PropertyAccessMode PropertyAccess; #include #include #include +#include +#include +#include #include #include #include diff --git a/src/cpp/encoding/Ascii.cpp b/src/cpp/encoding/Ascii.cpp new file mode 100644 index 000000000..7a0acd8bf --- /dev/null +++ b/src/cpp/encoding/Ascii.cpp @@ -0,0 +1,66 @@ +#include + +using namespace 
cpp::marshal; + +bool cpp::encoding::Ascii::isEncoded(const String& string) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + return string.isAsciiEncoded(); +} + +int64_t cpp::encoding::Ascii::encode(const String& string, View buffer) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + if (string.isUTF16Encoded()) + { + hx::Throw(HX_CSTRING("String cannot be encoded to ASCII")); + } + + auto src = cpp::marshal::View(string.raw_ptr(), string.length).reinterpret(); + + if (src.tryCopyTo(buffer)) + { + return src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } +} + +String cpp::encoding::Ascii::decode(View view) +{ + if (view.isEmpty()) + { + return hx::Throw(HX_CSTRING("View is empty")); + } + + auto bytes = int64_t{ 0 }; + auto i = int64_t{ 0 }; + auto chars = view.reinterpret(); + + while (i < chars.length && 0 != chars.ptr[i]) + { + bytes += sizeof(char); + i++; + } + + if (0 == bytes) + { + return String::emptyString; + } + + auto backing = hx::NewGCPrivate(0, bytes + sizeof(char)); + + std::memcpy(backing, view.ptr.ptr, bytes); + + return String(static_cast(backing), bytes / sizeof(char)); +} diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp new file mode 100644 index 000000000..ca5d742ee --- /dev/null +++ b/src/cpp/encoding/Utf16.cpp @@ -0,0 +1,260 @@ +#include +#include + +using namespace cpp::marshal; + +namespace +{ + bool isSurrogate(char32_t codepoint) + { + return codepoint >= 0xd800 && codepoint < 0xe000; + } + + bool isLowSurrogate(char32_t codepoint) + { + return codepoint >= 0xdc00 && codepoint < 0xe000; + } + + bool isHighSurrogate(char32_t codepoint) + { + return codepoint >= 0xd800 && codepoint < 0xdc00; + } + + bool isAsciiBuffer(View buffer) + { + while (buffer.isEmpty() == false) + { + auto p = cpp::encoding::Utf16::codepoint(buffer); + + if (p > 127) + { + return false; + } + + buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); + } 
+ + return true; + } + + String toAsciiString(View buffer) + { + auto bytes = buffer.length / sizeof(char16_t); + auto chars = View(hx::InternalNew(bytes + 1, false), bytes * sizeof(char)); + auto output = chars.reinterpret(); + + while (buffer.isEmpty() == false) + { + auto p = cpp::encoding::Utf16::codepoint(buffer); + + output[0] = static_cast(p); + + buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); + output = output.slice(1); + } + + return String(chars.ptr.ptr, chars.length); + } +} + +bool cpp::encoding::Utf16::isEncoded(const String& string) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + return string.isUTF16Encoded(); +} + +int cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) +{ + if (codepoint >= 0x10000) + { + if (codepoint < 0x110000) + { + return 4; + } + } + + return 2; +} + +int64_t cpp::encoding::Utf16::getByteCount(const String& string) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + if (string.isUTF16Encoded()) + { + return string.length * sizeof(char16_t); + } + else + { + auto bytes = int64_t{ 0 }; + for (auto i = 0; i < string.length; i++) + { + bytes += getByteCount(static_cast(string.raw_ptr()[i])); + } + + return bytes; + } +} + +int cpp::encoding::Utf16::getCharCount(const char32_t& codepoint) +{ + return getByteCount(codepoint) / sizeof(char16_t); +} + +int64_t cpp::encoding::Utf16::getCharCount(const String& string) +{ + return getByteCount(string) / sizeof(char16_t); +} + +int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + if (0 == string.length) + { + return 0; + } + + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + if (string.isUTF16Encoded()) + { + auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_wptr())), string.length * sizeof(char16_t)); + + if (src.tryCopyTo(buffer)) + { + return 
src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + } + else + { + auto bytes = int64_t{ 0 }; + for (auto i = 0; i < string.length; i++) + { + bytes += getByteCount(static_cast(string.raw_ptr()[i])); + } + + if (bytes > buffer.length) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + for (auto i = 0; i < string.length; i++) + { + buffer = buffer.slice(encode(static_cast(string.raw_ptr()[i]), buffer)); + } + + return bytes; + } +} + +int cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::View buffer) +{ + if (codepoint < 0xD800) + { + Marshal::writeUInt16(buffer, static_cast(codepoint)); + + return 2; + } + else if (codepoint < 0xE000) + { + // D800 - DFFF is invalid + + return hx::Throw(HX_CSTRING("Invalid UTF16")); + } + else if (codepoint < 0x10000) + { + Marshal::writeUInt16(buffer, static_cast(codepoint)); + + return 2; + } + else if (codepoint < 0x110000) + { + auto staging = std::array(); + auto fst = View(staging.data(), 2); + auto snd = View(staging.data() + 2, 2); + auto all = View(staging.data(), staging.size()); + + Marshal::writeUInt16(fst, 0xD800 + (((codepoint - 0x10000) >> 10) & 0x3FF)); + Marshal::writeUInt16(snd, 0xDC00 + ((codepoint - 0x10000) & 0x3FF)); + + all.copyTo(buffer); + + return 4; + } + + return 0; +} + +String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) +{ + if (buffer.isEmpty()) + { + return String::emptyString; + } + + if (isAsciiBuffer(buffer)) + { + return toAsciiString(buffer); + } + + auto chars = int64_t{ 0 }; + auto i = int64_t{ 0 }; + while (i < buffer.length) + { + auto p = codepoint(buffer.slice(i)); + + chars += getCharCount(p); + i += getByteCount(p); + } + + auto backing = View(::String::allocChar16Ptr(chars), chars); + auto output = backing.reinterpret(); + + while (false == buffer.isEmpty()) + { + auto p = codepoint(buffer); + + buffer = buffer.slice(getByteCount(p)); + output = output.slice(encode(p, output)); + } + + return 
String(backing.ptr.ptr, chars); +} + +char32_t cpp::encoding::Utf16::codepoint(cpp::marshal::View buffer) +{ + auto first = static_cast(Marshal::readUInt16(buffer)); + + if (0xD800 <= first && first < 0xDc00) + { + auto second = static_cast(Marshal::readUInt16(buffer.slice(2))); + if (0xDC00 <= second && second < 0xE000) + { + return static_cast((((first - 0xD800) << 10) | (second - 0xDC00)) + 0x10000); + } + + return int{ hx::Throw(HX_CSTRING("Invalid UTF16")) }; + } + else + { + return static_cast(first); + } +} diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp new file mode 100644 index 000000000..b6d0f1407 --- /dev/null +++ b/src/cpp/encoding/Utf8.cpp @@ -0,0 +1,270 @@ +#include +#include + +using namespace cpp::marshal; + +namespace +{ + bool isAsciiBuffer(View buffer) + { + while (buffer.isEmpty() == false) + { + auto p = cpp::encoding::Utf8::codepoint(buffer); + + if (p > 127) + { + return false; + } + + buffer = buffer.slice(cpp::encoding::Utf8::getByteCount(p)); + } + + return true; + } +} + +int cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) +{ + if (codepoint <= 0x7F) + { + return 1; + } + else if (codepoint <= 0x7FF) + { + return 2; + } + else if (codepoint <= 0xFFFF) + { + return 3; + } + else + { + return 4; + } +} + +int64_t cpp::encoding::Utf8::getByteCount(const String& string) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + if (string.isAsciiEncoded()) + { + return string.length; + } + +#if defined(HX_SMART_STRINGS) + auto source = View(string.raw_wptr(), string.length).reinterpret(); + auto length = source.length; + auto bytes = int64_t{ 0 }; + + while (false == source.isEmpty()) + { + auto p = Utf16::codepoint(source); + + source = source.slice(Utf16::getByteCount(p)); + bytes += getByteCount(p); + } + + return bytes; +#else + return hx::Throw(HX_CSTRING("Unexpected encoding error")); +#endif +} + +int cpp::encoding::Utf8::getCharCount(const char32_t& codepoint) +{ + return 
getByteCount(codepoint) / sizeof(char); +} + +int64_t cpp::encoding::Utf8::getCharCount(const String& string) +{ + return getByteCount(string) / sizeof(char); +} + +int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + if (0 == string.length) + { + return 0; + } + + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + if (string.isAsciiEncoded()) + { + auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_ptr())), string.length * sizeof(char)); + + if (src.tryCopyTo(buffer)) + { + return src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + } + +#if defined(HX_SMART_STRINGS) + if (getByteCount(string) > buffer.length) + { + hx::Throw(HX_CSTRING("Buffer too small")); + } + + auto initialPtr = buffer.ptr.ptr; + auto source = View(string.raw_wptr(), string.length).reinterpret(); + + while (false == source.isEmpty()) + { + auto p = Utf16::codepoint(source); + + source = source.slice(Utf16::getByteCount(p)); + buffer = buffer.slice(encode(p, buffer)); + } + + return buffer.ptr.ptr - initialPtr; +#else + return hx::Throw(HX_CSTRING("Unexpected encoding error")); +#endif +} + +int cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::View buffer) +{ + if (codepoint <= 0x7F) + { + buffer[0] = static_cast(codepoint); + + return 1; + } + else if (codepoint <= 0x7FF) + { + auto data = std::array + { { + static_cast(0xC0 | (codepoint >> 6)), + static_cast(0x80 | (codepoint & 63)) + } }; + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); + } + else if (codepoint <= 0xFFFF) + { + auto data = std::array + { { + static_cast(0xE0 | (codepoint >> 12)), + static_cast(0x80 | ((codepoint >> 6) & 63)), + static_cast(0x80 | (codepoint & 63)) + } }; + + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); + } + 
else + { + auto data = std::array + { { + static_cast(0xF0 | (codepoint >> 18)), + static_cast(0x80 | ((codepoint >> 12) & 63)), + static_cast(0x80 | ((codepoint >> 6) & 63)), + static_cast(0x80 | (codepoint & 63)) + } }; + + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); + } +} + +String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) +{ + if (buffer.isEmpty()) + { + return String::emptyString; + } + + if (isAsciiBuffer(buffer)) + { + return Ascii::decode(buffer); + } + + auto chars = int64_t{ 0 }; + auto i = int64_t{ 0 }; + + while (i < buffer.length) + { + auto p = codepoint(buffer.slice(i)); + + i += getByteCount(p); + chars += Utf16::getCharCount(p); + } + + auto backing = View(::String::allocChar16Ptr(chars), chars); + auto output = backing.reinterpret(); + + while (false == buffer.isEmpty()) + { + auto p = codepoint(buffer); + + buffer = buffer.slice(getByteCount(p)); + output = output.slice(Utf16::encode(p, output)); + } + + return String(backing.ptr.ptr, chars); +} + +char32_t cpp::encoding::Utf8::codepoint(cpp::marshal::View buffer) +{ + auto b0 = static_cast(buffer[0]); + + if ((b0 & 0x80) == 0) + { + return b0; + } + else if ((b0 & 0xE0) == 0xC0) + { + return (static_cast(b0 & 0x1F) << 6) | static_cast(buffer.slice(1)[0] & 0x3F); + } + else if ((b0 & 0xF0) == 0xE0) + { + auto staging = std::array(); + auto dst = View(staging.data(), staging.size()); + + buffer.slice(1, staging.size()).copyTo(dst); + + return (static_cast(b0 & 0x0F) << 12) | (static_cast(staging[0] & 0x3F) << 6) | static_cast(staging[1] & 0x3F); + } + else if ((b0 & 0xF8) == 0xF0) + { + auto staging = std::array(); + auto dst = View(staging.data(), staging.size()); + + buffer.slice(1, staging.size()).copyTo(dst); + + return + (static_cast(b0 & 0x07) << 18) | + (static_cast(staging[0] & 0x3F) << 12) | + (static_cast(staging[1] & 0x3F) << 6) | + static_cast(staging[2] & 0x3F); + } + else + { + return int{ hx::Throw(HX_CSTRING("Failed to 
read codepoint")) }; + } +} \ No newline at end of file diff --git a/test/native/Native.hx b/test/native/Native.hx index 0c8cb9506..d82f41a19 100644 --- a/test/native/Native.hx +++ b/test/native/Native.hx @@ -45,7 +45,11 @@ class Native new tests.marshalling.view.TestView(), new tests.marshalling.view.TestMarshal(), - new tests.marshalling.view.TestViewExtensions() + new tests.marshalling.view.TestViewExtensions(), + + new tests.encoding.TestAscii(), + new tests.encoding.TestUtf8(), + new tests.encoding.TestUtf16(), #end ]); } diff --git a/test/native/compile.hxml b/test/native/compile.hxml index a063c9266..825082805 100644 --- a/test/native/compile.hxml +++ b/test/native/compile.hxml @@ -1,4 +1,3 @@ -m Native -L utest --D HXCPP-DEBUGGER --cpp bin \ No newline at end of file diff --git a/test/native/tests/encoding/TestAscii.hx b/test/native/tests/encoding/TestAscii.hx new file mode 100644 index 000000000..08d6b9df4 --- /dev/null +++ b/test/native/tests/encoding/TestAscii.hx @@ -0,0 +1,86 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Ascii; +import utest.Assert; +import utest.Test; + +using cpp.marshal.ViewExtensions; + +class TestAscii extends Test +{ + function test_isEncoded_null() { + Assert.raises(() -> Ascii.isEncoded(null)); + } + + function test_isEncoded_ascii() { + Assert.isTrue(Ascii.isEncoded("test")); + } + + function test_isEncoded_utf16() { + Assert.isFalse(Ascii.isEncoded("😂")); + } + + function test_encode_null() { + final buffer = Bytes.alloc(4); + + Assert.raises(() -> Ascii.encode(null, buffer.asView())); + } + + function test_encode_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Ascii.encode("test", buffer.asView())); + } + + function test_encode_utf16() { + final buffer = Bytes.alloc(1024); + + Assert.raises(() -> Ascii.encode("😂", buffer.asView())); + } + + function test_encode() { + final buffer = Bytes.alloc(1024); + + Assert.equals(4i64, Ascii.encode("test", buffer.asView())); + 
Assert.equals('t'.code, buffer.get(0)); + Assert.equals('e'.code, buffer.get(1)); + Assert.equals('s'.code, buffer.get(2)); + Assert.equals('t'.code, buffer.get(3)); + } + + function test_decode_empty() { + Assert.raises(() -> Ascii.decode(ViewExtensions.empty())); + } + + function test_decode() { + final buffer = Bytes.alloc(4); + buffer.set(0, 't'.code); + buffer.set(1, 'e'.code); + buffer.set(2, 's'.code); + buffer.set(3, 't'.code); + + Assert.equals('test', Ascii.decode(buffer.asView())); + } + + function test_decode_null_termination() { + final buffer = Bytes.alloc(9); + buffer.set(0, 't'.code); + buffer.set(1, 'e'.code); + buffer.set(2, 's'.code); + buffer.set(3, 't'.code); + buffer.set(4, 0); + buffer.set(5, 't'.code); + buffer.set(6, 'e'.code); + buffer.set(7, 's'.code); + buffer.set(8, 't'.code); + + Assert.equals('test', Ascii.decode(buffer.asView())); + } + + function test_decode_no_string() { + final buffer = Bytes.alloc(1); + + Assert.equals('', Ascii.decode(buffer.asView())); + } +} \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf16.hx b/test/native/tests/encoding/TestUtf16.hx new file mode 100644 index 000000000..8198c0152 --- /dev/null +++ b/test/native/tests/encoding/TestUtf16.hx @@ -0,0 +1,174 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Utf16; +import utest.Assert; +import utest.Test; + +using cpp.marshal.ViewExtensions; + +class TestUtf16 extends Test { + function test_isEncoded_null() { + Assert.raises(() -> Utf16.isEncoded(null)); + } + + function test_isEncoded_ascii() { + Assert.isFalse(Utf16.isEncoded("test")); + } + + function test_isEncoded_utf16() { + Assert.isTrue(Utf16.isEncoded("😂")); + } + + public function test_getByteCount_codepoint() { + Assert.equals(2i64, Utf16.getByteCount('a'.code)); + Assert.equals(2i64, Utf16.getByteCount('ƅ'.code)); + Assert.equals(2i64, Utf16.getByteCount('バ'.code)); + Assert.equals(4i64, Utf16.getByteCount('𝄳'.code)); + Assert.equals(4i64, 
Utf16.getByteCount('😂'.code)); + } + + public function test_getByteCount_string_null() { + Assert.raises(() -> Utf16.getByteCount((null:String))); + } + + public function test_getByteCount_string_empty() { + Assert.equals(0i64, Utf16.getByteCount('')); + } + + public function test_getByteCount_string_ascii() { + Assert.equals(26i64, Utf16.getByteCount('Hello, World!')); + } + + public function test_getByteCount_string_utf16() { + Assert.equals(26i64, Utf16.getByteCount('Hello😂World!')); + } + + public function test_encode_codepoint() { + final buffer = Bytes.alloc(4); + + Assert.equals(2i64, Utf16.encode('a'.code, buffer.asView())); + Assert.equals(0x61, buffer.get(0)); + Assert.equals(0x00, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf16.encode('ƅ'.code, buffer.asView())); + Assert.equals(0x85, buffer.get(0)); + Assert.equals(0x01, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf16.encode('バ'.code, buffer.asView())); + Assert.equals(0xD0, buffer.get(0)); + Assert.equals(0x30, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(4i64, Utf16.encode('𝄳'.code, buffer.asView())); + Assert.equals(0x34, buffer.get(0)); + Assert.equals(0xD8, buffer.get(1)); + Assert.equals(0x33, buffer.get(2)); + Assert.equals(0xDD, buffer.get(3)); + buffer.asView().clear(); + } + + public function test_encode_codepoint_empty_view() { + Assert.raises(() -> Utf16.encode('a'.code, ViewExtensions.empty())); + } + + public function test_encode_codepoint_no_partial_writes() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf16.encode('𝄳'.code, buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_null() { + final buffer = Bytes.alloc(8); + + Assert.raises(() -> Utf16.encode((null:String), buffer.asView())); + } + + public function test_encode_string_empty_view() { + Assert.raises(() -> Utf16.encode('test', ViewExtensions.empty())); + } + + 
public function test_encode_string_empty_string() { + final buffer = Bytes.alloc(8); + + Assert.equals(0i64, Utf16.encode('', buffer.asView())); + } + + public function test_encode_string_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf16.encode('test', buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_ascii() { + final buffer = Bytes.alloc(8); + + Assert.equals(8i64, Utf16.encode('test', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + Assert.equals('e'.code, buffer.get(2)); + Assert.equals(0, buffer.get(3)); + Assert.equals('s'.code, buffer.get(4)); + Assert.equals(0, buffer.get(5)); + Assert.equals('t'.code, buffer.get(6)); + Assert.equals(0, buffer.get(7)); + } + + public function test_encode_string_utf16() { + final buffer = Bytes.alloc(16); + + Assert.equals(12i64, Utf16.encode('teπŸ˜‚st', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + Assert.equals('e'.code, buffer.get(2)); + Assert.equals(0, buffer.get(3)); + + Assert.equals(0x3D, buffer.get(4)); + Assert.equals(0xD8, buffer.get(5)); + Assert.equals(0x02, buffer.get(6)); + Assert.equals(0xDE, buffer.get(7)); + + Assert.equals('s'.code, buffer.get(8)); + Assert.equals(0, buffer.get(9)); + Assert.equals('t'.code, buffer.get(10)); + Assert.equals(0, buffer.get(11)); + } + + public function test_decode_codepoint() { + var bytes = Bytes.ofHex('6100'); + Assert.equals('a'.code, Utf16.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('8501'); + Assert.equals('Ζ…'.code, Utf16.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('D030'); + Assert.equals('バ'.code, Utf16.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('34D833DD'); + Assert.equals('𝄳'.code, Utf16.codepoint(bytes.asView())); + } + + public function test_decode_string() { + var bytes = Bytes.ofHex('6100'); + Assert.equals('a', 
Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('8501'); + Assert.equals('Ζ…', Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('D030'); + Assert.equals('バ', Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('34D833DD'); + Assert.equals('𝄳', Utf16.decode(bytes.asView())); + } + + public function test_decode_empty_view() { + Assert.equals("", Utf16.decode(ViewExtensions.empty())); + } +} \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf8.hx b/test/native/tests/encoding/TestUtf8.hx new file mode 100644 index 000000000..716646709 --- /dev/null +++ b/test/native/tests/encoding/TestUtf8.hx @@ -0,0 +1,151 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Utf8; +import utest.Assert; +import utest.Test; + +using cpp.marshal.ViewExtensions; + +class TestUtf8 extends Test { + public function test_getByteCount_codepoint() { + Assert.equals(1i64, Utf8.getByteCount('a'.code)); + Assert.equals(2i64, Utf8.getByteCount('Ζ…'.code)); + Assert.equals(3i64, Utf8.getByteCount('バ'.code)); + Assert.equals(4i64, Utf8.getByteCount('𝄳'.code)); + } + + public function test_getByteCount_string_null() { + Assert.raises(() -> Utf8.getByteCount((null:String))); + } + + public function test_getByteCount_string_empty() { + Assert.equals(0i64, Utf8.getByteCount('')); + } + + public function test_getByteCount_string_ascii() { + Assert.equals(13i64, Utf8.getByteCount('Hello, World!')); + } + + public function test_getByteCount_string_utf16() { + Assert.equals(15i64, Utf8.getByteCount('HelloπŸ˜‚World!')); + } + + public function test_encode_codepoint() { + final buffer = Bytes.alloc(4); + + Assert.equals(1i64, Utf8.encode('a'.code, buffer.asView())); + Assert.equals(0x61, buffer.get(0)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf8.encode('Ζ…'.code, buffer.asView())); + Assert.equals(0xC6, buffer.get(0)); + Assert.equals(0x85, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(3i64, 
Utf8.encode('バ'.code, buffer.asView())); + Assert.equals(0xE3, buffer.get(0)); + Assert.equals(0x83, buffer.get(1)); + Assert.equals(0x90, buffer.get(2)); + buffer.asView().clear(); + + Assert.equals(4i64, Utf8.encode('𝄳'.code, buffer.asView())); + Assert.equals(0xF0, buffer.get(0)); + Assert.equals(0x9D, buffer.get(1)); + Assert.equals(0x84, buffer.get(2)); + Assert.equals(0xB3, buffer.get(3)); + buffer.asView().clear(); + } + + public function test_encode_codepoint_empty_view() { + Assert.raises(() -> Utf8.encode('a'.code, ViewExtensions.empty())); + } + + public function test_encode_codepoint_no_partial_writes() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf8.encode('𝄳'.code, buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_null() { + final buffer = Bytes.alloc(8); + + Assert.raises(() -> Utf8.encode((null:String), buffer.asView())); + } + + public function test_encode_string_empty_view() { + Assert.raises(() -> Utf8.encode('test', ViewExtensions.empty())); + } + + public function test_encode_string_empty_string() { + final buffer = Bytes.alloc(8); + + Assert.equals(0i64, Utf8.encode('', buffer.asView())); + } + + public function test_encode_string_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf8.encode('test', buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_ascii() { + final buffer = Bytes.alloc(4); + + Assert.equals(4i64, Utf8.encode('test', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals('e'.code, buffer.get(1)); + Assert.equals('s'.code, buffer.get(2)); + Assert.equals('t'.code, buffer.get(3)); + } + + public function test_encode_string_utf16() { + final buffer = Bytes.alloc(8); + + Assert.equals(8i64, Utf8.encode('teπŸ˜‚st', buffer.asView())); + Assert.equals(0x74, buffer.get(0)); + Assert.equals(0x65, buffer.get(1)); + 
Assert.equals(0xF0, buffer.get(2)); + Assert.equals(0x9F, buffer.get(3)); + Assert.equals(0x98, buffer.get(4)); + Assert.equals(0x82, buffer.get(5)); + Assert.equals(0x73, buffer.get(6)); + Assert.equals(0x74, buffer.get(7)); + } + + public function test_decode_codepoint() { + var bytes = Bytes.ofHex('61'); + Assert.equals('a'.code, Utf8.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('c685'); + Assert.equals('Ζ…'.code, Utf8.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('e38390'); + Assert.equals('バ'.code, Utf8.codepoint(bytes.asView())); + + var bytes = Bytes.ofHex('f09d84b3'); + Assert.equals('𝄳'.code, Utf8.codepoint(bytes.asView())); + } + + public function test_decode_string() { + var bytes = Bytes.ofHex('61'); + Assert.equals('a', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('c685'); + Assert.equals('Ζ…', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('e38390'); + Assert.equals('バ', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('f09d84b3'); + Assert.equals('𝄳', Utf8.decode(bytes.asView())); + } + + public function test_decode_empty_view() { + Assert.equals("",Utf8.decode(ViewExtensions.empty())); + } +} \ No newline at end of file diff --git a/test/native/tests/marshalling/view/TestMarshal.hx b/test/native/tests/marshalling/view/TestMarshal.hx index 60199b5be..8766560e9 100644 --- a/test/native/tests/marshalling/view/TestMarshal.hx +++ b/test/native/tests/marshalling/view/TestMarshal.hx @@ -115,233 +115,31 @@ class TestMarshal extends Test { Assert.isTrue(storage == value); } - function test_ascii_string_to_utf8() { - final source = "Hello, World!"; - final view = source.toCharView(); - - if (Assert.equals(source.length + 1, view.length)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - 
Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_ascii_string_to_utf8_buffer() { - final source = "Hello, World!"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFFFFFFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toCharView(source, view); - - if (Assert.equals(source.length + 1, count)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_emoji_string_to_utf8() { - final source = "πŸ˜‚"; - final view = source.toCharView(); - - if (Assert.equals(5, view.length)) { - Assert.equals((0xf0:Char), view[0]); - Assert.equals((0x9f:Char), view[1]); - Assert.equals((0x98:Char), view[2]); - Assert.equals((0x82:Char), view[3]); - Assert.equals(0, view[4]); - } + function test_asCharView_null() { + Assert.raises(() -> Marshal.asCharView(null)); } - function test_emoji_string_to_utf8_buffer() { - final source = "πŸ˜‚"; - final buffer = Bytes.ofHex("FFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toCharView(source, view); - - if (Assert.equals(5, count)) { - Assert.equals((0xf0:Char), view[0]); - Assert.equals((0x9f:Char), view[1]); - Assert.equals((0x98:Char), view[2]); - Assert.equals((0x82:Char), view[3]); - Assert.equals(0, view[4]); - } + function test_asWideCharView_null() { + Assert.raises(() -> 
Marshal.asWideCharView(null)); } - function test_ascii_string_to_utf16() { - final source = "Hello, World!"; - final view = source.toWideCharView(); - - if (Assert.equals(source.length + 1, view.length)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } + function test_asCharView_wrong_encoding() { + Assert.raises(() -> Marshal.asCharView("πŸ˜‚")); } - function test_ascii_string_to_utf16_buffer() { - final source = "Hello, World!"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toWideCharView(source, view); - - if (Assert.equals(count, view.length)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_emoji_string_to_utf16() { - final source = "πŸ˜‚"; - final view = source.toWideCharView(); - - if (Assert.equals(3, view.length)) { - Assert.equals((0xD83D:Char16), view[0]); - Assert.equals((0xDE02:Char16), view[1]); - Assert.equals(0, view[2]); - } - } - - function test_emoji_string_to_utf16_buffer() { - final source = 
"πŸ˜‚"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFF"); - final view = buffer.asView().slice(0, 3 * 2).reinterpret(); - final count = Marshal.toWideCharView(source, view); - - if (Assert.equals(count, view.length)) { - Assert.equals((0xD83D:Char16), view[0]); - Assert.equals((0xDE02:Char16), view[1]); - Assert.equals(0, view[2]); - } - } - - function test_ascii_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); + function test_asWideCharView_wrong_encoding() { + Assert.raises(() -> Marshal.asWideCharView("hello")); } - function test_ascii_wide_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_null_terminated_ascii_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - buffer[5] = 0; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_null_terminated_ascii_wide_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - buffer[5] = 0; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_utf8_bytes_to_string() { - final buffer = Bytes.ofHex("f09f9882"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); - } - - function test_null_terminated_utf8_bytes_to_string() { 
- final buffer = Bytes.ofHex("f09f98820000"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); - } - - function test_utf16_bytes_to_string() { - final buffer = Bytes.ofHex("3DD802De"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); + function test_asCharView() { + final view = "hello".asCharView(); + + Assert.equals(5i64, view.length); } - function test_null_terminated_utf16_bytes_to_string() { - final buffer = Bytes.ofHex("3DD802De00000000"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); + function test_asWideCharView() { + final view = "πŸ˜‚".asWideCharView(); + + Assert.equals(2i64, view.length); } } \ No newline at end of file diff --git a/test/native/tests/marshalling/view/TestView.hx b/test/native/tests/marshalling/view/TestView.hx index 8eb6c5555..104a56e7a 100644 --- a/test/native/tests/marshalling/view/TestView.hx +++ b/test/native/tests/marshalling/view/TestView.hx @@ -139,8 +139,8 @@ class TestView extends Test { final index = 3; final slice = view.slice(index); - if (Assert.equals(7, slice.length)) { - for (i in 0...slice.length) { + if (Assert.equals(7i64, slice.length)) { + for (i in 0...haxe.Int64.toInt(slice.length)) { Assert.equals(i + index + 1, slice[i]); } } @@ -161,8 +161,8 @@ class TestView extends Test { final length = 4; final slice = view.slice(index, length); - if (Assert.equals(length, slice.length)) { - for (i in 0...slice.length) { + if (Assert.equals(haxe.Int64.ofInt(length), slice.length)) { + for (i in 0...haxe.Int64.toInt(slice.length)) { Assert.equals(i + index + 1, slice[i]); } } @@ -243,7 +243,7 @@ class TestView extends Test { final view = buffer.asView(); final second : View = view.reinterpret(); - Assert.equals(1, second.length); + Assert.equals(1i64, second.length); } function 
test_reinterpret_to_larger_type_not_enough_length() { @@ -251,7 +251,7 @@ class TestView extends Test { final view = buffer.asView(); final second : View = view.reinterpret(); - Assert.equals(0, second.length); + Assert.equals(0i64, second.length); } function test_reinterpret_to_value_type() { @@ -259,7 +259,7 @@ class TestView extends Test { final view = buffer.asView(); final points = (view.reinterpret() : View); - Assert.equals(2, points.length); + Assert.equals(2i64, points.length); Assert.equals(0f64, points[0].x); Assert.equals(0f64, points[0].y); diff --git a/test/native/tests/marshalling/view/TestViewExtensions.hx b/test/native/tests/marshalling/view/TestViewExtensions.hx index 271b1e103..e9ff3f942 100644 --- a/test/native/tests/marshalling/view/TestViewExtensions.hx +++ b/test/native/tests/marshalling/view/TestViewExtensions.hx @@ -1,5 +1,6 @@ package tests.marshalling.view; +import haxe.Int64; import haxe.io.UInt8Array; import haxe.io.UInt16Array; import haxe.io.UInt32Array; @@ -49,7 +50,7 @@ class TestViewExtensions extends Test { final array = [ 100, 200, 300, 400 ]; final view = array.asView(); - if (Assert.equals(array.length, view.length)) { + if (Assert.equals(Int64.ofInt(array.length), view.length)) { for (i in 0...array.length) { Assert.equals(array[i], view[i]); } @@ -60,7 +61,7 @@ class TestViewExtensions extends Test { final vector = Vector.fromData([ 100, 200, 300, 400 ]); final view = vector.asView(); - if (Assert.equals(vector.length, view.length)) { + if (Assert.equals(Int64.ofInt(vector.length), view.length)) { for (i in 0...vector.length) { Assert.equals(vector[i], view[i]); } @@ -71,7 +72,7 @@ class TestViewExtensions extends Test { final bytes = Bytes.ofData([ 10, 20, 30, 40 ]); final view = bytes.asView(); - if (Assert.equals(bytes.length, view.length)) { + if (Assert.equals(Int64.ofInt(bytes.length), view.length)) { for (i in 0...bytes.length) { Assert.equals(bytes.get(i), view[i]); } @@ -83,7 +84,7 @@ class TestViewExtensions extends 
Test { final buffer = ArrayBufferView.fromBytes(Bytes.ofData([ for (i in 0...100) i ])).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.byteLength, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.byteLength), view.length)) { for (i in 0...buffer.byteLength) { Assert.equals(buffer.buffer.get(index + i), view[i]); } @@ -95,7 +96,7 @@ class TestViewExtensions extends Test { final buffer = Float32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -107,7 +108,7 @@ class TestViewExtensions extends Test { final buffer = Float64Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -119,7 +120,7 @@ class TestViewExtensions extends Test { final buffer = Int32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -131,7 +132,7 @@ class TestViewExtensions extends Test { final buffer = UInt32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -143,7 +144,7 @@ class TestViewExtensions extends Test { final buffer = UInt16Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if 
(Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -155,10 +156,60 @@ class TestViewExtensions extends Test { final buffer = UInt8Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } } } + + function test_szToString_char_no_null() { + final vec = new Vector(4); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString_char() { + final vec = new Vector(9); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + vec[4] = 0; + vec[5] = 't'.code; + vec[6] = 'e'.code; + vec[7] = 's'.code; + vec[8] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString_char16_no_null() { + final vec = new Vector(4); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString16_char() { + final vec = new Vector(9); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + vec[4] = 0; + vec[5] = 't'.code; + vec[6] = 'e'.code; + vec[7] = 's'.code; + vec[8] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } } \ No newline at end of file diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index 8d7362e12..09d933727 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -68,6 +68,9 @@ + + + @@ -199,6 +202,10 @@ + + + +