Skip to content

Commit 44d4274

Browse files
committed
move text utility methods into McpTextUtilities
1 parent 50a8255 commit 44d4274

File tree

2 files changed

+165
-163
lines changed

2 files changed

+165
-163
lines changed

src/ModelContextProtocol.Core/McpTextUtilities.cs

Lines changed: 164 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using System.Buffers.Text;
2+
using System.Diagnostics;
23
using System.Runtime.InteropServices;
34
using System.Text;
5+
using System.Text.Json;
46

57
namespace ModelContextProtocol.Core;
68

@@ -69,4 +71,165 @@ public static bool IsWhiteSpace(ReadOnlySpan<byte> utf8Bytes)
6971

7072
return true;
7173
}
72-
}
74+
75+
/// <summary>
/// Converts the raw (still escaped) UTF-8 bytes of a JSON string value into its unescaped UTF-8 bytes.
/// </summary>
/// <param name="escaped">The string contents exactly as they appear in the JSON text, without the surrounding quotes.</param>
/// <returns>A newly allocated array holding the unescaped bytes.</returns>
/// <exception cref="JsonException">
/// A backslash is the last byte of the input, or is followed by a character other than
/// <c>"</c>, <c>\</c>, <c>/</c>, <c>b</c>, <c>f</c>, <c>n</c>, <c>r</c>, <c>t</c> or <c>u</c>.
/// </exception>
internal static byte[] UnescapeJsonStringToUtf8(ReadOnlySpan<byte> escaped)
{
    const byte Backslash = (byte)'\\';
    const byte UnicodeMarker = (byte)'u';

    // Translates a single-character escape into the byte it stands for; anything else is malformed.
    static byte DecodeSimpleEscape(byte kind) => kind switch
    {
        (byte)'"' => (byte)'"',
        (byte)'\\' => (byte)'\\',
        (byte)'/' => (byte)'/',
        (byte)'b' => (byte)0x08,
        (byte)'f' => (byte)0x0C,
        (byte)'n' => (byte)0x0A,
        (byte)'r' => (byte)0x0D,
        (byte)'t' => (byte)0x09,
        _ => throw new JsonException(),
    };

    // Pass 1: measure the output so the result array can be allocated at its exact size.
    int required = 0;
    int pos = 0;
    while (pos < escaped.Length)
    {
        if (escaped[pos] == Backslash)
        {
            if (++pos >= escaped.Length)
            {
                throw new JsonException();
            }

            byte kind = escaped[pos];
            if (kind == UnicodeMarker)
            {
                // The helper consumes the hex digits (and a trailing low surrogate, if any) via pos.
                required += GetUtf8ByteCountForEscapedUnicode(escaped, ref pos);
            }
            else
            {
                // Called for validation only; every simple escape yields exactly one byte.
                _ = DecodeSimpleEscape(kind);
                required++;
            }
        }
        else
        {
            required++;
        }

        pos++;
    }

    // Pass 2: walk the input again, this time emitting bytes.
    byte[] result = new byte[required];
    int written = 0;
    pos = 0;
    while (pos < escaped.Length)
    {
        byte current = escaped[pos];
        if (current == Backslash)
        {
            if (++pos >= escaped.Length)
            {
                throw new JsonException();
            }

            byte kind = escaped[pos];
            if (kind == UnicodeMarker)
            {
                uint scalar = ReadEscapedUnicodeScalar(escaped, ref pos);
                WriteUtf8Scalar(scalar, result, ref written);
            }
            else
            {
                result[written++] = DecodeSimpleEscape(kind);
            }
        }
        else
        {
            result[written++] = current;
        }

        pos++;
    }

    Debug.Assert(written == result.Length);
    return result;
}
157+
158+
/// <summary>
/// Reads the <c>\u</c> escape at <paramref name="i"/> and reports how many UTF-8 bytes its value occupies.
/// </summary>
/// <param name="escaped">The escaped JSON string bytes.</param>
/// <param name="i">
/// Index of the <c>u</c> of the escape; it is passed on to <see cref="ReadEscapedUnicodeScalar"/>,
/// which advances it past the characters it consumes.
/// </param>
/// <returns>1, 2, 3 or 4, depending on the magnitude of the decoded value.</returns>
internal static int GetUtf8ByteCountForEscapedUnicode(ReadOnlySpan<byte> escaped, ref int i)
{
    return ReadEscapedUnicodeScalar(escaped, ref i) switch
    {
        <= 0x7F => 1,
        <= 0x7FF => 2,
        <= 0xFFFF => 3,
        _ => 4,
    };
}
166+
167+
/// <summary>
/// Decodes the <c>\uXXXX</c> escape whose <c>u</c> is at index <paramref name="i"/> into a Unicode scalar value,
/// combining a high surrogate with an immediately following <c>\uXXXX</c> low surrogate when present.
/// </summary>
/// <param name="escaped">The escaped JSON string bytes.</param>
/// <param name="i">
/// On entry, the index of the <c>u</c>. On return, the index of the last hex digit consumed
/// (the fourth digit of the escape, or of the low surrogate's escape when a pair was combined).
/// </param>
/// <returns>
/// The decoded scalar value. An unpaired surrogate (a high surrogate not followed by a low one, or a
/// low surrogate on its own) is reported as U+FFFD, because a surrogate code point cannot be encoded
/// as well-formed UTF-8. U+FFFD occupies three UTF-8 bytes, the same as the surrogate would have.
/// </returns>
/// <exception cref="JsonException">
/// Fewer than four bytes follow the <c>u</c>, or one of the inspected digits is not hexadecimal.
/// </exception>
internal static uint ReadEscapedUnicodeScalar(ReadOnlySpan<byte> escaped, ref int i)
{
    const uint ReplacementCharacter = 0xFFFD;

    // i points at 'u'; the four hex digits live at i + 1 .. i + 4.
    if (i + 4 >= escaped.Length)
    {
        throw new JsonException();
    }

    uint codeUnit = (uint)(FromHex(escaped[i + 1]) << 12 |
                           FromHex(escaped[i + 2]) << 8 |
                           FromHex(escaped[i + 3]) << 4 |
                           FromHex(escaped[i + 4]));
    i += 4;

    // Anything outside the surrogate range is already a scalar value.
    if (codeUnit is < 0xD800 or > 0xDFFF)
    {
        return codeUnit;
    }

    // High surrogate: \uD800-\uDBFF must be followed by \uDC00-\uDFFF to form a pair.
    if (codeUnit <= 0xDBFF)
    {
        int lookahead = i + 1;
        if (lookahead + 5 < escaped.Length && escaped[lookahead] == (byte)'\\' && escaped[lookahead + 1] == (byte)'u')
        {
            uint low = (uint)(FromHex(escaped[lookahead + 2]) << 12 |
                              FromHex(escaped[lookahead + 3]) << 8 |
                              FromHex(escaped[lookahead + 4]) << 4 |
                              FromHex(escaped[lookahead + 5]));

            if (low is >= 0xDC00 and <= 0xDFFF)
            {
                i = lookahead + 5;
                return 0x10000u + ((codeUnit - 0xD800u) << 10) + (low - 0xDC00u);
            }
        }
    }

    // Unpaired high or low surrogate. i is left on this escape's last digit, so a following
    // escape (if any) is decoded independently by the caller's loop.
    return ReplacementCharacter;
}
202+
203+
/// <summary>
/// Returns the numeric value (0-15) of an ASCII hexadecimal digit, accepting both upper and lower case.
/// </summary>
/// <param name="b">The ASCII byte to interpret.</param>
/// <exception cref="JsonException"><paramref name="b"/> is not one of <c>0-9</c>, <c>a-f</c> or <c>A-F</c>.</exception>
internal static int FromHex(byte b) => b switch
{
    >= (byte)'0' and <= (byte)'9' => b - '0',
    >= (byte)'a' and <= (byte)'f' => b - 'a' + 10,
    >= (byte)'A' and <= (byte)'F' => b - 'A' + 10,
    _ => throw new JsonException(),
};
209+
210+
/// <summary>
/// Encodes <paramref name="scalar"/> as UTF-8 into <paramref name="destination"/> starting at
/// <paramref name="dst"/>, advancing <paramref name="dst"/> by the number of bytes written (1 to 4).
/// </summary>
/// <param name="scalar">The value to encode; the encoding length is chosen from its magnitude.</param>
/// <param name="destination">The array that receives the encoded bytes.</param>
/// <param name="dst">The write position; incremented once per byte stored.</param>
internal static void WriteUtf8Scalar(uint scalar, byte[] destination, ref int dst)
{
    // Builds a continuation byte (10xxxxxx) from the low six bits of the given value.
    static byte Continuation(uint bits) => (byte)(0x80 | (bits & 0x3F));

    switch (scalar)
    {
        case <= 0x7F:
            // Single byte: 0xxxxxxx
            destination[dst++] = (byte)scalar;
            return;

        case <= 0x7FF:
            // Two bytes: 110xxxxx 10xxxxxx
            destination[dst++] = (byte)(0xC0 | (scalar >> 6));
            destination[dst++] = Continuation(scalar);
            return;

        case <= 0xFFFF:
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            destination[dst++] = (byte)(0xE0 | (scalar >> 12));
            destination[dst++] = Continuation(scalar >> 6);
            destination[dst++] = Continuation(scalar);
            return;

        default:
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            destination[dst++] = (byte)(0xF0 | (scalar >> 18));
            destination[dst++] = Continuation(scalar >> 12);
            destination[dst++] = Continuation(scalar >> 6);
            destination[dst++] = Continuation(scalar);
            return;
    }
}
235+
}

src/ModelContextProtocol.Core/Protocol/ContentBlock.cs

Lines changed: 1 addition & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -293,168 +293,7 @@ internal static ReadOnlyMemory<byte> ReadUtf8StringValueAsBytes(ref Utf8JsonRead
293293

294294
// The value is escaped (e.g. contains \uXXXX or \n); unescape into UTF-8 bytes.
295295
ReadOnlySpan<byte> escaped = reader.HasValueSequence ? reader.ValueSequence.ToArray() : reader.ValueSpan;
296-
return UnescapeJsonStringToUtf8(escaped);
297-
}
298-
299-
private static byte[] UnescapeJsonStringToUtf8(ReadOnlySpan<byte> escaped)
300-
{
301-
// Two-pass: first compute output length, then write, to avoid intermediate buffers/copies.
302-
int outputLength = 0;
303-
for (int i = 0; i < escaped.Length; i++)
304-
{
305-
byte b = escaped[i];
306-
if (b != (byte)'\\')
307-
{
308-
outputLength++;
309-
continue;
310-
}
311-
312-
if (++i >= escaped.Length)
313-
{
314-
throw new JsonException();
315-
}
316-
317-
switch (escaped[i])
318-
{
319-
case (byte)'"':
320-
case (byte)'\\':
321-
case (byte)'/':
322-
case (byte)'b':
323-
case (byte)'f':
324-
case (byte)'n':
325-
case (byte)'r':
326-
case (byte)'t':
327-
outputLength++;
328-
break;
329-
330-
case (byte)'u':
331-
outputLength += GetUtf8ByteCountForEscapedUnicode(escaped, ref i);
332-
break;
333-
334-
default:
335-
throw new JsonException();
336-
}
337-
}
338-
339-
byte[] result = new byte[outputLength];
340-
int dst = 0;
341-
342-
for (int i = 0; i < escaped.Length; i++)
343-
{
344-
byte b = escaped[i];
345-
if (b != (byte)'\\')
346-
{
347-
result[dst++] = b;
348-
continue;
349-
}
350-
351-
if (++i >= escaped.Length)
352-
{
353-
throw new JsonException();
354-
}
355-
356-
byte esc = escaped[i];
357-
switch (esc)
358-
{
359-
case (byte)'"': result[dst++] = (byte)'"'; break;
360-
case (byte)'\\': result[dst++] = (byte)'\\'; break;
361-
case (byte)'/': result[dst++] = (byte)'/'; break;
362-
case (byte)'b': result[dst++] = 0x08; break;
363-
case (byte)'f': result[dst++] = 0x0C; break;
364-
case (byte)'n': result[dst++] = 0x0A; break;
365-
case (byte)'r': result[dst++] = 0x0D; break;
366-
case (byte)'t': result[dst++] = 0x09; break;
367-
368-
case (byte)'u':
369-
uint scalar = ReadEscapedUnicodeScalar(escaped, ref i);
370-
WriteUtf8Scalar(scalar, result, ref dst);
371-
break;
372-
373-
default:
374-
throw new JsonException();
375-
}
376-
}
377-
378-
Debug.Assert(dst == result.Length);
379-
return result;
380-
}
381-
382-
private static int GetUtf8ByteCountForEscapedUnicode(ReadOnlySpan<byte> escaped, ref int i)
383-
{
384-
uint scalar = ReadEscapedUnicodeScalar(escaped, ref i);
385-
return scalar <= 0x7F ? 1 :
386-
scalar <= 0x7FF ? 2 :
387-
scalar <= 0xFFFF ? 3 :
388-
4;
389-
}
390-
391-
private static uint ReadEscapedUnicodeScalar(ReadOnlySpan<byte> escaped, ref int i)
392-
{
393-
// i points at 'u'.
394-
if (i + 4 >= escaped.Length)
395-
{
396-
throw new JsonException();
397-
}
398-
399-
uint codeUnit = (uint)(FromHex(escaped[i + 1]) << 12 |
400-
FromHex(escaped[i + 2]) << 8 |
401-
FromHex(escaped[i + 3]) << 4 |
402-
FromHex(escaped[i + 4]));
403-
i += 4;
404-
405-
// Surrogate pair: \uD800-\uDBFF followed by \uDC00-\uDFFF
406-
if (codeUnit is >= 0xD800 and <= 0xDBFF)
407-
{
408-
int lookahead = i + 1;
409-
if (lookahead + 5 < escaped.Length && escaped[lookahead] == (byte)'\\' && escaped[lookahead + 1] == (byte)'u')
410-
{
411-
uint low = (uint)(FromHex(escaped[lookahead + 2]) << 12 |
412-
FromHex(escaped[lookahead + 3]) << 8 |
413-
FromHex(escaped[lookahead + 4]) << 4 |
414-
FromHex(escaped[lookahead + 5]));
415-
416-
if (low is >= 0xDC00 and <= 0xDFFF)
417-
{
418-
i = lookahead + 5;
419-
return 0x10000u + ((codeUnit - 0xD800u) << 10) + (low - 0xDC00u);
420-
}
421-
}
422-
}
423-
424-
return codeUnit;
425-
}
426-
427-
private static int FromHex(byte b)
428-
{
429-
if ((uint)(b - '0') <= 9) return b - '0';
430-
if ((uint)((b | 0x20) - 'a') <= 5) return (b | 0x20) - 'a' + 10;
431-
throw new JsonException();
432-
}
433-
434-
private static void WriteUtf8Scalar(uint scalar, byte[] destination, ref int dst)
435-
{
436-
if (scalar <= 0x7F)
437-
{
438-
destination[dst++] = (byte)scalar;
439-
}
440-
else if (scalar <= 0x7FF)
441-
{
442-
destination[dst++] = (byte)(0xC0 | (scalar >> 6));
443-
destination[dst++] = (byte)(0x80 | (scalar & 0x3F));
444-
}
445-
else if (scalar <= 0xFFFF)
446-
{
447-
destination[dst++] = (byte)(0xE0 | (scalar >> 12));
448-
destination[dst++] = (byte)(0x80 | ((scalar >> 6) & 0x3F));
449-
destination[dst++] = (byte)(0x80 | (scalar & 0x3F));
450-
}
451-
else
452-
{
453-
destination[dst++] = (byte)(0xF0 | (scalar >> 18));
454-
destination[dst++] = (byte)(0x80 | ((scalar >> 12) & 0x3F));
455-
destination[dst++] = (byte)(0x80 | ((scalar >> 6) & 0x3F));
456-
destination[dst++] = (byte)(0x80 | (scalar & 0x3F));
457-
}
296+
return Core.McpTextUtilities.UnescapeJsonStringToUtf8(escaped);
458297
}
459298

460299
/// <inheritdoc/>

0 commit comments

Comments
 (0)