diff --git a/util/src/main/java/dev/openrs2/util/charset/ModifiedUtf8Charset.kt b/util/src/main/java/dev/openrs2/util/charset/ModifiedUtf8Charset.kt
new file mode 100644
index 00000000..44447810
--- /dev/null
+++ b/util/src/main/java/dev/openrs2/util/charset/ModifiedUtf8Charset.kt
@@ -0,0 +1,115 @@
+package dev.openrs2.util.charset
+
+import java.nio.ByteBuffer
+import java.nio.CharBuffer
+import java.nio.charset.Charset
+import java.nio.charset.CharsetDecoder
+import java.nio.charset.CharsetEncoder
+import java.nio.charset.CoderResult
+
+/**
+ * A [Charset] for modified UTF-8, where every UTF-16 code unit is encoded
+ * independently as one, two or three bytes.
+ *
+ * NUL (`U+0000`) is written as the two-byte sequence `C0 80`, so encoded
+ * output never contains a zero byte. Surrogates are not combined: each half is
+ * written as its own three-byte sequence. When decoding, a bare zero byte and
+ * any lead byte other than `0xxxxxxx`, `110xxxxx` or `1110xxxx` are reported as
+ * malformed input.
+ */
+object ModifiedUtf8Charset : Charset("ModifiedUtf8", null) {
+    override fun contains(cs: Charset): Boolean {
+        return Charsets.UTF_8.contains(cs) || cs is Cp1252Charset || cs is ModifiedUtf8Charset
+    }
+
+    override fun newEncoder(): CharsetEncoder {
+        return object : CharsetEncoder(this, 1F, 3F) {
+            override fun encodeLoop(input: CharBuffer, output: ByteBuffer): CoderResult {
+                while (input.hasRemaining()) {
+                    val char = input.get()
+
+                    // NUL fails the first test on purpose so that it takes the two-byte form.
+                    val len = if (char != '\u0000' && char < '\u0080') {
+                        1
+                    } else if (char < '\u0800') {
+                        2
+                    } else {
+                        3
+                    }
+
+                    if (output.remaining() < len) {
+                        // Un-read the char so the caller can retry it with a larger buffer.
+                        input.position(input.position() - 1)
+                        return CoderResult.OVERFLOW
+                    }
+
+                    if (len == 1) {
+                        output.put(char.toByte())
+                    } else if (len == 2) {
+                        output.put((0xC0 or ((char.toInt() shr 6) and 0x1F)).toByte())
+                        output.put((0x80 or (char.toInt() and 0x3F)).toByte())
+                    } else {
+                        // 1110xxxx 10xxxxxx 10xxxxxx: 4 + 6 + 6 payload bits. Both
+                        // continuation bytes carry six bits, so both need the 0x3F mask.
+                        output.put((0xE0 or ((char.toInt() shr 12) and 0x0F)).toByte())
+                        output.put((0x80 or ((char.toInt() shr 6) and 0x3F)).toByte())
+                        output.put((0x80 or (char.toInt() and 0x3F)).toByte())
+                    }
+                }
+
+                return CoderResult.UNDERFLOW
+            }
+        }
+    }
+
+    override fun newDecoder(): CharsetDecoder {
+        return object : CharsetDecoder(this, 1F, 1F) {
+            override fun decodeLoop(input: ByteBuffer, output: CharBuffer): CoderResult {
+                while (input.hasRemaining()) {
+                    if (!output.hasRemaining()) {
+                        return CoderResult.OVERFLOW
+                    }
+
+                    val a = input.get().toInt() and 0xFF
+                    if (a != 0 && a < 0x80) {
+                        output.put(a.toChar())
+                    } else if ((a and 0xE0) == 0xC0) {
+                        if (!input.hasRemaining()) {
+                            // Truncated sequence: rewind and wait for more input.
+                            input.position(input.position() - 1)
+                            return CoderResult.UNDERFLOW
+                        }
+
+                        val b = input.get().toInt() and 0xFF
+                        if ((b and 0xC0) != 0x80) {
+                            input.position(input.position() - 2)
+                            return CoderResult.malformedForLength(2)
+                        }
+
+                        output.put((((a and 0x1F) shl 6) or (b and 0x3F)).toChar())
+                    } else if ((a and 0xF0) == 0xE0) {
+                        if (input.remaining() < 2) {
+                            input.position(input.position() - 1)
+                            return CoderResult.UNDERFLOW
+                        }
+
+                        val b = input.get().toInt() and 0xFF
+                        val c = input.get().toInt() and 0xFF
+                        if ((b and 0xC0) != 0x80 || (c and 0xC0) != 0x80) {
+                            input.position(input.position() - 3)
+                            return CoderResult.malformedForLength(3)
+                        }
+
+                        output.put((((a and 0x0F) shl 12) or ((b and 0x3F) shl 6) or (c and 0x3F)).toChar())
+                    } else {
+                        // A zero byte, a stray continuation byte or a 4+ byte lead.
+                        input.position(input.position() - 1)
+                        return CoderResult.malformedForLength(1)
+                    }
+                }
+
+                return CoderResult.UNDERFLOW
+            }
+        }
+    }
+}
diff --git a/util/src/test/java/dev/openrs2/util/charset/ModifiedUtf8CharsetTest.kt b/util/src/test/java/dev/openrs2/util/charset/ModifiedUtf8CharsetTest.kt
new file mode 100644
index 00000000..fe739a3a
--- /dev/null
+++ b/util/src/test/java/dev/openrs2/util/charset/ModifiedUtf8CharsetTest.kt
@@ -0,0 +1,72 @@
+package dev.openrs2.util.charset
+
+import org.junit.jupiter.api.Assertions.assertArrayEquals
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertTrue
+
+object ModifiedUtf8CharsetTest {
+    @Test
+    fun testEncode() {
+        assertArrayEquals(byteArrayOf(0xC0.toByte(), 0x80.toByte()), "\u0000".toByteArray(ModifiedUtf8Charset))
+        assertArrayEquals(byteArrayOf(0x41), "A".toByteArray(ModifiedUtf8Charset))
+        assertArrayEquals(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), "©".toByteArray(ModifiedUtf8Charset))
+        assertArrayEquals(
+            byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()),
+            "€".toByteArray(ModifiedUtf8Charset)
+        )
+
+        // U+0800 and U+FFFF have bit 11 set, which the middle byte of a three-byte sequence carries.
+        assertArrayEquals(
+            byteArrayOf(0xE0.toByte(), 0xA0.toByte(), 0x80.toByte()),
+            "\u0800".toByteArray(ModifiedUtf8Charset)
+        )
+        assertArrayEquals(
+            byteArrayOf(0xEF.toByte(), 0xBF.toByte(), 0xBF.toByte()),
+            "\uFFFF".toByteArray(ModifiedUtf8Charset)
+        )
+    }
+
+    @Test
+    fun testDecode() {
+        assertEquals("\u0000", String(byteArrayOf(0xC0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
+        assertEquals("A", String(byteArrayOf(0x41), ModifiedUtf8Charset))
+        assertEquals("©", String(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), ModifiedUtf8Charset))
+        assertEquals(
+            "€",
+            String(byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()), ModifiedUtf8Charset)
+        )
+        assertEquals(
+            "\u0800",
+            String(byteArrayOf(0xE0.toByte(), 0xA0.toByte(), 0x80.toByte()), ModifiedUtf8Charset)
+        )
+
+        assertEquals("\uFFFD", String(byteArrayOf(0), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0x80.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte(), 0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte(), 0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0, 0x80.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xF0.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xF8.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xFC.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xFC.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xFE.toByte()), ModifiedUtf8Charset))
+        assertEquals("\uFFFD", String(byteArrayOf(0xFF.toByte()), ModifiedUtf8Charset))
+    }
+
+    @Test
+    fun testContains() {
+        assertTrue(ModifiedUtf8Charset.contains(ModifiedUtf8Charset))
+        assertTrue(ModifiedUtf8Charset.contains(Cp1252Charset))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.US_ASCII))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.ISO_8859_1))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_8))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16BE))
+        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16LE))
+    }
+}