Add ModifiedUtf8Charset

It will be used by the packclass implementation. As with Cp1252Charset, the
JDK isn't guaranteed to provide an implementation of this charset.

Signed-off-by: Graham <gpe@openrs2.dev>
pull/132/head
Graham 4 years ago
parent 7ab3b3d335
commit 2525501901
  1. 99
      util/src/main/java/dev/openrs2/util/charset/ModifiedUtf8Charset.kt
  2. 58
      util/src/test/java/dev/openrs2/util/charset/ModifiedUtf8CharsetTest.kt

@ -0,0 +1,99 @@
package dev.openrs2.util.charset
import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.Charset
import java.nio.charset.CharsetDecoder
import java.nio.charset.CharsetEncoder
import java.nio.charset.CoderResult
/**
 * A [Charset] for the "modified UTF-8" encoding.
 *
 * It differs from standard UTF-8 as implemented here:
 *
 * - `U+0000` is encoded as the two-byte sequence `C0 80`, so the encoded
 *   output never contains a zero byte. A raw zero byte is rejected as
 *   malformed when decoding.
 * - Every UTF-16 code unit is encoded on its own using one, two or three
 *   bytes. Four-byte (and longer) lead bytes are rejected as malformed when
 *   decoding.
 */
object ModifiedUtf8Charset : Charset("ModifiedUtf8", null) {
    /**
     * Returns `true` if [cs] is contained in UTF-8 (as reported by
     * [Charsets.UTF_8]), or is [Cp1252Charset] or this charset itself.
     */
    override fun contains(cs: Charset): Boolean {
        return Charsets.UTF_8.contains(cs) || cs is Cp1252Charset || cs is ModifiedUtf8Charset
    }

    /**
     * Creates an encoder that emits between one and three bytes per input
     * char.
     */
    override fun newEncoder(): CharsetEncoder {
        return object : CharsetEncoder(this, 1F, 3F) {
            override fun encodeLoop(input: CharBuffer, output: ByteBuffer): CoderResult {
                while (input.hasRemaining()) {
                    val char = input.get()
                    val code = char.toInt()

                    // U+0000 deliberately falls through to the two-byte form.
                    val len = when {
                        char != '\u0000' && char < '\u0080' -> 1
                        char < '\u0800' -> 2
                        else -> 3
                    }

                    if (output.remaining() < len) {
                        // Un-read the char so the caller can retry with a larger buffer.
                        input.position(input.position() - 1)
                        return CoderResult.OVERFLOW
                    }

                    when (len) {
                        1 -> output.put(code.toByte())
                        2 -> {
                            // 110xxxxx 10xxxxxx
                            output.put((0xC0 or ((code shr 6) and 0x1F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                        else -> {
                            // 1110xxxx 10xxxxxx 10xxxxxx
                            output.put((0xE0 or ((code shr 12) and 0x0F)).toByte())
                            // Continuation bytes carry six payload bits; masking with 0x1F here
                            // would drop bit 11 of the code unit and corrupt e.g. U+0800.
                            output.put((0x80 or ((code shr 6) and 0x3F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }

    /**
     * Creates a decoder that produces exactly one char per well-formed one-,
     * two- or three-byte sequence and reports everything else as malformed.
     */
    override fun newDecoder(): CharsetDecoder {
        return object : CharsetDecoder(this, 1F, 1F) {
            override fun decodeLoop(input: ByteBuffer, output: CharBuffer): CoderResult {
                while (input.hasRemaining()) {
                    // Every sequence yields one char, so check for space before consuming input.
                    if (!output.hasRemaining()) {
                        return CoderResult.OVERFLOW
                    }

                    val a = input.get().toInt() and 0xFF
                    when {
                        // Single byte. A raw zero byte is not valid in this encoding.
                        a != 0 && a < 0x80 -> output.put(a.toChar())

                        // Two-byte sequence: 110xxxxx 10xxxxxx
                        (a and 0xE0) == 0xC0 -> {
                            if (!input.hasRemaining()) {
                                // Truncated sequence: un-read the lead byte and wait for more input.
                                input.position(input.position() - 1)
                                return CoderResult.UNDERFLOW
                            }

                            val b = input.get().toInt() and 0xFF
                            if ((b and 0xC0) != 0x80) {
                                input.position(input.position() - 2)
                                return CoderResult.malformedForLength(2)
                            }

                            output.put((((a and 0x1F) shl 6) or (b and 0x3F)).toChar())
                        }

                        // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                        (a and 0xF0) == 0xE0 -> {
                            if (input.remaining() < 2) {
                                // Truncated sequence: un-read the lead byte and wait for more input.
                                input.position(input.position() - 1)
                                return CoderResult.UNDERFLOW
                            }

                            val b = input.get().toInt() and 0xFF
                            val c = input.get().toInt() and 0xFF
                            if ((b and 0xC0) != 0x80 || (c and 0xC0) != 0x80) {
                                input.position(input.position() - 3)
                                return CoderResult.malformedForLength(3)
                            }

                            output.put((((a and 0x0F) shl 12) or ((b and 0x3F) shl 6) or (c and 0x3F)).toChar())
                        }

                        // Zero byte, stray continuation byte, or a lead byte for a longer sequence.
                        else -> {
                            input.position(input.position() - 1)
                            return CoderResult.malformedForLength(1)
                        }
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }
}

@ -0,0 +1,58 @@
package dev.openrs2.util.charset
import org.junit.jupiter.api.Assertions.assertArrayEquals
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue
/**
 * Tests for [ModifiedUtf8Charset]: encoding and decoding of one-, two- and
 * three-byte sequences, replacement of malformed input, and [ModifiedUtf8Charset.contains].
 */
object ModifiedUtf8CharsetTest {
    @Test
    fun testEncode() {
        // U+0000 must use the two-byte form rather than a raw zero byte.
        assertArrayEquals(byteArrayOf(0xC0.toByte(), 0x80.toByte()), "\u0000".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0x41), "A".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), "©".toByteArray(ModifiedUtf8Charset))
        // Euro sign (U+20AC), written as an escape so the literal survives re-encoding of this file.
        assertArrayEquals(
            byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()),
            "\u20AC".toByteArray(ModifiedUtf8Charset)
        )
    }

    @Test
    fun testDecode() {
        assertEquals("\u0000", String(byteArrayOf(0xC0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("A", String(byteArrayOf(0x41), ModifiedUtf8Charset))
        assertEquals("©", String(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), ModifiedUtf8Charset))
        // Euro sign (U+20AC), written as an escape so the literal survives re-encoding of this file.
        assertEquals(
            "\u20AC",
            String(byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()), ModifiedUtf8Charset)
        )
        // Three-byte sequence whose middle byte has payload bit 5 set.
        assertEquals(
            "\u0800",
            String(byteArrayOf(0xE0.toByte(), 0xA0.toByte(), 0x80.toByte()), ModifiedUtf8Charset)
        )

        // Malformed input is replaced with U+FFFD.
        assertEquals("\uFFFD", String(byteArrayOf(0), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0, 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xF0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xF8.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFC.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFE.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFF.toByte()), ModifiedUtf8Charset))
    }

    @Test
    fun testContains() {
        assertTrue(ModifiedUtf8Charset.contains(ModifiedUtf8Charset))
        assertTrue(ModifiedUtf8Charset.contains(Cp1252Charset))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.US_ASCII))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.ISO_8859_1))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_8))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16BE))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16LE))
    }
}
Loading…
Cancel
Save