forked from openrs2/openrs2
Will be used by the packclass implementation. Like Cp1252Charset, the JDK isn't guaranteed to provide an implementation of this charset. Signed-off-by: Graham <gpe@openrs2.dev>
parent
7ab3b3d335
commit
2525501901
@ -0,0 +1,99 @@ |
||||
package dev.openrs2.util.charset |
||||
|
||||
import java.nio.ByteBuffer |
||||
import java.nio.CharBuffer |
||||
import java.nio.charset.Charset |
||||
import java.nio.charset.CharsetDecoder |
||||
import java.nio.charset.CharsetEncoder |
||||
import java.nio.charset.CoderResult |
||||
|
||||
/**
 * A [Charset] registered under the name `ModifiedUtf8`.
 *
 * It differs from standard UTF-8 in the ways visible in the coder below:
 *
 * - `U+0000` is written as the two-byte sequence `C0 80`; a raw zero byte is
 *   rejected as malformed when decoding.
 * - Every UTF-16 code unit is encoded on its own using at most three bytes,
 *   so surrogates are never combined into a four-byte sequence, and lead
 *   bytes of `0xF0` and above are rejected as malformed when decoding.
 */
object ModifiedUtf8Charset : Charset("ModifiedUtf8", null) {
    /**
     * Returns `true` if [cs] is contained in UTF-8 (as reported by the JDK's
     * UTF-8 charset), or is [Cp1252Charset] or this charset itself.
     */
    override fun contains(cs: Charset): Boolean {
        return Charsets.UTF_8.contains(cs) || cs is Cp1252Charset || cs is ModifiedUtf8Charset
    }

    /**
     * Creates an encoder that emits one to three bytes per input char
     * (average 1, maximum 3).
     */
    override fun newEncoder(): CharsetEncoder {
        return object : CharsetEncoder(this, 1F, 3F) {
            override fun encodeLoop(input: CharBuffer, output: ByteBuffer): CoderResult {
                while (input.hasRemaining()) {
                    val char = input.get()
                    val code = char.toInt()

                    // U+0000 deliberately falls through to the two-byte form.
                    val len = when {
                        char != '\u0000' && char < '\u0080' -> 1
                        char < '\u0800' -> 2
                        else -> 3
                    }

                    if (output.remaining() < len) {
                        // Un-read the char so the caller can retry it once
                        // more output space is available.
                        input.position(input.position() - 1)
                        return CoderResult.OVERFLOW
                    }

                    when (len) {
                        1 -> output.put(char.toByte())
                        2 -> {
                            // 110xxxxx 10xxxxxx
                            output.put((0xC0 or ((code shr 6) and 0x1F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                        else -> {
                            // 1110xxxx 10xxxxxx 10xxxxxx
                            output.put((0xE0 or ((code shr 12) and 0x0F)).toByte())
                            // The middle byte carries six payload bits (bits
                            // 6-11); masking with 0x1F would drop bit 11.
                            output.put((0x80 or ((code shr 6) and 0x3F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }

    /**
     * Creates a decoder that produces exactly one char per decoded sequence
     * of one, two or three bytes.
     */
    override fun newDecoder(): CharsetDecoder {
        return object : CharsetDecoder(this, 1F, 1F) {
            override fun decodeLoop(input: ByteBuffer, output: CharBuffer): CoderResult {
                while (input.hasRemaining()) {
                    // Every sequence yields exactly one char, so check for
                    // space before consuming any input.
                    if (!output.hasRemaining()) {
                        return CoderResult.OVERFLOW
                    }

                    val a = input.get().toInt() and 0xFF
                    if (a != 0 && a < 0x80) {
                        // Single byte. A raw zero byte is not accepted here
                        // and ends up in the malformed branch below.
                        output.put(a.toChar())
                    } else if ((a and 0xE0) == 0xC0) {
                        // Two-byte sequence: 110xxxxx 10xxxxxx
                        if (!input.hasRemaining()) {
                            // Truncated: un-read the lead byte and wait for
                            // more input.
                            input.position(input.position() - 1)
                            return CoderResult.UNDERFLOW
                        }

                        val b = input.get().toInt() and 0xFF
                        if ((b and 0xC0) != 0x80) {
                            // Rewind to the start of the bad sequence before
                            // reporting it.
                            input.position(input.position() - 2)
                            return CoderResult.malformedForLength(2)
                        }

                        output.put((((a and 0x1F) shl 6) or (b and 0x3F)).toChar())
                    } else if ((a and 0xF0) == 0xE0) {
                        // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                        if (input.remaining() < 2) {
                            // Truncated: un-read the lead byte and wait for
                            // more input.
                            input.position(input.position() - 1)
                            return CoderResult.UNDERFLOW
                        }

                        val b = input.get().toInt() and 0xFF
                        val c = input.get().toInt() and 0xFF
                        if ((b and 0xC0) != 0x80 || (c and 0xC0) != 0x80) {
                            // Rewind to the start of the bad sequence before
                            // reporting it.
                            input.position(input.position() - 3)
                            return CoderResult.malformedForLength(3)
                        }

                        output.put((((a and 0x0F) shl 12) or ((b and 0x3F) shl 6) or (c and 0x3F)).toChar())
                    } else {
                        // Zero byte, stray continuation byte, or a lead byte
                        // of 0xF0 and above.
                        input.position(input.position() - 1)
                        return CoderResult.malformedForLength(1)
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }
}
@ -0,0 +1,58 @@ |
||||
package dev.openrs2.util.charset |
||||
|
||||
import org.junit.jupiter.api.Assertions.assertArrayEquals |
||||
import kotlin.test.Test |
||||
import kotlin.test.assertEquals |
||||
import kotlin.test.assertTrue |
||||
|
||||
/**
 * Tests for [ModifiedUtf8Charset]: encoding, decoding (including malformed
 * input) and the `contains` relation.
 */
object ModifiedUtf8CharsetTest {
    @Test
    fun testEncode() {
        // U+0000 must use the two-byte form rather than a raw zero byte.
        assertArrayEquals(byteArrayOf(0xC0.toByte(), 0x80.toByte()), "\u0000".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0x41), "A".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), "©".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(
            byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()),
            "€".toByteArray(ModifiedUtf8Charset)
        )
    }

    @Test
    fun testDecode() {
        assertEquals("\u0000", String(byteArrayOf(0xC0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("A", String(byteArrayOf(0x41), ModifiedUtf8Charset))
        assertEquals("©", String(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), ModifiedUtf8Charset))
        assertEquals(
            "€",
            String(byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()), ModifiedUtf8Charset)
        )

        // Three-byte sequences whose middle byte has bit 5 set, covering the
        // lowest and highest values of the three-byte range.
        assertEquals(
            "\u0800",
            String(byteArrayOf(0xE0.toByte(), 0xA0.toByte(), 0x80.toByte()), ModifiedUtf8Charset)
        )
        assertEquals(
            "\uFFFF",
            String(byteArrayOf(0xEF.toByte(), 0xBF.toByte(), 0xBF.toByte()), ModifiedUtf8Charset)
        )

        // Each malformed or truncated input decodes to a single U+FFFD.
        val malformed = listOf(
            byteArrayOf(0),
            byteArrayOf(0x80.toByte()),
            byteArrayOf(0xC0.toByte()),
            byteArrayOf(0xC0.toByte(), 0.toByte()),
            byteArrayOf(0xE0.toByte()),
            byteArrayOf(0xE0.toByte(), 0.toByte()),
            byteArrayOf(0xE0.toByte(), 0x80.toByte()),
            byteArrayOf(0xE0.toByte(), 0x80.toByte(), 0.toByte()),
            byteArrayOf(0xE0.toByte(), 0, 0x80.toByte()),
            byteArrayOf(0xF0.toByte()),
            byteArrayOf(0xF8.toByte()),
            byteArrayOf(0xFC.toByte()),
            byteArrayOf(0xFE.toByte()),
            byteArrayOf(0xFF.toByte())
        )

        for (bytes in malformed) {
            assertEquals("\uFFFD", String(bytes, ModifiedUtf8Charset), bytes.contentToString())
        }
    }

    @Test
    fun testContains() {
        assertTrue(ModifiedUtf8Charset.contains(ModifiedUtf8Charset))
        assertTrue(ModifiedUtf8Charset.contains(Cp1252Charset))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.US_ASCII))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.ISO_8859_1))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_8))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16BE))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16LE))
    }
}
Loading…
Reference in new issue