forked from openrs2/openrs2
Will be used by the packclass implementation. Like Cp1252Charset, the JDK isn't guaranteed to provide an implementation of this charset. Signed-off-by: Graham <gpe@openrs2.dev>bzip2
parent
7ab3b3d335
commit
2525501901
@ -0,0 +1,99 @@ |
|||||||
|
package dev.openrs2.util.charset |
||||||
|
|
||||||
|
import java.nio.ByteBuffer |
||||||
|
import java.nio.CharBuffer |
||||||
|
import java.nio.charset.Charset |
||||||
|
import java.nio.charset.CharsetDecoder |
||||||
|
import java.nio.charset.CharsetEncoder |
||||||
|
import java.nio.charset.CoderResult |
||||||
|
|
||||||
|
/**
 * A [Charset] implementing the "modified UTF-8" encoding: every UTF-16 code
 * unit is encoded on its own using one, two or three bytes, and `U+0000` is
 * written as the two-byte sequence `C0 80` so that encoded output never
 * contains a zero byte.
 *
 * Surrogates are not combined: each half of a surrogate pair is encoded as
 * its own three-byte sequence. Four-byte sequences are therefore never
 * produced by the encoder and are rejected by the decoder.
 */
object ModifiedUtf8Charset : Charset("ModifiedUtf8", null) {
    /**
     * Returns `true` if [cs] is reported as contained by [Charsets.UTF_8], or
     * if it is [Cp1252Charset] or this charset itself.
     */
    override fun contains(cs: Charset): Boolean {
        return Charsets.UTF_8.contains(cs) || cs is Cp1252Charset || cs is ModifiedUtf8Charset
    }

    /**
     * Creates an encoder averaging one byte per char and never producing more
     * than three bytes per char.
     */
    override fun newEncoder(): CharsetEncoder {
        return object : CharsetEncoder(this, 1F, 3F) {
            override fun encodeLoop(input: CharBuffer, output: ByteBuffer): CoderResult {
                while (input.hasRemaining()) {
                    val code = input.get().toInt()

                    // U+0000 deliberately falls into the two-byte case (C0 80).
                    val len = when {
                        code in 0x01..0x7F -> 1
                        code < 0x800 -> 2
                        else -> 3
                    }

                    if (output.remaining() < len) {
                        // Un-read the char so it is retried once the caller
                        // supplies more output space.
                        input.position(input.position() - 1)
                        return CoderResult.OVERFLOW
                    }

                    when (len) {
                        1 -> output.put(code.toByte())
                        2 -> {
                            output.put((0xC0 or ((code shr 6) and 0x1F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                        else -> {
                            // Layout: 1110xxxx 10xxxxxx 10xxxxxx. Both
                            // continuation bytes carry six payload bits, so
                            // the middle byte must be masked with 0x3F.
                            output.put((0xE0 or ((code shr 12) and 0x0F)).toByte())
                            output.put((0x80 or ((code shr 6) and 0x3F)).toByte())
                            output.put((0x80 or (code and 0x3F)).toByte())
                        }
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }

    /**
     * Creates a decoder producing at most one char per input byte.
     *
     * A raw zero byte, a stray continuation byte and any lead byte of the
     * form `1111xxxx` are reported as malformed input of length one. A lead
     * byte followed by a non-continuation byte is reported as malformed for
     * the whole two- or three-byte sequence. Overlong forms are not rejected.
     */
    override fun newDecoder(): CharsetDecoder {
        return object : CharsetDecoder(this, 1F, 1F) {
            override fun decodeLoop(input: ByteBuffer, output: CharBuffer): CoderResult {
                while (input.hasRemaining()) {
                    // Every accepted sequence yields exactly one char, so a
                    // single free slot checked up front is sufficient.
                    if (!output.hasRemaining()) {
                        return CoderResult.OVERFLOW
                    }

                    val a = input.get().toInt() and 0xFF
                    when {
                        a in 0x01..0x7F -> output.put(a.toChar())
                        (a and 0xE0) == 0xC0 -> {
                            if (!input.hasRemaining()) {
                                // Incomplete sequence: un-read the lead byte
                                // and wait for more input.
                                input.position(input.position() - 1)
                                return CoderResult.UNDERFLOW
                            }

                            val b = input.get().toInt() and 0xFF
                            if ((b and 0xC0) != 0x80) {
                                input.position(input.position() - 2)
                                return CoderResult.malformedForLength(2)
                            }

                            output.put((((a and 0x1F) shl 6) or (b and 0x3F)).toChar())
                        }
                        (a and 0xF0) == 0xE0 -> {
                            if (input.remaining() < 2) {
                                // Incomplete sequence: un-read the lead byte
                                // and wait for more input.
                                input.position(input.position() - 1)
                                return CoderResult.UNDERFLOW
                            }

                            val b = input.get().toInt() and 0xFF
                            val c = input.get().toInt() and 0xFF
                            if ((b and 0xC0) != 0x80 || (c and 0xC0) != 0x80) {
                                input.position(input.position() - 3)
                                return CoderResult.malformedForLength(3)
                            }

                            output.put((((a and 0x0F) shl 12) or ((b and 0x3F) shl 6) or (c and 0x3F)).toChar())
                        }
                        else -> {
                            input.position(input.position() - 1)
                            return CoderResult.malformedForLength(1)
                        }
                    }
                }

                return CoderResult.UNDERFLOW
            }
        }
    }
}
@ -0,0 +1,58 @@ |
|||||||
|
package dev.openrs2.util.charset |
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions.assertArrayEquals |
||||||
|
import kotlin.test.Test |
||||||
|
import kotlin.test.assertEquals |
||||||
|
import kotlin.test.assertTrue |
||||||
|
|
||||||
|
/**
 * Tests for [ModifiedUtf8Charset]: encoding and decoding of one-, two- and
 * three-byte sequences, replacement of malformed input while decoding, and
 * the charset containment check.
 */
object ModifiedUtf8CharsetTest {
    @Test
    fun testEncode() {
        // U+0000 must use the two-byte form rather than a raw zero byte.
        assertArrayEquals(byteArrayOf(0xC0.toByte(), 0x80.toByte()), "\u0000".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0x41), "A".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), "©".toByteArray(ModifiedUtf8Charset))
        assertArrayEquals(
            byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()),
            "€".toByteArray(ModifiedUtf8Charset)
        )
    }

    @Test
    fun testDecode() {
        assertEquals("\u0000", String(byteArrayOf(0xC0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("A", String(byteArrayOf(0x41), ModifiedUtf8Charset))
        assertEquals("©", String(byteArrayOf(0xC2.toByte(), 0xA9.toByte()), ModifiedUtf8Charset))
        assertEquals(
            "€",
            String(byteArrayOf(0xE2.toByte(), 0x82.toByte(), 0xAC.toByte()), ModifiedUtf8Charset)
        )

        // Boundary values: the largest two-byte char and the smallest and
        // largest three-byte chars.
        assertEquals("\u07FF", String(byteArrayOf(0xDF.toByte(), 0xBF.toByte()), ModifiedUtf8Charset))
        assertEquals(
            "\u0800",
            String(byteArrayOf(0xE0.toByte(), 0xA0.toByte(), 0x80.toByte()), ModifiedUtf8Charset)
        )
        assertEquals(
            "\uFFFF",
            String(byteArrayOf(0xEF.toByte(), 0xBF.toByte(), 0xBF.toByte()), ModifiedUtf8Charset)
        )

        // Malformed or truncated input decodes to a single replacement char.
        assertEquals("\uFFFD", String(byteArrayOf(0), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xC0.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0x80.toByte(), 0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xE0.toByte(), 0, 0x80.toByte()), ModifiedUtf8Charset))

        // Lead bytes of four-byte and longer sequences are never valid here.
        assertEquals("\uFFFD", String(byteArrayOf(0xF0.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xF8.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFC.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFE.toByte()), ModifiedUtf8Charset))
        assertEquals("\uFFFD", String(byteArrayOf(0xFF.toByte()), ModifiedUtf8Charset))
    }

    @Test
    fun testContains() {
        assertTrue(ModifiedUtf8Charset.contains(ModifiedUtf8Charset))
        assertTrue(ModifiedUtf8Charset.contains(Cp1252Charset))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.US_ASCII))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.ISO_8859_1))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_8))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16BE))
        assertTrue(ModifiedUtf8Charset.contains(Charsets.UTF_16LE))
    }
}
Loading…
Reference in new issue