C backend: improve representation of string literals, fix them for various corner cases

This commit is contained in:
Alexey Andreev 2018-05-06 00:48:27 +03:00
parent 37efbd742b
commit 401a9ed212
4 changed files with 54 additions and 36 deletions

View File

@ -130,7 +130,7 @@ public class ClassGenerator {
generateStaticGCRoots(); generateStaticGCRoots();
generateLayoutArray(); generateLayoutArray();
new StringPoolGenerator(stringPoolWriter, context.getNames()).generate(context.getStringPool().getStrings()); new StringPoolGenerator(stringPoolWriter).generate(context.getStringPool().getStrings());
for (String include : includes) { for (String include : includes) {
includesWriter.println("#include " + include); includesWriter.println("#include " + include);

View File

@ -69,6 +69,8 @@ public class NameProvider {
memberFieldNames.put(new FieldReference(RuntimeObject.class.getName(), "classReference"), "header"); memberFieldNames.put(new FieldReference(RuntimeObject.class.getName(), "classReference"), "header");
memberFieldNames.put(new FieldReference(RuntimeArray.class.getName(), "size"), "size"); memberFieldNames.put(new FieldReference(RuntimeArray.class.getName(), "size"), "size");
memberFieldNames.put(new FieldReference(String.class.getName(), "characters"), "characters");
memberFieldNames.put(new FieldReference(String.class.getName(), "hashCode"), "hashCode");
occupiedClassNames.put(RuntimeObject.class.getName(), new HashSet<>(Arrays.asList("header"))); occupiedClassNames.put(RuntimeObject.class.getName(), new HashSet<>(Arrays.asList("header")));
occupiedClassNames.put(RuntimeArray.class.getName(), new HashSet<>(Arrays.asList("length"))); occupiedClassNames.put(RuntimeArray.class.getName(), new HashSet<>(Arrays.asList("length")));

View File

@ -16,56 +16,46 @@
package org.teavm.backend.c.generate; package org.teavm.backend.c.generate;
import java.util.List; import java.util.List;
import org.teavm.model.FieldReference;
public class StringPoolGenerator { public class StringPoolGenerator {
private CodeWriter writer; private CodeWriter writer;
private NameProvider names;
public StringPoolGenerator(CodeWriter writer, NameProvider names) { public StringPoolGenerator(CodeWriter writer) {
this.writer = writer; this.writer = writer;
this.names = names;
} }
public void generate(List<? extends String> strings) { public void generate(List<? extends String> strings) {
generateStringArrays(strings);
generateStringObjects(strings);
}
private void generateStringArrays(List<? extends String> strings) {
for (int i = 0; i < strings.size(); ++i) {
String s = strings.get(i);
writer.print("static struct { JavaArray hdr; char16_t data[" + (s.length() + 1) + "]; } str_array_" + i)
.println(" = {").indent();
writer.println(".hdr = { .size = " + s.length() + "},");
writer.print(".data = ");
generateStringLiteral(s);
writer.println();
writer.outdent().println("};");
}
}
private void generateStringObjects(List<? extends String> strings) {
String charactersName = names.forMemberField(new FieldReference(String.class.getName(), "characters"));
String hashCodeName = names.forMemberField(new FieldReference(String.class.getName(), "hashCode"));
writer.println("static JavaString stringPool[" + strings.size() + "] = {").indent(); writer.println("static JavaString stringPool[" + strings.size() + "] = {").indent();
for (int i = 0; i < strings.size(); ++i) { for (int i = 0; i < strings.size(); ++i) {
writer.println("{").indent(); String s = strings.get(i);
writer.println("." + charactersName + " = (JavaArray*) &str_array_" + i + ","); boolean codes = hasBadCharacters(s);
writer.println("." + hashCodeName + " = INT32_C(" + strings.get(i).hashCode() + ")"); String macroName = codes ? "TEAVM_STRING_FROM_CODES" : "TEAVM_STRING";
writer.outdent().print("}"); writer.print(macroName + "(" + s.length() + ", " + s.hashCode() + ",");
if (codes) {
if (i < strings.size() - 1) { generateNumericStringLiteral(s);
writer.print(","); } else {
generateSimpleStringLiteral(s);
} }
writer.print(")");
writer.print(i < strings.size() - 1 ? "," : " ");
writer.print(" // string #" + i);
writer.println(); writer.println();
} }
writer.outdent().println("};"); writer.outdent().println("};");
} }
private void generateStringLiteral(String string) { private boolean hasBadCharacters(String string) {
// Scans every UTF-16 code unit of the string and reports whether any of them is
// either the NUL character (code 0) or a surrogate code unit.
// generate() uses the result to choose between the TEAVM_STRING macro (quoted literal)
// and the TEAVM_STRING_FROM_CODES macro (list of numeric codes).
for (int i = 0; i < string.length(); ++i) {
char c = string.charAt(i);
// NOTE(review): presumably these are the code units that cannot be written reliably
// inside a C u"..." literal — confirm against the supported C compilers.
if (c == 0 || Character.isSurrogate(c)) {
return true;
}
}
// No NUL and no surrogate code unit was found.
return false;
}
private void generateSimpleStringLiteral(String string) {
writer.print("u\""); writer.print("u\"");
for (int j = 0; j < string.length(); ++j) { for (int j = 0; j < string.length(); ++j) {
@ -88,7 +78,7 @@ public class StringPoolGenerator {
break; break;
default: default:
if (c < 32) { if (c < 32) {
writer.print("\\x" + Character.forDigit(c >> 4, 16) + Character.forDigit(c & 0xF, 16)); writer.print("\\0" + Character.forDigit(c >> 3, 8) + Character.forDigit(c & 0x7, 8));
} else if (c > 127) { } else if (c > 127) {
writer.print("\\u" writer.print("\\u"
+ Character.forDigit(c >> 12, 16) + Character.forDigit(c >> 12, 16)
@ -104,4 +94,14 @@ public class StringPoolGenerator {
writer.print("\""); writer.print("\"");
} }
/**
 * Writes the UTF-16 code units of {@code string} as decimal integers,
 * separated by {@code ", "}. Nothing is written for an empty string.
 *
 * @param string the string whose code units are printed
 */
private void generateNumericStringLiteral(String string) {
    int length = string.length();
    int index = 0;
    while (index < length) {
        // The separator goes before every code unit except the first one.
        if (index != 0) {
            writer.print(", ");
        }
        char unit = string.charAt(index++);
        writer.print(String.valueOf((int) unit));
    }
}
} }

View File

@ -80,6 +80,22 @@ static inline void* checkcast(void*, int32_t (*)(JavaClass*));
#define ADDRESS_ADD(address, offset) ((char *) (address) + (offset)) #define ADDRESS_ADD(address, offset) ((char *) (address) + (offset))
#define STRUCTURE_ADD(structure, address, offset) (((structure*) (address)) + offset) #define STRUCTURE_ADD(structure, address, offset) (((structure*) (address)) + offset)
/*
 * Expands to a brace-enclosed initializer for one string object.
 *
 *   length - number of char16_t elements in the backing array; also stored in hdr.size
 *   hash   - hash code of the string, wrapped in INT32_C
 *   s      - initializer for the data array (the generator passes a u"..." literal)
 *
 * .characters is set to the address of a compound literal of an anonymous struct
 * (JavaArray header followed by char16_t data[length]), cast to JavaArray*.
 *
 * NOTE(review): the data array has exactly `length` elements, so there is no room for
 * the literal's terminating null; for an empty string this declares data[0].
 * Confirm that the target compilers accept both.
 */
#define TEAVM_STRING(length, hash, s) { \
.characters = (JavaArray*) & (struct { JavaArray hdr; char16_t data[length]; }) { \
.hdr = { .size = length }, \
.data = s \
}, \
.hashCode = INT32_C(hash) \
}
/*
 * Variant of the string initializer whose character data is given as a
 * comma-separated list of values instead of a string literal.
 *
 *   length - number of char16_t elements in the backing array; also stored in hdr.size
 *   hash   - hash code of the string, wrapped in INT32_C
 *   ...    - the array elements; they are placed inside braces as the initializer
 *            of the data array
 *
 * .characters is set to the address of a compound literal of an anonymous struct
 * (JavaArray header followed by char16_t data[length]), cast to JavaArray*.
 */
#define TEAVM_STRING_FROM_CODES(length, hash, ...) { \
.characters = (JavaArray*) & (struct { JavaArray hdr; char16_t data[length]; }) { \
.hdr = { .size = length }, \
.data = { __VA_ARGS__ } \
}, \
.hashCode = INT32_C(hash) \
}
static void** stackTop; static void** stackTop;
static void* gc_gcStorageAddress = NULL; static void* gc_gcStorageAddress = NULL;