Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.oracle.endeca;
- import java.util.regex.Pattern;
- import org.jetel.component.DataRecordTransform;
- import org.jetel.data.DataField;
- import org.jetel.data.DataRecord;
- import org.jetel.exception.TransformException;
- import org.jetel.metadata.DataFieldType;
- public class CleanStrings extends DataRecordTransform {
- private static final Pattern INVALID_XML_10_CHARACTERS = Pattern
- .compile("[^\\u0009\\u000a\\u000d\\u0020-\\uD7FF\\uE000-"
- + "\\uFFFD\uD800\uDC00-\uDBFF\uDFFF]+");
- @Override
- public int transform(DataRecord[] arg0, DataRecord[] arg1) throws TransformException {
- for (int i = arg0.length - 1; i >= 0; i--) {
- DataRecord rec = arg0[i];
- for (int j = rec.getNumFields() - 1; j >= 0; j--) {
- DataField field = rec.getField(j);
- if (!field.isNull()) {
- if (field.getMetadata().getDataType().equals(DataFieldType.STRING)) {
- String value = field.getValue().toString();
- if ("".equals(value)) {
- arg1[i].getField(j).setNull(true);
- } else {
- value = INVALID_XML_10_CHARACTERS.matcher(value.toString()).replaceAll("");
- arg1[i].getField(j).setValue(value);
- }
- } else {
- arg1[i].getField(j).setValue(field.getValue());
- }
- }
- }
- }
- return OK;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement