Commit dd8f6b45 authored by Gerhard Gossen

Deal with different date formats in ARC headers

parent 8241cb6f
......@@ -150,7 +150,15 @@ public class WARCFileRecordReader extends RecordReader<String, Snapshot> {
try {
if (header instanceof ARCRecordMetaData) {
date = ((ARCRecordMetaData) header).getDate();
timestamp = date != null ? ArchiveUtils.parse14DigitDate(date).getTime() : -1;
if (date != null) {
if (date.length() == 14) {
timestamp = ArchiveUtils.parse14DigitDate(date).getTime();
} else {
timestamp = ArchiveUtils.parse12DigitDate(date).getTime();
}
} else {
timestamp = -1;
}
} else {
date = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_DATE);
timestamp = date != null ? ISODateTimeFormat.dateTimeNoMillis().parseMillis(date) : -1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment