Commit 4a07caf2 authored by Gerhard Gossen
Browse files

Implement crawlTime parameter for /content

parent 0faaa342
......@@ -35,6 +35,9 @@ snapshots of the given URL. You can pass one or more URLs as URL parameters.
GET /content?url=url_1&url=url_2& ... &url=url_n&crawlTime=timestamp
When `crawlTime` is specified (as `yyyy-MM-dd'T'HH:mm:ss`, interpreted as UTC),
only the snapshot closest to that time is retrieved for each URL.
The result is a JSON array containing one array for each URL, in the order given
by the parameters. Each of those arrays contains zero or more objects with the
following properties:
......@@ -48,7 +51,6 @@ following properties:
| headers | HTTP headers (JSON object with string keys and values) |
| content | HTTP payload (String for text types, Base64 string otherwise) |
## Setup
To start your own copy of the server, follow these steps:
......@@ -27,7 +27,7 @@ import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.format.annotation.DateTimeFormat;
import org.springframework.core.convert.converter.Converter;
import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
......@@ -36,6 +36,7 @@ import org.threeten.bp.Duration;
import org.threeten.bp.Instant;
import org.threeten.bp.LocalDateTime;
import org.threeten.bp.ZoneOffset;
import org.threeten.bp.format.DateTimeFormatter;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.threetenbp.ThreeTenModule;
......@@ -86,6 +87,18 @@ public class Server {
return builder;
Converter<String, Instant> stringToInstantConverter() {
return new Converter<String, Instant>() {
private final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss");
public Instant convert(String source) {
return LocalDateTime.parse(source, formatter).toInstant(ZoneOffset.UTC);
private static final Logger logger = LoggerFactory.getLogger(Server.class);
private final HttpResponseParser responseParser = new HttpResponseParser();
private final CharsetDetector charsetDetector = new OnlyHtmlCharsetDetector();
......@@ -110,13 +123,13 @@ public class Server {
public List<Collection<Snapshot>> getContents(@RequestParam("url") List<String> urls,
@RequestParam(value = "crawlTime", required = false) @DateTimeFormat(pattern = "yyyy-MM-dd'T'HH:mm:ss") LocalDateTime crawlTime)
@RequestParam(value = "crawlTime", required = false) Instant crawlTime)
throws IOException {
List<Collection<Snapshot>> results = new ArrayList<>(urls.size());
for (String url : urls) {
Collection<SnaphotLocation> locations = locator.findLocations(url);
if (crawlTime != null) {
SnaphotLocation closest = Collections.min(locations, new ClosestSnapshot(crawlTime.toInstant(ZoneOffset.UTC)));
SnaphotLocation closest = Collections.min(locations, new ClosestSnapshot(crawlTime));
locations = Collections.singleton(closest);
Collection<Snapshot> snapshots = Collections2.transform(locations,
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment