Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions droid-api/checkstyle/suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
<suppress checks="ClassFanOutComplexity|ClassDataAbstractionCoupling"
files=".*ProfileInstanceManagerImpl\.java$"/>

<suppress checks="ClassFanOutComplexity|ClassDataAbstractionCoupling"
files=".*DroidAPI\.java$"/>

<!-- suppress requirement for @return on enum, which conflicts with new XDocLint -->
<suppress checks="JavadocMethodCheck"
files=".*ProfileState\.java$"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationMethod;

import java.net.URI;
import java.util.Map;

public class ApiResult {
private final String extension;
Expand All @@ -42,14 +43,24 @@ public class ApiResult {
private final String name;
private final boolean fileExtensionMismatch;
private final URI uri;
private final Map<HashAlgorithm, String> hashResults;

public ApiResult(String extension, IdentificationMethod method, String puid, String name, boolean fileExtensionMismatch, URI uri) {
/**
 * Creates an identification result for a single resource.
 *
 * @param extension the file extension associated with the resource.
 * @param method the identification method that produced this result.
 * @param puid the PUID of the identified format.
 * @param name the name of the identified format.
 * @param fileExtensionMismatch whether the extension disagrees with the identified format.
 * @param uri the URI of the identified resource.
 * @param hashResults hash values keyed by algorithm; {@code null} is treated as "no hashes".
 */
public ApiResult(
        String extension,
        IdentificationMethod method,
        String puid,
        String name,
        boolean fileExtensionMismatch,
        URI uri,
        Map<HashAlgorithm, String> hashResults
) {
    this.extension = extension;
    this.method = method;
    this.puid = puid;
    this.name = name;
    this.fileExtensionMismatch = fileExtensionMismatch;
    this.uri = uri;
    // Never expose a null map: callers such as result.getHashResults().isEmpty() must be safe.
    this.hashResults = hashResults == null ? Map.of() : hashResults;
}

public String getName() {
Expand All @@ -72,6 +83,10 @@ public boolean isFileExtensionMismatch() {
return fileExtensionMismatch;
}

/**
 * @return the hash values held by this result, keyed by the algorithm that produced them.
 */
public Map<HashAlgorithm, String> getHashResults() {
return hashResults;
}

public URI getUri() {
return uri;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,40 @@
*/
package uk.gov.nationalarchives.droid.internal.api;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import org.apache.http.client.utils.URIBuilder;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Uri;
import software.amazon.awssdk.services.s3.S3Utilities;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.services.s3.model.S3Object;
import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
import uk.gov.nationalarchives.droid.core.SignatureParseException;
import uk.gov.nationalarchives.droid.core.interfaces.*;
import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier;
import uk.gov.nationalarchives.droid.core.interfaces.hash.MD5HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA1HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA256HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA512HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.resource.*;


Expand Down Expand Up @@ -107,8 +113,21 @@ public final class DroidAPI implements AutoCloseable {

private final HttpClient httpClient;


private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion, S3Client s3Client, HttpClient httpClient, Region s3Region) {
private final List<HashAlgorithm> hashAlgorithms;

private DroidAPI(
DroidCore droidCore,
ContainerIdentifier zipIdentifier,
ContainerIdentifier ole2Identifier,
ContainerIdentifier gzIdentifier,
String containerSignatureVersion,
String binarySignatureVersion,
String droidVersion,
S3Client s3Client,
HttpClient httpClient,
Region s3Region,
List<HashAlgorithm> hashAlgorithms
) {
this.droidCore = droidCore;
this.zipIdentifier = zipIdentifier;
this.ole2Identifier = ole2Identifier;
Expand All @@ -119,6 +138,7 @@ private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, Contain
this.s3Region = getRegionOrDefault(s3Region);
this.s3Client = getS3ClientOrDefault(s3Client);
this.httpClient = getHttpClientOrDefault(httpClient);
// The builder passes null when hashAlgorithms(...) was never called; fall back to an empty list so
// generateHashResults can stream it. Duplicates are dropped because Collectors.toMap rejects repeated keys.
this.hashAlgorithms = hashAlgorithms == null
        ? List.of()
        : hashAlgorithms.stream().distinct().collect(Collectors.toList());
}

private HttpClient getHttpClientOrDefault(HttpClient httpClient) {
Expand Down Expand Up @@ -154,6 +174,7 @@ public static class DroidAPIBuilder {
private S3Client s3Client;
private Region s3Region;
private HttpClient httpClient;
private List<HashAlgorithm> hashAlgorithms;

public DroidAPIBuilder binarySignature(final Path binarySignature) {
this.binarySignature = binarySignature;
Expand All @@ -180,6 +201,11 @@ public DroidAPIBuilder httpClient(final HttpClient httpClient) {
return this;
}

/**
 * Sets the hash algorithms the built API should compute for each submitted resource.
 *
 * @param hashAlgorithms the algorithms to hash every submitted resource with.
 * @return this builder, to allow call chaining.
 */
public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms) {
this.hashAlgorithms = hashAlgorithms;
return this;
}

public DroidAPI build() throws SignatureParseException {
if (this.binarySignature == null || this.containerSignature == null) {
throw new IllegalArgumentException("Container signature and binary signature are mandatory arguments");
Expand All @@ -192,7 +218,7 @@ public DroidAPI build() throws SignatureParseException {
String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0];
String droidVersion = ResourceBundle.getBundle("options").getString("version_no");
ContainerApi containerApi = new ContainerApi(droidCore, containerSignature);
return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region);
return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region, this.hashAlgorithms);
}
}

Expand Down Expand Up @@ -243,11 +269,61 @@ private List<ApiResult> submitHttpIdentification(final URI uri, String extension

final RequestIdentifier id = getRequestIdentifier(uri);

Map<HashAlgorithm, String> hashResults = generateHashResults(uri, this::getHttpHash);

try (final HttpIdentificationRequest request = new HttpIdentificationRequest(metaData, id, httpClient)) {
request.setExtension(extension);
request.open(uri);
return getApiResults(request);
return getApiResults(request, hashResults);
}
}

private <T> Map<HashAlgorithm, String> generateHashResults(T identifier, BiFunction<HashAlgorithm, T, String> hashFunction) {
return hashAlgorithms.stream().collect(Collectors.toMap(

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential NPE if no hashAlgorithms are requested by the API user.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good spot. And the test was always passing in an empty list, so I didn't notice. The builder will now return an empty list if hashAlgorithms isn't set, and I've updated the test to pass null in to check that it works.

algorithm -> algorithm,
algorithm -> hashFunction.apply(algorithm, identifier)
));
}

private String getFileHash(HashAlgorithm hashAlgorithm, Path path) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the FileInputStream staying open here?
Might be easier to use try-with-resource so it closes automatically

        try (InputStream fs = new FileInputStream(path.toFile())){
            return getHash(hashAlgorithm, fs);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I've changed it.

// try-with-resources so the file handle is released whether hashing succeeds or fails.
// FileNotFoundException is an IOException, so a missing file is still rethrown as RuntimeException.
try (InputStream fileStream = new FileInputStream(path.toFile())) {
    return getHash(hashAlgorithm, fileStream);
} catch (IOException e) {
    throw new RuntimeException(e);
}
}

private String getS3Hash(HashAlgorithm algorithm, S3Uri s3Uri) {
String key = s3Uri.key().orElseThrow(() -> new RuntimeException("Key not found in uri " + s3Uri.uri()));
String bucket = s3Uri.bucket().orElseThrow(() -> new RuntimeException("Bucket not found in uri " + s3Uri.uri()));
ResponseInputStream<GetObjectResponse> responseInputStream = s3Client.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build());

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar to above, try-with-resource to close the ResponseInputStream

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed this one as well.

// Close the S3 response stream once it has been hashed; leaving it open leaks the stream
// (and, per the AWS SDK documentation, the connection backing it).
try (responseInputStream) {
    return getHash(algorithm, responseInputStream);
} catch (IOException e) {
    throw new RuntimeException(e);
}
}

/**
 * Fetches the resource at the given URI with a GET request and hashes the response body.
 *
 * @param algorithm the hash algorithm to apply.
 * @param httpUri the URI to download.
 * @return the hash of the response body, as produced by {@code getHash}.
 * @throws RuntimeException wrapping the {@link IOException} if the request or hashing fails, or the
 *         {@link InterruptedException} if the calling thread is interrupted while waiting for the response.
 */
private String getHttpHash(HashAlgorithm algorithm, URI httpUri) {
    HttpRequest request = HttpRequest.newBuilder()
            .uri(httpUri)
            .GET()
            .build();
    // The catch clauses also cover exceptions thrown while the resource is being opened by send(...).
    try (InputStream responseStream = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()).body()) {
        return getHash(algorithm, responseStream);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

/**
 * Hashes the given stream with the generator class that corresponds to the requested algorithm.
 *
 * @param algorithm selects which hash generator is used.
 * @param inputStream the content to hash.
 * @return the value returned by the selected generator's {@code hash} method.
 * @throws RuntimeException wrapping any {@link IOException} raised while hashing.
 */
private String getHash(HashAlgorithm algorithm, InputStream inputStream) {
    final String digest;
    try {
        // Exhaustive over HashAlgorithm, so a newly added constant fails compilation here.
        digest = switch (algorithm) {
            case MD5 -> new MD5HashGenerator().hash(inputStream);
            case SHA1 -> new SHA1HashGenerator().hash(inputStream);
            case SHA256 -> new SHA256HashGenerator().hash(inputStream);
            case SHA512 -> new SHA512HashGenerator().hash(inputStream);
        };
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
    return digest;
}

Expand All @@ -265,12 +341,14 @@ private List<ApiResult> submitS3Identification(final URI uri, String extension)
throw new RuntimeException(e);
}
S3Uri s3Uri = S3Utilities.builder().region(s3Region).build().parseUri(objectUri);
Map<HashAlgorithm, String> hashResults = generateHashResults(s3Uri, this::getS3Hash);

final RequestIdentifier id = getRequestIdentifier(s3Uri.uri());
RequestMetaData metaData = new RequestMetaData(s3Object.size(), s3Object.lastModified().getEpochSecond(), s3Uri.uri().toString());
try (final S3IdentificationRequest request = new S3IdentificationRequest(metaData, id, s3Client)) {
request.setExtension(extension);
request.open(s3Uri);
apiResults.addAll(getApiResults(request));
apiResults.addAll(getApiResults(request, hashResults));
}
}
return apiResults;
Expand All @@ -293,14 +371,16 @@ private List<ApiResult> submitFileSystemIdentification(final Path file, String e

final RequestIdentifier id = getRequestIdentifier(file.toAbsolutePath().toUri());

Map<HashAlgorithm, String> hashResults = generateHashResults(file, this::getFileHash);

try (final FileSystemIdentificationRequest request = new FileSystemIdentificationRequest(metaData, id)) {
request.setExtension(extension);
request.open(file);
return getApiResults(request);
return getApiResults(request, hashResults);
}
}

private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) throws IOException {
private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request, Map<HashAlgorithm, String> hashResults) throws IOException {
IdentificationResultCollection resultCollection;
String extension = request.getExtension();

Expand All @@ -322,17 +402,17 @@ private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) thro
boolean fileExtensionMismatch = resultCollection.getExtensionMismatch();

return resultCollection.getResults()
.stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri()))
.stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri(), hashResults))
.collect(Collectors.toList());
}

private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) {
private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri, Map<HashAlgorithm, String> hashResults) {
String name = result.getName();
if (result.getMethod().equals(IdentificationMethod.CONTAINER)
&& (droidCore.formatNameByPuid(result.getPuid()) != null)) {
name = droidCore.formatNameByPuid(result.getPuid());
}
return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri);
return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri, hashResults);
}

private <T> IdentificationResultCollection identifyByExtension(final IdentificationRequest<T> identificationRequest) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2016, The National Archives <pronom@nationalarchives.gov.uk>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of the The National Archives nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package uk.gov.nationalarchives.droid.internal.api;

public enum HashAlgorithm {
/** MD5. **/

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these comments necessary?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically they are because of the checkstyle rules but I agree that this is daft so I've disabled that rule for this class.

MD5,
/** SHA1. **/
SHA1,
/** SHA256. **/
SHA256,
/** SHA512. **/
SHA512
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import static org.hamcrest.Matchers.notNullValue;
import static org.junit.jupiter.api.Assertions.*;
import static uk.gov.nationalarchives.droid.internal.api.DroidAPITestUtils.*;
import static uk.gov.nationalarchives.droid.internal.api.HashAlgorithm.*;

public class DroidAPITest {

Expand Down Expand Up @@ -162,6 +163,7 @@ public void should_identify_given_file_with_binary_signature(URI uri) throws IOE
assertThat(identificationResult.getPuid(), is("x-fmt/263"));
assertThat(identificationResult.getName(), is("ZIP Format"));
assertThat(identificationResult.getMethod(), is(IdentificationMethod.BINARY_SIGNATURE));
assertThat(identificationResult.getHashResults().isEmpty(), is(true));

}

Expand Down Expand Up @@ -241,6 +243,31 @@ public void should_return_extension_mismatch_if_extension_passed_does_not_match(
assertThat(result.isFileExtensionMismatch(), is(true));
}

// Requests every supported algorithm and checks each digest of the docx fixture.
@ParameterizedTest
@MethodSource("docxWithoutExtensionUris")
public void should_return_requested_checksums(URI uri) throws IOException, SignatureParseException {
    DroidAPI apiWithChecksums = createApi(endpointOverride, List.of(MD5, SHA1, SHA256, SHA512));
    ApiResult firstResult = apiWithChecksums.submit(uri, "docx").getFirst();
    var hashes = firstResult.getHashResults();

    assertThat(hashes.size(), is(4));
    assertThat(hashes.get(MD5), is("6aff1fe59798e3ab4da40e50b21312ca"));
    assertThat(hashes.get(SHA1), is("51fc5ba38e9762a0a64ef1ebe44b42651ef0799e"));
    assertThat(hashes.get(SHA256), is("f59669d5c045b1a25b09cdd68c6f269901522cbff1fe3c1802bfcc8b25d47e44"));
    assertThat(hashes.get(SHA512), is("64f4c15f9e56064c37b874ddf22958e9983bc9ff5f939d6fc16b06824d25a174e696c39ccd55b8cb1964110e16763fcd8855e319e069416e33249c8c59ed5b81"));
}

// Requests only MD5 and checks that exactly that one digest is returned.
@ParameterizedTest
@MethodSource("docxWithoutExtensionUris")
public void should_return_single_checksum_if_one_requested(URI uri) throws IOException, SignatureParseException {
    DroidAPI apiWithChecksums = createApi(endpointOverride, List.of(MD5));
    List<ApiResult> results = apiWithChecksums.submit(uri, "docx");
    ApiResult result = results.getFirst();
    assertThat(result.getHashResults().size(), is(1));
    assertThat(result.getHashResults().get(MD5), is("6aff1fe59798e3ab4da40e50b21312ca"));
}

@Execution(ExecutionMode.CONCURRENT)
@ParameterizedTest
@MethodSource("correctExtensionUris")
Expand Down
Loading
Loading