-
Notifications
You must be signed in to change notification settings - Fork 81
Add checksum option to DROID API #1341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
1d0fcfa
2c1487a
ef4944b
928c6f5
2f7e0ce
fb68f11
f38cb2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,34 +31,40 @@ | |
| */ | ||
| package uk.gov.nationalarchives.droid.internal.api; | ||
|
|
||
| import java.io.*; | ||
| import java.net.URI; | ||
| import java.net.URISyntaxException; | ||
| import java.net.http.HttpClient; | ||
| import java.net.http.HttpRequest; | ||
| import java.net.http.HttpResponse; | ||
| import java.nio.file.Files; | ||
| import java.io.IOException; | ||
| import java.nio.file.Path; | ||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.Optional; | ||
| import java.util.ResourceBundle; | ||
| import java.util.*; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.function.BiFunction; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
|
|
||
| import org.apache.http.client.utils.URIBuilder; | ||
| import software.amazon.awssdk.core.ResponseInputStream; | ||
| import software.amazon.awssdk.core.exception.SdkClientException; | ||
| import software.amazon.awssdk.regions.Region; | ||
| import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain; | ||
| import software.amazon.awssdk.services.s3.S3Client; | ||
| import software.amazon.awssdk.services.s3.S3Uri; | ||
| import software.amazon.awssdk.services.s3.S3Utilities; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectRequest; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectResponse; | ||
| import software.amazon.awssdk.services.s3.model.S3Object; | ||
| import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.SignatureParseException; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.*; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.MD5HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA1HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA256HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA512HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.resource.*; | ||
|
|
||
|
|
||
|
|
@@ -107,8 +113,21 @@ public final class DroidAPI implements AutoCloseable { | |
|
|
||
| private final HttpClient httpClient; | ||
|
|
||
|
|
||
| private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion, S3Client s3Client, HttpClient httpClient, Region s3Region) { | ||
| private final List<HashAlgorithm> hashAlgorithms; | ||
|
|
||
| private DroidAPI( | ||
| DroidCore droidCore, | ||
| ContainerIdentifier zipIdentifier, | ||
| ContainerIdentifier ole2Identifier, | ||
| ContainerIdentifier gzIdentifier, | ||
| String containerSignatureVersion, | ||
| String binarySignatureVersion, | ||
| String droidVersion, | ||
| S3Client s3Client, | ||
| HttpClient httpClient, | ||
| Region s3Region, | ||
| List<HashAlgorithm> hashAlgorithms | ||
| ) { | ||
| this.droidCore = droidCore; | ||
| this.zipIdentifier = zipIdentifier; | ||
| this.ole2Identifier = ole2Identifier; | ||
|
|
@@ -119,6 +138,7 @@ private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, Contain | |
| this.s3Region = getRegionOrDefault(s3Region); | ||
| this.s3Client = getS3ClientOrDefault(s3Client); | ||
| this.httpClient = getHttpClientOrDefault(httpClient); | ||
| this.hashAlgorithms = hashAlgorithms; | ||
| } | ||
|
|
||
| private HttpClient getHttpClientOrDefault(HttpClient httpClient) { | ||
|
|
@@ -154,6 +174,7 @@ public static class DroidAPIBuilder { | |
| private S3Client s3Client; | ||
| private Region s3Region; | ||
| private HttpClient httpClient; | ||
| private List<HashAlgorithm> hashAlgorithms; | ||
|
|
||
| public DroidAPIBuilder binarySignature(final Path binarySignature) { | ||
| this.binarySignature = binarySignature; | ||
|
|
@@ -180,6 +201,11 @@ public DroidAPIBuilder httpClient(final HttpClient httpClient) { | |
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the hash algorithms for which a checksum is calculated for each identified resource. | ||
| * | ||
| * @param hashAlgorithms the algorithms to use; an explicit {@code null} is stored as an empty list | ||
| * @return this builder | ||
| * @throws NullPointerException if the list contains a {@code null} element | ||
| */ | ||
| public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms) { | ||
| // Copy defensively so later changes to the caller's list cannot leak into the built API, | ||
| // and replace an explicit null with an empty list so it is never dereferenced later. | ||
| this.hashAlgorithms = hashAlgorithms == null ? List.of() : List.copyOf(hashAlgorithms); | ||
| return this; | ||
| } | ||
|
|
||
| public DroidAPI build() throws SignatureParseException { | ||
| if (this.binarySignature == null || this.containerSignature == null) { | ||
| throw new IllegalArgumentException("Container signature and binary signature are mandatory arguments"); | ||
|
|
@@ -192,7 +218,7 @@ public DroidAPI build() throws SignatureParseException { | |
| String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0]; | ||
| String droidVersion = ResourceBundle.getBundle("options").getString("version_no"); | ||
| ContainerApi containerApi = new ContainerApi(droidCore, containerSignature); | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region); | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region, this.hashAlgorithms); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -243,11 +269,61 @@ private List<ApiResult> submitHttpIdentification(final URI uri, String extension | |
|
|
||
| final RequestIdentifier id = getRequestIdentifier(uri); | ||
|
|
||
| Map<HashAlgorithm, String> hashResults = generateHashResults(uri, this::getHttpHash); | ||
|
|
||
| try (final HttpIdentificationRequest request = new HttpIdentificationRequest(metaData, id, httpClient)) { | ||
| request.setExtension(extension); | ||
| request.open(uri); | ||
| return getApiResults(request); | ||
| return getApiResults(request, hashResults); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Calculates one hash per configured algorithm for the given resource. | ||
| * | ||
| * @param identifier the resource to hash, in whatever form {@code hashFunction} accepts | ||
| * @param hashFunction calculates the hash of {@code identifier} for a single algorithm | ||
| * @param <T> the type used to locate the resource | ||
| * @return the calculated hashes keyed by algorithm; empty if no algorithms are configured | ||
| */ | ||
| private <T> Map<HashAlgorithm, String> generateHashResults(T identifier, BiFunction<HashAlgorithm, T, String> hashFunction) { | ||
| // The builder leaves hashAlgorithms null when the caller never sets it, so guard against that here. | ||
| if (hashAlgorithms == null || hashAlgorithms.isEmpty()) { | ||
| return new HashMap<>(); | ||
| } | ||
| // distinct() stops a repeated algorithm from being hashed twice and from making toMap throw on a duplicate key. | ||
| return hashAlgorithms.stream().distinct().collect(Collectors.toMap( | ||
| algorithm -> algorithm, | ||
| algorithm -> hashFunction.apply(algorithm, identifier) | ||
| )); | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the hash of a file on the local file system. | ||
| * | ||
| * @param hashAlgorithm the algorithm to hash with | ||
| * @param path the file to read | ||
| * @return the hash value produced by {@code getHash} for the file's content | ||
| * @throws RuntimeException if the file cannot be opened, read or closed | ||
| */ | ||
| private String getFileHash(HashAlgorithm hashAlgorithm, Path path) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the FileInputStream staying open here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, I've changed it. |
||
| // try-with-resources closes the stream even when hashing fails, so no file handle is leaked. | ||
| try (InputStream inputStream = new FileInputStream(path.toFile())) { | ||
| return getHash(hashAlgorithm, inputStream); | ||
| } catch (IOException e) { | ||
| // Covers FileNotFoundException from opening the file as well as a failure while closing it. | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the hash of an S3 object by downloading it with the configured S3 client. | ||
| * | ||
| * @param algorithm the algorithm to hash with | ||
| * @param s3Uri the parsed S3 URI; it must contain both a bucket and a key | ||
| * @return the hash value produced by {@code getHash} for the object's content | ||
| * @throws RuntimeException if the URI has no bucket or key, or if the response stream cannot be read or closed | ||
| */ | ||
| private String getS3Hash(HashAlgorithm algorithm, S3Uri s3Uri) { | ||
| String key = s3Uri.key().orElseThrow(() -> new RuntimeException("Key not found in uri " + s3Uri.uri())); | ||
| String bucket = s3Uri.bucket().orElseThrow(() -> new RuntimeException("Bucket not found in uri " + s3Uri.uri())); | ||
| // try-with-resources closes the response stream once hashing is done, even if hashing fails. | ||
| try (ResponseInputStream<GetObjectResponse> responseInputStream = s3Client.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build())) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. similar to above, try-with-resource to close the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've changed this one as well. |
||
| return getHash(algorithm, responseInputStream); | ||
| } catch (IOException e) { | ||
| // Only close() can raise IOException here; getHash wraps its own read failures. | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the hash of the body returned by an HTTP GET request to the given URI. | ||
| * | ||
| * @param algorithm the algorithm to hash with | ||
| * @param httpUri the URI to download | ||
| * @return the hash value produced by {@code getHash} for the response body | ||
| * @throws RuntimeException if the request fails or the calling thread is interrupted while waiting | ||
| */ | ||
| private String getHttpHash(HashAlgorithm algorithm, URI httpUri) { | ||
| HttpRequest request = HttpRequest.newBuilder() | ||
| .uri(httpUri) | ||
| .GET() | ||
| .build(); | ||
| // NOTE(review): the response status code is not checked, so an error page would be hashed like any other body. | ||
| try (InputStream responseStream = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()).body()) { | ||
| return getHash(algorithm, responseStream); | ||
| } catch (InterruptedException e) { | ||
| // Restore the interrupt flag so code further up the stack can still see the interruption. | ||
| Thread.currentThread().interrupt(); | ||
| throw new RuntimeException(e); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getHash(HashAlgorithm algorithm, InputStream inputStream) { | ||
| try { | ||
| return switch (algorithm) { | ||
| case MD5 -> new MD5HashGenerator().hash(inputStream); | ||
| case SHA1 -> new SHA1HashGenerator().hash(inputStream); | ||
| case SHA256 -> new SHA256HashGenerator().hash(inputStream); | ||
| case SHA512 -> new SHA512HashGenerator().hash(inputStream); | ||
| }; | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -265,12 +341,14 @@ private List<ApiResult> submitS3Identification(final URI uri, String extension) | |
| throw new RuntimeException(e); | ||
| } | ||
| S3Uri s3Uri = S3Utilities.builder().region(s3Region).build().parseUri(objectUri); | ||
| Map<HashAlgorithm, String> hashResults = generateHashResults(s3Uri, this::getS3Hash); | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(s3Uri.uri()); | ||
| RequestMetaData metaData = new RequestMetaData(s3Object.size(), s3Object.lastModified().getEpochSecond(), s3Uri.uri().toString()); | ||
| try (final S3IdentificationRequest request = new S3IdentificationRequest(metaData, id, s3Client)) { | ||
| request.setExtension(extension); | ||
| request.open(s3Uri); | ||
| apiResults.addAll(getApiResults(request)); | ||
| apiResults.addAll(getApiResults(request, hashResults)); | ||
| } | ||
| } | ||
| return apiResults; | ||
|
|
@@ -293,14 +371,16 @@ private List<ApiResult> submitFileSystemIdentification(final Path file, String e | |
|
|
||
| final RequestIdentifier id = getRequestIdentifier(file.toAbsolutePath().toUri()); | ||
|
|
||
| Map<HashAlgorithm, String> hashResults = generateHashResults(file, this::getFileHash); | ||
|
|
||
| try (final FileSystemIdentificationRequest request = new FileSystemIdentificationRequest(metaData, id)) { | ||
| request.setExtension(extension); | ||
| request.open(file); | ||
| return getApiResults(request); | ||
| return getApiResults(request, hashResults); | ||
| } | ||
| } | ||
|
|
||
| private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) throws IOException { | ||
| private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request, Map<HashAlgorithm, String> hashResults) throws IOException { | ||
| IdentificationResultCollection resultCollection; | ||
| String extension = request.getExtension(); | ||
|
|
||
|
|
@@ -322,17 +402,17 @@ private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) thro | |
| boolean fileExtensionMismatch = resultCollection.getExtensionMismatch(); | ||
|
|
||
| return resultCollection.getResults() | ||
| .stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri())) | ||
| .stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri(), hashResults)) | ||
| .collect(Collectors.toList()); | ||
| } | ||
|
|
||
| private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) { | ||
| private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri, Map<HashAlgorithm, String> hashResults) { | ||
| String name = result.getName(); | ||
| if (result.getMethod().equals(IdentificationMethod.CONTAINER) | ||
| && (droidCore.formatNameByPuid(result.getPuid()) != null)) { | ||
| name = droidCore.formatNameByPuid(result.getPuid()); | ||
| } | ||
| return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri); | ||
| return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri, hashResults); | ||
| } | ||
|
|
||
| private <T> IdentificationResultCollection identifyByExtension(final IdentificationRequest<T> identificationRequest) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| /* | ||
| * Copyright (c) 2016, The National Archives <pronom@nationalarchives.gov.uk> | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following | ||
| * conditions are met: | ||
| * | ||
| * * Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * | ||
| * * Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in the | ||
| * documentation and/or other materials provided with the distribution. | ||
| * | ||
| * * Neither the name of the The National Archives nor the | ||
| * names of its contributors may be used to endorse or promote products | ||
| * derived from this software without specific prior written permission. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | ||
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
| package uk.gov.nationalarchives.droid.internal.api; | ||
|
|
||
| /** Hash algorithms that can be requested through the DROID API builder's {@code hashAlgorithms} option. */ public enum HashAlgorithm { | ||
| /** MD5 digest algorithm. */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these comments necessary?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically they are because of the checkstyle rules but I agree that this is daft so I've disabled that rule for this class. |
||
| MD5, | ||
| /** SHA-1 digest algorithm. */ | ||
| SHA1, | ||
| /** SHA-256 digest algorithm. */ | ||
| SHA256, | ||
| /** SHA-512 digest algorithm. */ | ||
| SHA512 | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Potential NPE if `hashAlgorithms` is not set by the API user.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, good spot. The test was always passing in an empty list, so I didn't notice. The builder will now return an empty list if hashAlgorithms isn't set, and I've updated the test to pass null in to check that it works.