-
Notifications
You must be signed in to change notification settings - Fork 81
Add checksum option to DROID API #1341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
1d0fcfa
Merge pull request #1330 from digital-preservation/develop
MancunianSam 2c1487a
Add checksum option to DROID API
MancunianSam ef4944b
Fix checkstyle issues
MancunianSam 928c6f5
Return multiple files results if directories are passed
MancunianSam 2f7e0ce
Initialise the hash algorithms list first
MancunianSam fb68f11
null check in builder
MancunianSam f38cb2e
Code review changes
MancunianSam File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,34 +31,41 @@ | |
| */ | ||
| package uk.gov.nationalarchives.droid.internal.api; | ||
|
|
||
| import java.io.*; | ||
| import java.net.URI; | ||
| import java.net.URISyntaxException; | ||
| import java.net.http.HttpClient; | ||
| import java.net.http.HttpRequest; | ||
| import java.net.http.HttpResponse; | ||
| import java.nio.file.Files; | ||
| import java.io.IOException; | ||
| import java.nio.file.Path; | ||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.Optional; | ||
| import java.util.ResourceBundle; | ||
| import java.util.*; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.function.BiFunction; | ||
| import java.util.stream.Collectors; | ||
| import java.util.stream.Stream; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
|
|
||
| import org.apache.http.client.utils.URIBuilder; | ||
| import software.amazon.awssdk.core.ResponseInputStream; | ||
| import software.amazon.awssdk.core.exception.SdkClientException; | ||
| import software.amazon.awssdk.regions.Region; | ||
| import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain; | ||
| import software.amazon.awssdk.services.s3.S3Client; | ||
| import software.amazon.awssdk.services.s3.S3Uri; | ||
| import software.amazon.awssdk.services.s3.S3Utilities; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectRequest; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectResponse; | ||
| import software.amazon.awssdk.services.s3.model.S3Object; | ||
| import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.SignatureParseException; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.*; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.MD5HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA1HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA256HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA512HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.resource.*; | ||
|
|
||
|
|
||
|
|
@@ -107,8 +114,21 @@ public final class DroidAPI implements AutoCloseable { | |
|
|
||
| private final HttpClient httpClient; | ||
|
|
||
|
|
||
| private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion, S3Client s3Client, HttpClient httpClient, Region s3Region) { | ||
| private final List<HashAlgorithm> hashAlgorithms; | ||
|
|
||
| private DroidAPI( | ||
| DroidCore droidCore, | ||
| ContainerIdentifier zipIdentifier, | ||
| ContainerIdentifier ole2Identifier, | ||
| ContainerIdentifier gzIdentifier, | ||
| String containerSignatureVersion, | ||
| String binarySignatureVersion, | ||
| String droidVersion, | ||
| S3Client s3Client, | ||
| HttpClient httpClient, | ||
| Region s3Region, | ||
| List<HashAlgorithm> hashAlgorithms | ||
| ) { | ||
| this.droidCore = droidCore; | ||
| this.zipIdentifier = zipIdentifier; | ||
| this.ole2Identifier = ole2Identifier; | ||
|
|
@@ -119,8 +139,14 @@ private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, Contain | |
| this.s3Region = getRegionOrDefault(s3Region); | ||
| this.s3Client = getS3ClientOrDefault(s3Client); | ||
| this.httpClient = getHttpClientOrDefault(httpClient); | ||
| this.hashAlgorithms = hashAlgorithms; | ||
| } | ||
|
|
||
| public record APIResult(List<APIIdentificationResult> identificationResults, Map<HashAlgorithm, String> hashResults) {} | ||
|
|
||
| public record APIIdentificationResult(String extension, IdentificationMethod method, String puid, String name, | ||
| boolean fileExtensionMismatch, URI uri) { } | ||
|
|
||
| private HttpClient getHttpClientOrDefault(HttpClient httpClient) { | ||
| return Optional.ofNullable(httpClient) | ||
| .orElse(HttpClient.newHttpClient()); | ||
|
|
@@ -154,6 +180,7 @@ public static class DroidAPIBuilder { | |
| private S3Client s3Client; | ||
| private Region s3Region; | ||
| private HttpClient httpClient; | ||
| private List<HashAlgorithm> hashAlgorithms = Collections.emptyList(); | ||
|
|
||
| public DroidAPIBuilder binarySignature(final Path binarySignature) { | ||
| this.binarySignature = binarySignature; | ||
|
|
@@ -180,6 +207,11 @@ public DroidAPIBuilder httpClient(final HttpClient httpClient) { | |
| return this; | ||
| } | ||
|
|
||
| public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms) { | ||
| this.hashAlgorithms = hashAlgorithms == null ? Collections.emptyList() : hashAlgorithms; | ||
| return this; | ||
| } | ||
|
|
||
| public DroidAPI build() throws SignatureParseException { | ||
| if (this.binarySignature == null || this.containerSignature == null) { | ||
| throw new IllegalArgumentException("Container signature and binary signature are mandatory arguments"); | ||
|
|
@@ -192,7 +224,7 @@ public DroidAPI build() throws SignatureParseException { | |
| String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0]; | ||
| String droidVersion = ResourceBundle.getBundle("options").getString("version_no"); | ||
| ContainerApi containerApi = new ContainerApi(droidCore, containerSignature); | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region); | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region, this.hashAlgorithms); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -207,7 +239,7 @@ public static DroidAPIBuilder builder() { | |
| * @return File identification result. File can have multiple matching signatures. | ||
| * @throws IOException If File can't be read or there is IO error. | ||
| */ | ||
| public List<ApiResult> submit(final URI uri, String extension) throws IOException { | ||
| public List<APIResult> submit(final URI uri, String extension) throws IOException { | ||
| if (S3_SCHEME.equals(uri.getScheme())) { | ||
| return submitS3Identification(uri, extension); | ||
| } else if (List.of("http", "https").contains(uri.getScheme())) { | ||
|
|
@@ -224,11 +256,11 @@ public List<ApiResult> submit(final URI uri, String extension) throws IOExceptio | |
| * @return File identification result. File can have multiple matching signatures. | ||
| * @throws IOException If File can't be read or there is IO error. | ||
| */ | ||
| public List<ApiResult> submit(final URI uri) throws IOException { | ||
| public List<APIResult> submit(final URI uri) throws IOException { | ||
| return submit(uri, null); | ||
| } | ||
|
|
||
| private List<ApiResult> submitHttpIdentification(final URI uri, String extension) throws IOException { | ||
| private List<APIResult> submitHttpIdentification(final URI uri, String extension) throws IOException { | ||
| HttpClient httpClient = this.httpClient == null ? HttpClient.newHttpClient() : this.httpClient; | ||
| HttpUtils httpUtils = new HttpUtils(httpClient); | ||
| HttpUtils.HttpMetadata httpMetadata = httpUtils.getHttpMetadata(uri); | ||
|
|
@@ -243,18 +275,72 @@ private List<ApiResult> submitHttpIdentification(final URI uri, String extension | |
|
|
||
| final RequestIdentifier id = getRequestIdentifier(uri); | ||
|
|
||
| Map<HashAlgorithm, String> hashResults = generateHashResults(uri, this::getHttpHash); | ||
|
|
||
| try (final HttpIdentificationRequest request = new HttpIdentificationRequest(metaData, id, httpClient)) { | ||
| request.setExtension(extension); | ||
| request.open(uri); | ||
| return getApiResults(request); | ||
| return List.of(new APIResult(getIdentificationResults(request), hashResults)); | ||
| } | ||
| } | ||
|
|
||
| private List<ApiResult> submitS3Identification(final URI uri, String extension) throws IOException { | ||
| private <T> Map<HashAlgorithm, String> generateHashResults(T identifier, BiFunction<HashAlgorithm, T, String> hashFunction) { | ||
| return hashAlgorithms.stream().collect(Collectors.toMap( | ||
| algorithm -> algorithm, | ||
| algorithm -> hashFunction.apply(algorithm, identifier) | ||
| )); | ||
| } | ||
|
|
||
| private String getFileHash(HashAlgorithm hashAlgorithm, Path path) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the FileInputStream staying open here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, I've changed it. |
||
| try (InputStream fileInputStream = new FileInputStream(path.toFile())) { | ||
| return getHash(hashAlgorithm, fileInputStream); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getS3Hash(HashAlgorithm algorithm, S3Uri s3Uri) { | ||
| String key = s3Uri.key().orElseThrow(() -> new RuntimeException("Key not found in uri " + s3Uri.uri())); | ||
| String bucket = s3Uri.bucket().orElseThrow(() -> new RuntimeException("Bucket not found in uri " + s3Uri.uri())); | ||
|
|
||
| try (ResponseInputStream<GetObjectResponse> responseInputStream = s3Client.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build())) { | ||
| return getHash(algorithm, responseInputStream); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getHttpHash(HashAlgorithm algorithm, URI httpUri) { | ||
| HttpRequest request = HttpRequest.newBuilder() | ||
| .uri(httpUri) | ||
| .GET() | ||
| .build(); | ||
| try { | ||
| try (InputStream responseStream = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()).body()) { | ||
| return getHash(algorithm, responseStream); | ||
| } | ||
| } catch (IOException | InterruptedException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getHash(HashAlgorithm algorithm, InputStream inputStream) { | ||
| try { | ||
| return switch (algorithm) { | ||
| case MD5 -> new MD5HashGenerator().hash(inputStream); | ||
| case SHA1 -> new SHA1HashGenerator().hash(inputStream); | ||
| case SHA256 -> new SHA256HashGenerator().hash(inputStream); | ||
| case SHA512 -> new SHA512HashGenerator().hash(inputStream); | ||
| }; | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private List<APIResult> submitS3Identification(final URI uri, String extension) throws IOException { | ||
| S3Utils s3Utils = new S3Utils(s3Client); | ||
| S3Utils.S3ObjectList objectList = s3Utils.listObjects(uri); | ||
| List<ApiResult> apiResults = new ArrayList<>(); | ||
| List<APIResult> apiResults = new ArrayList<>(); | ||
|
|
||
| for (S3Object s3Object: objectList.contents()) { | ||
| URIBuilder uriBuilder = new URIBuilder(); | ||
|
|
@@ -265,12 +351,14 @@ private List<ApiResult> submitS3Identification(final URI uri, String extension) | |
| throw new RuntimeException(e); | ||
| } | ||
| S3Uri s3Uri = S3Utilities.builder().region(s3Region).build().parseUri(objectUri); | ||
| Map<HashAlgorithm, String> hashResults = generateHashResults(s3Uri, this::getS3Hash); | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(s3Uri.uri()); | ||
| RequestMetaData metaData = new RequestMetaData(s3Object.size(), s3Object.lastModified().getEpochSecond(), s3Uri.uri().toString()); | ||
| try (final S3IdentificationRequest request = new S3IdentificationRequest(metaData, id, s3Client)) { | ||
| request.setExtension(extension); | ||
| request.open(s3Uri); | ||
| apiResults.addAll(getApiResults(request)); | ||
| apiResults.add(new APIResult(getIdentificationResults(request), hashResults)); | ||
| } | ||
| } | ||
| return apiResults; | ||
|
|
@@ -284,23 +372,45 @@ private static RequestIdentifier getRequestIdentifier(URI uri) { | |
| } | ||
|
|
||
|
|
||
| private List<ApiResult> submitFileSystemIdentification(final Path file, String extension) throws IOException { | ||
| final RequestMetaData metaData = new RequestMetaData( | ||
| Files.size(file), | ||
| Files.getLastModifiedTime(file).toMillis(), | ||
| file.toAbsolutePath().toString() | ||
| ); | ||
| private List<APIResult> submitFileSystemIdentification(final Path file, String extension) throws IOException { | ||
| if (Files.isDirectory(file)) { | ||
| try (Stream<Path> files = Files.walk(file)) { | ||
| return files.filter(Files::isRegularFile) | ||
| .map(eachFile -> getApiResultForFile(extension, eachFile)) | ||
| .toList(); | ||
| } | ||
|
|
||
| } else { | ||
| return List.of(getApiResultForFile(extension, file)); | ||
| } | ||
| } | ||
|
|
||
| private APIResult getApiResultForFile(String extension, Path eachFile) { | ||
| final RequestMetaData metaData; | ||
| try { | ||
| metaData = new RequestMetaData( | ||
| Files.size(eachFile), | ||
| Files.getLastModifiedTime(eachFile).toMillis(), | ||
| eachFile.toAbsolutePath().toString() | ||
| ); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(eachFile.toAbsolutePath().toUri()); | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(file.toAbsolutePath().toUri()); | ||
| Map<HashAlgorithm, String> hashResults = generateHashResults(eachFile, this::getFileHash); | ||
|
|
||
| try (final FileSystemIdentificationRequest request = new FileSystemIdentificationRequest(metaData, id)) { | ||
| request.setExtension(extension); | ||
| request.open(file); | ||
| return getApiResults(request); | ||
| request.open(eachFile); | ||
| return new APIResult(getIdentificationResults(request), hashResults); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) throws IOException { | ||
| private <T> List<APIIdentificationResult> getIdentificationResults(IdentificationRequest<T> request) throws IOException { | ||
| IdentificationResultCollection resultCollection; | ||
| String extension = request.getExtension(); | ||
|
|
||
|
|
@@ -322,17 +432,17 @@ private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) thro | |
| boolean fileExtensionMismatch = resultCollection.getExtensionMismatch(); | ||
|
|
||
| return resultCollection.getResults() | ||
| .stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri())) | ||
| .stream().map(res -> createIdentificationResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri())) | ||
| .collect(Collectors.toList()); | ||
| } | ||
|
|
||
| private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) { | ||
| private APIIdentificationResult createIdentificationResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) { | ||
| String name = result.getName(); | ||
| if (result.getMethod().equals(IdentificationMethod.CONTAINER) | ||
| && (droidCore.formatNameByPuid(result.getPuid()) != null)) { | ||
| name = droidCore.formatNameByPuid(result.getPuid()); | ||
| } | ||
| return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri); | ||
| return new APIIdentificationResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri); | ||
| } | ||
|
|
||
| private <T> IdentificationResultCollection identifyByExtension(final IdentificationRequest<T> identificationRequest) { | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
potential NPE if hashAlgorithm is not requested by the API user.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah good spot. And the test was always passing in an empty list so I didn't notice. The builder will now return an empty list if hasAlgorithms isn't set and I've updated the test to pass null in to check that it works.