-
Notifications
You must be signed in to change notification settings - Fork 81
Add checksum option to DROID API #1341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
1d0fcfa
2c1487a
ef4944b
928c6f5
2f7e0ce
fb68f11
f38cb2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,34 +31,40 @@ | |
| */ | ||
| package uk.gov.nationalarchives.droid.internal.api; | ||
|
|
||
| import java.io.*; | ||
| import java.net.URI; | ||
| import java.net.URISyntaxException; | ||
| import java.net.http.HttpClient; | ||
| import java.net.http.HttpRequest; | ||
| import java.net.http.HttpResponse; | ||
| import java.nio.file.Files; | ||
| import java.io.IOException; | ||
| import java.nio.file.Path; | ||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.Optional; | ||
| import java.util.ResourceBundle; | ||
| import java.util.*; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.function.BiFunction; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
|
|
||
| import org.apache.http.client.utils.URIBuilder; | ||
| import software.amazon.awssdk.core.ResponseInputStream; | ||
| import software.amazon.awssdk.core.exception.SdkClientException; | ||
| import software.amazon.awssdk.regions.Region; | ||
| import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain; | ||
| import software.amazon.awssdk.services.s3.S3Client; | ||
| import software.amazon.awssdk.services.s3.S3Uri; | ||
| import software.amazon.awssdk.services.s3.S3Utilities; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectRequest; | ||
| import software.amazon.awssdk.services.s3.model.GetObjectResponse; | ||
| import software.amazon.awssdk.services.s3.model.S3Object; | ||
| import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.SignatureParseException; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.*; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.MD5HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA1HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA256HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA512HashGenerator; | ||
| import uk.gov.nationalarchives.droid.core.interfaces.resource.*; | ||
|
|
||
|
|
||
|
|
@@ -107,8 +113,21 @@ public final class DroidAPI implements AutoCloseable { | |
|
|
||
| private final HttpClient httpClient; | ||
|
|
||
|
|
||
| private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion, S3Client s3Client, HttpClient httpClient, Region s3Region) { | ||
| private final List<HashAlgorithm> hashAlgorithms; | ||
|
|
||
| private DroidAPI( | ||
| DroidCore droidCore, | ||
| ContainerIdentifier zipIdentifier, | ||
| ContainerIdentifier ole2Identifier, | ||
| ContainerIdentifier gzIdentifier, | ||
| String containerSignatureVersion, | ||
| String binarySignatureVersion, | ||
| String droidVersion, | ||
| S3Client s3Client, | ||
| HttpClient httpClient, | ||
| Region s3Region, | ||
| List<HashAlgorithm> hashAlgorithms | ||
| ) { | ||
| this.droidCore = droidCore; | ||
| this.zipIdentifier = zipIdentifier; | ||
| this.ole2Identifier = ole2Identifier; | ||
|
|
@@ -119,8 +138,14 @@ private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, Contain | |
| this.s3Region = getRegionOrDefault(s3Region); | ||
| this.s3Client = getS3ClientOrDefault(s3Client); | ||
| this.httpClient = getHttpClientOrDefault(httpClient); | ||
| this.hashAlgorithms = hashAlgorithms; | ||
| } | ||
|
|
||
| public record APIResult(List<IdentificationResult> identificationResults, Map<HashAlgorithm, String> hashResults) {} | ||
|
|
||
| /** | ||
| * A single format identification reported for a submitted resource. | ||
| * | ||
| * @param extension the extension that was set on the identification request | ||
| * @param method the method by which the identification was made | ||
| * @param puid the PUID of the identified format | ||
| * @param name the name of the identified format | ||
| * @param fileExtensionMismatch the extension-mismatch flag of the result collection this entry came from | ||
| * @param uri the URI of the identified resource | ||
| */ | ||
| public record IdentificationResult( | ||
| String extension, | ||
| IdentificationMethod method, | ||
| String puid, | ||
| String name, | ||
| boolean fileExtensionMismatch, | ||
| URI uri | ||
| ) { } | ||
|
|
||
| private HttpClient getHttpClientOrDefault(HttpClient httpClient) { | ||
| return Optional.ofNullable(httpClient) | ||
| .orElse(HttpClient.newHttpClient()); | ||
|
|
@@ -154,6 +179,7 @@ public static class DroidAPIBuilder { | |
| private S3Client s3Client; | ||
| private Region s3Region; | ||
| private HttpClient httpClient; | ||
| private List<HashAlgorithm> hashAlgorithms; | ||
|
|
||
| public DroidAPIBuilder binarySignature(final Path binarySignature) { | ||
| this.binarySignature = binarySignature; | ||
|
|
@@ -180,6 +206,11 @@ public DroidAPIBuilder httpClient(final HttpClient httpClient) { | |
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the hash algorithms for which a checksum is generated for every submitted resource. | ||
| * | ||
| * @param hashAlgorithms the algorithms to use; {@code null} is treated the same as an empty list | ||
| * @return this builder, to allow call chaining | ||
| */ | ||
| public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms) { | ||
| // Normalise null here so the builder never holds a null algorithm list. | ||
| this.hashAlgorithms = hashAlgorithms == null ? Collections.emptyList() : hashAlgorithms; | ||
| return this; | ||
| } | ||
|
|
||
| public DroidAPI build() throws SignatureParseException { | ||
| if (this.binarySignature == null || this.containerSignature == null) { | ||
| throw new IllegalArgumentException("Container signature and binary signature are mandatory arguments"); | ||
|
|
@@ -192,7 +223,8 @@ public DroidAPI build() throws SignatureParseException { | |
| String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0]; | ||
| String droidVersion = ResourceBundle.getBundle("options").getString("version_no"); | ||
| ContainerApi containerApi = new ContainerApi(droidCore, containerSignature); | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region); | ||
| List<HashAlgorithm> hashAlgorithmsOrEmptyList = this.hashAlgorithms == null ? Collections.emptyList() : this.hashAlgorithms; | ||
| return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region, hashAlgorithmsOrEmptyList); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -207,7 +239,7 @@ public static DroidAPIBuilder builder() { | |
| * @return File identification result. File can have multiple matching signatures. | ||
| * @throws IOException If File can't be read or there is IO error. | ||
| */ | ||
| public List<ApiResult> submit(final URI uri, String extension) throws IOException { | ||
| public List<APIResult> submit(final URI uri, String extension) throws IOException { | ||
| if (S3_SCHEME.equals(uri.getScheme())) { | ||
| return submitS3Identification(uri, extension); | ||
| } else if (List.of("http", "https").contains(uri.getScheme())) { | ||
|
|
@@ -224,11 +256,11 @@ public List<ApiResult> submit(final URI uri, String extension) throws IOExceptio | |
| * @return File identification result. File can have multiple matching signatures. | ||
| * @throws IOException If File can't be read or there is IO error. | ||
| */ | ||
| public List<ApiResult> submit(final URI uri) throws IOException { | ||
| public List<APIResult> submit(final URI uri) throws IOException { | ||
| return submit(uri, null); | ||
| } | ||
|
|
||
| private List<ApiResult> submitHttpIdentification(final URI uri, String extension) throws IOException { | ||
| private List<APIResult> submitHttpIdentification(final URI uri, String extension) throws IOException { | ||
| HttpClient httpClient = this.httpClient == null ? HttpClient.newHttpClient() : this.httpClient; | ||
| HttpUtils httpUtils = new HttpUtils(httpClient); | ||
| HttpUtils.HttpMetadata httpMetadata = httpUtils.getHttpMetadata(uri); | ||
|
|
@@ -243,18 +275,68 @@ private List<ApiResult> submitHttpIdentification(final URI uri, String extension | |
|
|
||
| final RequestIdentifier id = getRequestIdentifier(uri); | ||
|
|
||
| Map<HashAlgorithm, String> hashResults = generateHashResults(uri, this::getHttpHash); | ||
|
|
||
| try (final HttpIdentificationRequest request = new HttpIdentificationRequest(metaData, id, httpClient)) { | ||
| request.setExtension(extension); | ||
| request.open(uri); | ||
| return getApiResults(request); | ||
| return List.of(new APIResult(getIdentificationResults(request), hashResults)); | ||
| } | ||
| } | ||
|
|
||
| private List<ApiResult> submitS3Identification(final URI uri, String extension) throws IOException { | ||
| private <T> Map<HashAlgorithm, String> generateHashResults(T identifier, BiFunction<HashAlgorithm, T, String> hashFunction) { | ||
| return hashAlgorithms.stream().collect(Collectors.toMap( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Potential NPE if `hashAlgorithms` is not requested by the API user.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, good spot. The test was always passing in an empty list, so I didn't notice. The builder will now return an empty list if `hashAlgorithms` isn't set, and I've updated the test to pass null in to check that it works. |
||
| algorithm -> algorithm, | ||
| algorithm -> hashFunction.apply(algorithm, identifier) | ||
| )); | ||
| } | ||
|
|
||
| private String getFileHash(HashAlgorithm hashAlgorithm, Path path) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the FileInputStream staying open here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, I've changed it. |
||
| try { | ||
| return getHash(hashAlgorithm, new FileInputStream(path.toFile())); | ||
| } catch (FileNotFoundException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getS3Hash(HashAlgorithm algorithm, S3Uri s3Uri) { | ||
| String key = s3Uri.key().orElseThrow(() -> new RuntimeException("Key not found in uri " + s3Uri.uri())); | ||
| String bucket = s3Uri.bucket().orElseThrow(() -> new RuntimeException("Bucket not found in uri " + s3Uri.uri())); | ||
| ResponseInputStream<GetObjectResponse> responseInputStream = s3Client.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build()); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to above: use try-with-resources to close the `ResponseInputStream`.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've changed this one as well. |
||
| return getHash(algorithm, responseInputStream); | ||
| } | ||
|
|
||
| /** | ||
| * Fetches the resource at the given URI with a GET request and hashes the response body. | ||
| * | ||
| * @param algorithm the hash algorithm to apply | ||
| * @param httpUri the URI to request | ||
| * @return the hash of the response body, as computed by {@code getHash} | ||
| * @throws RuntimeException if the request fails with an I/O error or the calling thread is interrupted | ||
| */ | ||
| private String getHttpHash(HashAlgorithm algorithm, URI httpUri) { | ||
| HttpRequest request = HttpRequest.newBuilder() | ||
| .uri(httpUri) | ||
| .GET() | ||
| .build(); | ||
| // The response stream is closed on every path; failures while sending are handled by the catch clauses below. | ||
| try (InputStream responseStream = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()).body()) { | ||
| return getHash(algorithm, responseStream); | ||
| } catch (InterruptedException e) { | ||
| // Restore the interrupt flag so code further up the stack can still observe the interruption. | ||
| Thread.currentThread().interrupt(); | ||
| throw new RuntimeException(e); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private String getHash(HashAlgorithm algorithm, InputStream inputStream) { | ||
| try { | ||
| return switch (algorithm) { | ||
| case MD5 -> new MD5HashGenerator().hash(inputStream); | ||
| case SHA1 -> new SHA1HashGenerator().hash(inputStream); | ||
| case SHA256 -> new SHA256HashGenerator().hash(inputStream); | ||
| case SHA512 -> new SHA512HashGenerator().hash(inputStream); | ||
| }; | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private List<APIResult> submitS3Identification(final URI uri, String extension) throws IOException { | ||
| S3Utils s3Utils = new S3Utils(s3Client); | ||
| S3Utils.S3ObjectList objectList = s3Utils.listObjects(uri); | ||
| List<ApiResult> apiResults = new ArrayList<>(); | ||
| List<APIResult> apiResults = new ArrayList<>(); | ||
|
|
||
| for (S3Object s3Object: objectList.contents()) { | ||
| URIBuilder uriBuilder = new URIBuilder(); | ||
|
|
@@ -265,12 +347,14 @@ private List<ApiResult> submitS3Identification(final URI uri, String extension) | |
| throw new RuntimeException(e); | ||
| } | ||
| S3Uri s3Uri = S3Utilities.builder().region(s3Region).build().parseUri(objectUri); | ||
| Map<HashAlgorithm, String> hashResults = generateHashResults(s3Uri, this::getS3Hash); | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(s3Uri.uri()); | ||
| RequestMetaData metaData = new RequestMetaData(s3Object.size(), s3Object.lastModified().getEpochSecond(), s3Uri.uri().toString()); | ||
| try (final S3IdentificationRequest request = new S3IdentificationRequest(metaData, id, s3Client)) { | ||
| request.setExtension(extension); | ||
| request.open(s3Uri); | ||
| apiResults.addAll(getApiResults(request)); | ||
| apiResults.add(new APIResult(getIdentificationResults(request), hashResults)); | ||
| } | ||
| } | ||
| return apiResults; | ||
|
|
@@ -284,23 +368,42 @@ private static RequestIdentifier getRequestIdentifier(URI uri) { | |
| } | ||
|
|
||
|
|
||
| private List<ApiResult> submitFileSystemIdentification(final Path file, String extension) throws IOException { | ||
| final RequestMetaData metaData = new RequestMetaData( | ||
| Files.size(file), | ||
| Files.getLastModifiedTime(file).toMillis(), | ||
| file.toAbsolutePath().toString() | ||
| ); | ||
| /** | ||
| * Identifies a file-system path. A directory is walked recursively and every regular file under it | ||
| * is identified; any other path is identified as a single file. | ||
| * | ||
| * @param file the file or directory to identify | ||
| * @param extension the extension to set on each identification request | ||
| * @return one result per identified file | ||
| * @throws IOException if the directory walk cannot be started | ||
| */ | ||
| private List<APIResult> submitFileSystemIdentification(final Path file, String extension) throws IOException { | ||
| if (!Files.isDirectory(file)) { | ||
| return List.of(getApiResultForFile(extension, file)); | ||
| } | ||
| // Files.walk keeps directory handles open until the stream is closed, so scope it with try-with-resources. | ||
| try (var paths = Files.walk(file)) { | ||
| return paths.filter(Files::isRegularFile) | ||
| .map(eachFile -> getApiResultForFile(extension, eachFile)) | ||
| .toList(); | ||
| } | ||
| } | ||
|
|
||
| private APIResult getApiResultForFile(String extension, Path eachFile) { | ||
| final RequestMetaData metaData; | ||
| try { | ||
| metaData = new RequestMetaData( | ||
| Files.size(eachFile), | ||
| Files.getLastModifiedTime(eachFile).toMillis(), | ||
| eachFile.toAbsolutePath().toString() | ||
| ); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(eachFile.toAbsolutePath().toUri()); | ||
|
|
||
| final RequestIdentifier id = getRequestIdentifier(file.toAbsolutePath().toUri()); | ||
| Map<HashAlgorithm, String> hashResults = generateHashResults(eachFile, this::getFileHash); | ||
|
|
||
| try (final FileSystemIdentificationRequest request = new FileSystemIdentificationRequest(metaData, id)) { | ||
| request.setExtension(extension); | ||
| request.open(file); | ||
| return getApiResults(request); | ||
| request.open(eachFile); | ||
| return new APIResult(getIdentificationResults(request), hashResults); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
|
||
| private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) throws IOException { | ||
| private <T> List<IdentificationResult> getIdentificationResults(IdentificationRequest<T> request) throws IOException { | ||
| IdentificationResultCollection resultCollection; | ||
| String extension = request.getExtension(); | ||
|
|
||
|
|
@@ -322,17 +425,17 @@ private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) thro | |
| boolean fileExtensionMismatch = resultCollection.getExtensionMismatch(); | ||
|
|
||
| return resultCollection.getResults() | ||
| .stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri())) | ||
| .stream().map(res -> createIdentificationResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri())) | ||
| .collect(Collectors.toList()); | ||
| } | ||
|
|
||
| private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) { | ||
| private IdentificationResult createIdentificationResult(uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult result, String extension, boolean extensionMismatch, URI uri) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't you just use IdentificationResult or interfaces.IdentificationResult?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not with Java I don't think. This is a different class with the same name as the record I created. I could rename the record but I don't really want to. I've passed in the three parameters I need instead of using this class.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I've changed my mind, I've renamed the record APIIdentificationResult so this doesn't need the package qualifier now. |
||
| String name = result.getName(); | ||
| if (result.getMethod().equals(IdentificationMethod.CONTAINER) | ||
| && (droidCore.formatNameByPuid(result.getPuid()) != null)) { | ||
| name = droidCore.formatNameByPuid(result.getPuid()); | ||
| } | ||
| return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri); | ||
| return new IdentificationResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri); | ||
| } | ||
|
|
||
| private <T> IdentificationResultCollection identifyByExtension(final IdentificationRequest<T> identificationRequest) { | ||
|
|
@@ -344,7 +447,7 @@ private <T> IdentificationResultCollection identifyByExtension(final Identificat | |
| private Optional<String> getContainerPuid(final IdentificationResultCollection binaryResult) { | ||
| List<String> containerPuids = Arrays.asList(ZIP_PUID, OLE2_PUID, GZIP_PUID); | ||
| return binaryResult.getResults().stream() | ||
| .map(IdentificationResult::getPuid) | ||
| .map(uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult::getPuid) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can't you just use `IdentificationResult::getPuid`?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above, I've renamed the record so this can be used unqualified. |
||
| .filter(containerPuids::contains).findFirst(); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,48 +31,13 @@ | |
| */ | ||
| package uk.gov.nationalarchives.droid.internal.api; | ||
|
|
||
| import uk.gov.nationalarchives.droid.core.interfaces.IdentificationMethod; | ||
|
|
||
| import java.net.URI; | ||
|
|
||
| public class ApiResult { | ||
| private final String extension; | ||
| private final IdentificationMethod method; | ||
| private final String puid; | ||
| private final String name; | ||
| private final boolean fileExtensionMismatch; | ||
| private final URI uri; | ||
|
|
||
| public ApiResult(String extension, IdentificationMethod method, String puid, String name, boolean fileExtensionMismatch, URI uri) { | ||
| this.extension = extension; | ||
| this.method = method; | ||
| this.puid = puid; | ||
| this.name = name; | ||
| this.fileExtensionMismatch = fileExtensionMismatch; | ||
| this.uri = uri; | ||
| } | ||
|
|
||
| public String getName() { | ||
| return name; | ||
| } | ||
|
|
||
| public String getPuid() { | ||
| return puid; | ||
| } | ||
|
|
||
| public IdentificationMethod getMethod() { | ||
| return method; | ||
| } | ||
|
|
||
| public String getExtension() { | ||
| return extension; | ||
| } | ||
|
|
||
| public boolean isFileExtensionMismatch() { | ||
| return fileExtensionMismatch; | ||
| } | ||
|
|
||
| public URI getUri() { | ||
| return uri; | ||
| } | ||
| public enum HashAlgorithm { | ||
| /** MD5. **/ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these comments necessary?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically they are because of the checkstyle rules but I agree that this is daft so I've disabled that rule for this class. |
||
| MD5, | ||
| /** SHA1. **/ | ||
| SHA1, | ||
| /** SHA256. **/ | ||
| SHA256, | ||
| /** SHA512. **/ | ||
| SHA512 | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be my ignorance of Java, but on line 182, couldn't you just do
`private List<HashAlgorithm> hashAlgorithms = Collections.emptyList();` or `private List<HashAlgorithm> hashAlgorithmsOrEmptyList = Collections.emptyList();`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you're right, I've changed the builder so it declares
`private List<HashAlgorithm> hashAlgorithms = Collections.emptyList();`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You would still need the null protection in case someone directly uses
`Builder` and passes in a null. Might be easier to null check in `public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms)`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did think that but then I thought, if you're explicitly passing null into a builder then you deserve what happens to you. That being said, I've added a null check in the builder method.