Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions droid-api/checkstyle/suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@
<suppress checks="ClassFanOutComplexity|ClassDataAbstractionCoupling"
files=".*ProfileInstanceManagerImpl\.java$"/>

<suppress checks="ClassFanOutComplexity|ClassDataAbstractionCoupling"
files=".*DroidAPI\.java$"/>

<suppress checks="JavadocVariable"
files=".*HashAlgorithm\.java$"/>

<!-- suppress requirement for @return on enum, which conflicts with new XDocLint -->
<suppress checks="JavadocMethodCheck"
files=".*ProfileState\.java$"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,41 @@
*/
package uk.gov.nationalarchives.droid.internal.api;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;

import org.apache.http.client.utils.URIBuilder;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Uri;
import software.amazon.awssdk.services.s3.S3Utilities;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.services.s3.model.S3Object;
import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
import uk.gov.nationalarchives.droid.core.SignatureParseException;
import uk.gov.nationalarchives.droid.core.interfaces.*;
import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier;
import uk.gov.nationalarchives.droid.core.interfaces.hash.MD5HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA1HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA256HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.hash.SHA512HashGenerator;
import uk.gov.nationalarchives.droid.core.interfaces.resource.*;


Expand Down Expand Up @@ -107,8 +114,21 @@ public final class DroidAPI implements AutoCloseable {

private final HttpClient httpClient;


private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion, S3Client s3Client, HttpClient httpClient, Region s3Region) {
private final List<HashAlgorithm> hashAlgorithms;

private DroidAPI(
DroidCore droidCore,
ContainerIdentifier zipIdentifier,
ContainerIdentifier ole2Identifier,
ContainerIdentifier gzIdentifier,
String containerSignatureVersion,
String binarySignatureVersion,
String droidVersion,
S3Client s3Client,
HttpClient httpClient,
Region s3Region,
List<HashAlgorithm> hashAlgorithms
) {
this.droidCore = droidCore;
this.zipIdentifier = zipIdentifier;
this.ole2Identifier = ole2Identifier;
Expand All @@ -119,8 +139,14 @@ private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, Contain
this.s3Region = getRegionOrDefault(s3Region);
this.s3Client = getS3ClientOrDefault(s3Client);
this.httpClient = getHttpClientOrDefault(httpClient);
this.hashAlgorithms = hashAlgorithms;
}

public record APIResult(List<APIIdentificationResult> identificationResults, Map<HashAlgorithm, String> hashResults) {}

public record APIIdentificationResult(String extension, IdentificationMethod method, String puid, String name,
boolean fileExtensionMismatch, URI uri) { }

private HttpClient getHttpClientOrDefault(HttpClient httpClient) {
return Optional.ofNullable(httpClient)
.orElse(HttpClient.newHttpClient());
Expand Down Expand Up @@ -154,6 +180,7 @@ public static class DroidAPIBuilder {
private S3Client s3Client;
private Region s3Region;
private HttpClient httpClient;
private List<HashAlgorithm> hashAlgorithms = Collections.emptyList();

public DroidAPIBuilder binarySignature(final Path binarySignature) {
this.binarySignature = binarySignature;
Expand All @@ -180,6 +207,11 @@ public DroidAPIBuilder httpClient(final HttpClient httpClient) {
return this;
}

public DroidAPIBuilder hashAlgorithms(final List<HashAlgorithm> hashAlgorithms) {
this.hashAlgorithms = hashAlgorithms == null ? Collections.emptyList() : hashAlgorithms;
return this;
}

public DroidAPI build() throws SignatureParseException {
if (this.binarySignature == null || this.containerSignature == null) {
throw new IllegalArgumentException("Container signature and binary signature are mandatory arguments");
Expand All @@ -192,7 +224,7 @@ public DroidAPI build() throws SignatureParseException {
String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0];
String droidVersion = ResourceBundle.getBundle("options").getString("version_no");
ContainerApi containerApi = new ContainerApi(droidCore, containerSignature);
return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region);
return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion, this.s3Client, this.httpClient, this.s3Region, this.hashAlgorithms);
}
}

Expand All @@ -207,7 +239,7 @@ public static DroidAPIBuilder builder() {
* @return File identification result. File can have multiple matching signatures.
* @throws IOException If File can't be read or there is IO error.
*/
public List<ApiResult> submit(final URI uri, String extension) throws IOException {
public List<APIResult> submit(final URI uri, String extension) throws IOException {
if (S3_SCHEME.equals(uri.getScheme())) {
return submitS3Identification(uri, extension);
} else if (List.of("http", "https").contains(uri.getScheme())) {
Expand All @@ -224,11 +256,11 @@ public List<ApiResult> submit(final URI uri, String extension) throws IOExceptio
* @return File identification result. File can have multiple matching signatures.
* @throws IOException If File can't be read or there is IO error.
*/
public List<ApiResult> submit(final URI uri) throws IOException {
public List<APIResult> submit(final URI uri) throws IOException {
return submit(uri, null);
}

private List<ApiResult> submitHttpIdentification(final URI uri, String extension) throws IOException {
private List<APIResult> submitHttpIdentification(final URI uri, String extension) throws IOException {
HttpClient httpClient = this.httpClient == null ? HttpClient.newHttpClient() : this.httpClient;
HttpUtils httpUtils = new HttpUtils(httpClient);
HttpUtils.HttpMetadata httpMetadata = httpUtils.getHttpMetadata(uri);
Expand All @@ -243,18 +275,72 @@ private List<ApiResult> submitHttpIdentification(final URI uri, String extension

final RequestIdentifier id = getRequestIdentifier(uri);

Map<HashAlgorithm, String> hashResults = generateHashResults(uri, this::getHttpHash);

try (final HttpIdentificationRequest request = new HttpIdentificationRequest(metaData, id, httpClient)) {
request.setExtension(extension);
request.open(uri);
return getApiResults(request);
return List.of(new APIResult(getIdentificationResults(request), hashResults));
}
}

private List<ApiResult> submitS3Identification(final URI uri, String extension) throws IOException {
private <T> Map<HashAlgorithm, String> generateHashResults(T identifier, BiFunction<HashAlgorithm, T, String> hashFunction) {
return hashAlgorithms.stream().collect(Collectors.toMap(

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

potential NPE if hashAlgorithm is not requested by the API user.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah good spot. And the test was always passing in an empty list so I didn't notice. The builder will now return an empty list if hasAlgorithms isn't set and I've updated the test to pass null in to check that it works.

algorithm -> algorithm,
algorithm -> hashFunction.apply(algorithm, identifier)
));
}

private String getFileHash(HashAlgorithm hashAlgorithm, Path path) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the FileInputStream staying open here?
Might be easier to use try-with-resource so it closes automatically

        try (InputStream fs = new FileInputStream(path.toFile())){
            return getHash(hashAlgorithm, fs);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I've changed it.

try (InputStream fileInputStream = new FileInputStream(path.toFile())) {
return getHash(hashAlgorithm, fileInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

private String getS3Hash(HashAlgorithm algorithm, S3Uri s3Uri) {
String key = s3Uri.key().orElseThrow(() -> new RuntimeException("Key not found in uri " + s3Uri.uri()));
String bucket = s3Uri.bucket().orElseThrow(() -> new RuntimeException("Bucket not found in uri " + s3Uri.uri()));

try (ResponseInputStream<GetObjectResponse> responseInputStream = s3Client.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build())) {
return getHash(algorithm, responseInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

private String getHttpHash(HashAlgorithm algorithm, URI httpUri) {
HttpRequest request = HttpRequest.newBuilder()
.uri(httpUri)
.GET()
.build();
try {
try (InputStream responseStream = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()).body()) {
return getHash(algorithm, responseStream);
}
} catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}

private String getHash(HashAlgorithm algorithm, InputStream inputStream) {
try {
return switch (algorithm) {
case MD5 -> new MD5HashGenerator().hash(inputStream);
case SHA1 -> new SHA1HashGenerator().hash(inputStream);
case SHA256 -> new SHA256HashGenerator().hash(inputStream);
case SHA512 -> new SHA512HashGenerator().hash(inputStream);
};
} catch (IOException e) {
throw new RuntimeException(e);
}
}

private List<APIResult> submitS3Identification(final URI uri, String extension) throws IOException {
S3Utils s3Utils = new S3Utils(s3Client);
S3Utils.S3ObjectList objectList = s3Utils.listObjects(uri);
List<ApiResult> apiResults = new ArrayList<>();
List<APIResult> apiResults = new ArrayList<>();

for (S3Object s3Object: objectList.contents()) {
URIBuilder uriBuilder = new URIBuilder();
Expand All @@ -265,12 +351,14 @@ private List<ApiResult> submitS3Identification(final URI uri, String extension)
throw new RuntimeException(e);
}
S3Uri s3Uri = S3Utilities.builder().region(s3Region).build().parseUri(objectUri);
Map<HashAlgorithm, String> hashResults = generateHashResults(s3Uri, this::getS3Hash);

final RequestIdentifier id = getRequestIdentifier(s3Uri.uri());
RequestMetaData metaData = new RequestMetaData(s3Object.size(), s3Object.lastModified().getEpochSecond(), s3Uri.uri().toString());
try (final S3IdentificationRequest request = new S3IdentificationRequest(metaData, id, s3Client)) {
request.setExtension(extension);
request.open(s3Uri);
apiResults.addAll(getApiResults(request));
apiResults.add(new APIResult(getIdentificationResults(request), hashResults));
}
}
return apiResults;
Expand All @@ -284,23 +372,45 @@ private static RequestIdentifier getRequestIdentifier(URI uri) {
}


private List<ApiResult> submitFileSystemIdentification(final Path file, String extension) throws IOException {
final RequestMetaData metaData = new RequestMetaData(
Files.size(file),
Files.getLastModifiedTime(file).toMillis(),
file.toAbsolutePath().toString()
);
private List<APIResult> submitFileSystemIdentification(final Path file, String extension) throws IOException {
if (Files.isDirectory(file)) {
try (Stream<Path> files = Files.walk(file)) {
return files.filter(Files::isRegularFile)
.map(eachFile -> getApiResultForFile(extension, eachFile))
.toList();
}

} else {
return List.of(getApiResultForFile(extension, file));
}
}

private APIResult getApiResultForFile(String extension, Path eachFile) {
final RequestMetaData metaData;
try {
metaData = new RequestMetaData(
Files.size(eachFile),
Files.getLastModifiedTime(eachFile).toMillis(),
eachFile.toAbsolutePath().toString()
);
} catch (IOException e) {
throw new RuntimeException(e);
}

final RequestIdentifier id = getRequestIdentifier(eachFile.toAbsolutePath().toUri());

final RequestIdentifier id = getRequestIdentifier(file.toAbsolutePath().toUri());
Map<HashAlgorithm, String> hashResults = generateHashResults(eachFile, this::getFileHash);

try (final FileSystemIdentificationRequest request = new FileSystemIdentificationRequest(metaData, id)) {
request.setExtension(extension);
request.open(file);
return getApiResults(request);
request.open(eachFile);
return new APIResult(getIdentificationResults(request), hashResults);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) throws IOException {
private <T> List<APIIdentificationResult> getIdentificationResults(IdentificationRequest<T> request) throws IOException {
IdentificationResultCollection resultCollection;
String extension = request.getExtension();

Expand All @@ -322,17 +432,17 @@ private <T> List<ApiResult> getApiResults(IdentificationRequest<T> request) thro
boolean fileExtensionMismatch = resultCollection.getExtensionMismatch();

return resultCollection.getResults()
.stream().map(res -> createApiResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri()))
.stream().map(res -> createIdentificationResult(res, extension, fileExtensionMismatch, request.getIdentifier().getUri()))
.collect(Collectors.toList());
}

private ApiResult createApiResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) {
private APIIdentificationResult createIdentificationResult(IdentificationResult result, String extension, boolean extensionMismatch, URI uri) {
String name = result.getName();
if (result.getMethod().equals(IdentificationMethod.CONTAINER)
&& (droidCore.formatNameByPuid(result.getPuid()) != null)) {
name = droidCore.formatNameByPuid(result.getPuid());
}
return new ApiResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri);
return new APIIdentificationResult(extension, result.getMethod(), result.getPuid(), name, extensionMismatch, uri);
}

private <T> IdentificationResultCollection identifyByExtension(final IdentificationRequest<T> identificationRequest) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,48 +31,9 @@
*/
package uk.gov.nationalarchives.droid.internal.api;

import uk.gov.nationalarchives.droid.core.interfaces.IdentificationMethod;

import java.net.URI;

public class ApiResult {
private final String extension;
private final IdentificationMethod method;
private final String puid;
private final String name;
private final boolean fileExtensionMismatch;
private final URI uri;

public ApiResult(String extension, IdentificationMethod method, String puid, String name, boolean fileExtensionMismatch, URI uri) {
this.extension = extension;
this.method = method;
this.puid = puid;
this.name = name;
this.fileExtensionMismatch = fileExtensionMismatch;
this.uri = uri;
}

public String getName() {
return name;
}

public String getPuid() {
return puid;
}

public IdentificationMethod getMethod() {
return method;
}

public String getExtension() {
return extension;
}

public boolean isFileExtensionMismatch() {
return fileExtensionMismatch;
}

public URI getUri() {
return uri;
}
public enum HashAlgorithm {
MD5,
SHA1,
SHA256,
SHA512
}
Loading
Loading