This is my own implementation of the S3 ETag computation. I verified it against a large file that I uploaded to S3, using the ETag that S3 reported for it as the reference value for the multipart case.
Keep in mind that client-side compression or encryption makes the ETag useless for verifying a downloaded file: the ETag is computed over the bytes actually sent to S3, not over the original content.
Etag.java
package io.github.caillette.s3;

import com.amazonaws.services.s3.transfer.TransferManagerConfiguration;
import com.google.common.io.ByteSource;
import org.apache.commons.codec.digest.DigestUtils;

import java.io.IOException;
import java.io.InputStream;
import java.security.DigestException;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.base.Preconditions.checkArgument;

/**
 * Immutable value object for an S3-style ETag: a 32-character lowercase hexadecimal MD5 string,
 * optionally followed by a dash and a positive integer (see {@link #asString()}).
 *
 * <p>Instances are obtained through {@link #parse(String)}; both constructors are private.
 * Argument validation goes through Guava's {@code checkArgument}.
 */
public final class Etag {

  /** MD5 part of the ETag; always matches {@link #MD5_PATTERN}. */
  private final String md5 ;

  /**
   * Numeric suffix written after the dash, or {@code null} when the ETag has no suffix.
   * Always strictly positive when present.
   * NOTE(review): the expectations in EtagTest suggest this is the number of parts of a
   * multipart upload rather than the index of one part -- confirm against the compute code.
   */
  private final Integer partNumber ;

  /** Exactly 32 characters taken from {@code a-f} and {@code 0-9}; uppercase is not accepted. */
  private static final Pattern MD5_PATTERN = Pattern.compile( "[a-f0-9]{32}" ) ;

  /**
   * Whole ETag: group 1 is the MD5 part, optional group 2 holds the digits following the dash.
   */
  private static final Pattern FULL_ETAG_PATTERN = Pattern.compile( "(" + MD5_PATTERN.pattern() + ")(?:-([0-9]+))?" ) ;

  /**
   * Builds an instance from a raw digest by converting it with {@link #md5asString(byte[])}
   * and delegating to the {@code String}-based constructor.
   *
   * @param md5 raw digest; must be 16 bytes long
   * @param partNumber suffix value, or {@code null} for none
   */
  private Etag( final byte[] md5, final Integer partNumber ) {
    this( md5asString( md5 ), partNumber ) ;
  }

  /**
   * Converts a raw MD5 digest to its textual form.
   *
   * @param md5 raw digest; its length must be exactly 16 or the argument check fails
   * @return the result of {@code DigestTools.toHex(md5)}; {@code DigestTools} is not visible
   *     here -- presumably lowercase hexadecimal, since the result has to satisfy
   *     {@link #MD5_PATTERN} when it reaches the constructor (confirm)
   */
  public static String md5asString( final byte[] md5 ) {
    checkArgument( md5.length == 16 ) ;
    return DigestTools.toHex( md5 );
  }

  /**
   * Validating constructor that every instance goes through.
   *
   * @param md5 must match {@link #MD5_PATTERN} in full
   * @param partNumber {@code null}, or a value greater than zero
   */
  private Etag( final String md5, final Integer partNumber ) {
    checkArgument( MD5_PATTERN.matcher( md5 ).matches() ) ;
    checkArgument( partNumber == null || partNumber > 0 ) ;
    this.md5 = md5 ;
    this.partNumber = partNumber ;
  }

  /**
   * Returns the textual form: the MD5 string alone when there is no suffix, otherwise the MD5
   * string, a dash, and the suffix number.
   */
  public String asString() {
    return md5 + ( partNumber == null ? "" : "-" + partNumber ) ;
  }

  /**
   * Parses the textual form produced by {@link #asString()}.
   *
   * <p>The whole input must match {@link #FULL_ETAG_PATTERN}; otherwise the argument check
   * fails with the message {@code "Invalid format: "} followed by the input. A suffix of zero
   * (for instance {@code "-0"}) passes the pattern but is rejected by the constructor's
   * positivity check. The suffix digits go through {@code Integer.parseInt}.
   *
   * @param string text to parse
   * @return the corresponding instance
   */
  public static Etag parse( final String string ) {
    final Matcher matcher = FULL_ETAG_PATTERN.matcher( string ) ;
    checkArgument( matcher.matches(), "Invalid format: " + string ) ;
    final String md5 = matcher.group( 1 ) ;
    // Group 2 is null when the optional "-digits" suffix is absent.
    final String partNumber = matcher.group( 2 ) ;
    return new Etag( md5, partNumber == null ? null : Integer.parseInt( partNumber ) ) ;
  }

  /** Debug form: the simple class name with {@link #asString()} between curly braces. */
  @Override
  public String toString() {
    return getClass().getSimpleName() + "{" + asString() + "}" ;
  }

  /**
   * Two instances are equal when they have the same exact class, the same MD5 string and the
   * same suffix (both absent, or both present and equal).
   */
  @Override
  public boolean equals( final Object other ) {
    if( this == other ) {
      return true ;
    }
    if( other == null || getClass() != other.getClass() ) {
      return false ;
    }
    final Etag etag = ( Etag ) other ;
    if( ! md5.equals( etag.md5 ) ) {
      return false ;
    }
    // Null-safe comparison of the optional suffix.
    if( partNumber != null ? !partNumber.equals( etag.partNumber ) : etag.partNumber != null ) {
      return false;
    }
    return true ;
  }

  /** Combines the hash of the MD5 string with the hash of the suffix (0 when absent). */
  @Override
  public int hashCode() {
    int result = md5.hashCode();
    result = 31 * result + ( partNumber != null ? partNumber.hashCode() : 0 ) ;
    return result;
  }

  /**
   * Value of {@code getMinimumUploadPartSize()} on a default-constructed
   * {@code TransferManagerConfiguration}, read once when this class is initialized.
   */
  public static final long DEFAULT_MINIMUM_UPLOAD_PART_SIZE = new TransferManagerConfiguration().getMinimumUploadPartSize() ;

  // NOTE(review): the listing stops here. The rest of the class -- including the
  // compute( ByteSource, int ) method that EtagTest calls, and the closing brace -- is not shown.
EtagTest.java
package io.github.caillette.s3;

import com.google.common.io.Files;
import org.apache.commons.codec.digest.DigestUtils;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.novelang.testing.junit.MethodSupport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import static org.assertj.core.api.Assertions.assertThat ;

/**
 * JUnit tests for {@code Etag}: parsing of the textual form, and computation over generated
 * files compared against hard-coded expected ETags.
 *
 * <p>NOTE(review): {@code createGarbageFile} and {@code LOGGER} are used below but their
 * declarations are not part of this listing. The helper presumably writes a file of the given
 * byte length with GarbageTools -- confirm. The expected values are only valid for that exact
 * file content.
 */
public class EtagTest {

  /**
   * Computes the ETag of a 120,000,000-argument garbage file with a 5 MiB chunk size, logs the
   * elapsed time, and expects a {@code -23} suffix. If the argument is a byte count, that
   * matches 120,000,000 / 5,242,880 rounded up.
   */
  @Test
  public void bigMultipart() throws Exception {
    final File file = createGarbageFile( 120_000_000 ) ;
    final int chunkSize = 5 * 1024 * 1024 ;
    final long start = System.currentTimeMillis() ;
    final Etag etag = Etag.compute( Files.asByteSource( file ), chunkSize ) ;
    LOGGER.info( "Calculated " + etag + " in " + ( System.currentTimeMillis() - start ) + " ms." ) ;
    assertThat( etag.asString() ).isEqualTo( "94b81d1e846ec106c09eabc984314008-23" ) ;
  }

  /**
   * A 30,000-argument garbage file with a chunk size of 10,000 is expected to give a
   * {@code -3} suffix (three full chunks if the argument is a byte count).
   */
  @Test
  public void smallMultipart() throws Exception {
    final File file = createGarbageFile( 30_000 ) ;
    final int chunkSize = 10_000 ;
    final Etag etag = Etag.compute( Files.asByteSource( file ), chunkSize ) ;
    assertThat( etag.asString() ).isEqualTo( "056b4552c5ace587b5d62305d99e8555-3" ) ;
  }

  /** An ETag without suffix survives a parse / asString round trip unchanged. */
  @Test
  public void parseMonopart() throws Exception {
    final Etag etag = Etag.parse( "056b4552c5ace587b5d62305d99e8555" ) ;
    assertThat( etag.asString() ).isEqualTo( "056b4552c5ace587b5d62305d99e8555" ) ;
  }

  /** An ETag with a {@code -33} suffix survives a parse / asString round trip unchanged. */
  @Test
  public void parseMultipart() throws Exception {
    final Etag etag = Etag.parse( "056b4552c5ace587b5d62305d99e8555-33" ) ;
    assertThat( etag.asString() ).isEqualTo( "056b4552c5ace587b5d62305d99e8555-33" ) ;
  }

  /**
   * A garbage file created with argument 1,000 and a chunk size of 10,000 is expected to give
   * an ETag with no suffix at all.
   */
  @Test
  public void smallMonopart() throws Exception {
    final File file = createGarbageFile( 1_000 ) ;
    final int chunkSize = 10_000 ;
    final Etag etag = Etag.compute( Files.asByteSource( file ), chunkSize ) ;
    assertThat( etag.asString() ).isEqualTo( "cc24b86af8f8c18ca90703db6834f3f3" ) ;
  }

  // NOTE(review): the listing stops here; createGarbageFile, LOGGER and the closing brace of the
  // class are not shown.
GarbageTools.java
package io.github.caillette.s3; import com.google.common.base.Charsets; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; public final class GarbageTools { private GarbageTools() { } public static void generate( final File file, final long length ) throws IOException { try( final FileOutputStream fileOutputStream = new FileOutputStream( file ) ; final OutputStream outputStream = new BufferedOutputStream( fileOutputStream ) ) { generate( outputStream, length ) ; } } public static void generate( final OutputStream outputStream, final long length ) throws IOException { long bytesWritten = 0 ; long counter = 0 ; final StringBuilder stringBuilder = new StringBuilder() ; while( true ) { stringBuilder.append( counter ++ ).append( " " ) ; final int lineLength = stringBuilder.length() ; final boolean done = bytesWritten + lineLength >= length ; if( done ) { final int remainder = ( int ) ( length - bytesWritten ) ; stringBuilder.delete( remainder, stringBuilder.length() ) ; } outputStream.write( stringBuilder.toString().getBytes( Charsets.US_ASCII ) ) ; bytesWritten += stringBuilder.length() ; stringBuilder.delete( 0, stringBuilder.length() ) ; if( done ) { break ; } } } }
Laurent Caillette
source share