I recently created a Windows Azure Storage Account without an Affinity Group and was faced with the task of migrating the data to a newly created Windows Azure Storage Account which belongs to the Affinity Group used by the project’s Cloud Services.
By physically grouping Cloud and Storage services together, Affinity Groups contribute to the overall performance of your application by reducing network latency. Consequently, they reduce the amount of time a CPU remains idle waiting for files to process. This may not seem important at first, but it makes a world of difference for compute-intensive worker roles that depend on Windows Azure Storage for I/O.
Migrating data can be achieved in a couple of ways:
- Download the data and upload the data to the new Storage Account. (This is by far the option that takes the longest time)
- Use tools like Azure Management Studio (Some of these solutions cost money)
- You can make your own tool
I had a few thousand files to migrate and I have not had much success with tools in the past, so I decided to make it myself. I fired up Visual Studio, got some music going and came up with a prototype solution.
First, I needed a source account with containers and Block Blobs. Then I needed a target Windows Azure Storage Account, so I created a new account using the appropriate Affinity Group.
And then, I started throwing ideas around in Visual Studio. This is the result from my short brainstorm session:
/// <summary>
/// Migrates the contents of one Windows Azure Storage Account to another.
/// Blob containers are recreated in the target account (permissions and
/// metadata included) and their block blobs are copied across. Tables are
/// recreated with their permissions, but copying table rows is still a TODO.
/// </summary>
public class StorageAccountMigrator
{
    /// <summary>Lifetime, in minutes, of the read-only SAS issued for each blob copy.</summary>
    private const int SharedAccessValidityInMinutes = 360;

    private readonly CloudStorageAccount sourceAccount;
    private readonly CloudStorageAccount targetAccount;

    /// <summary>
    /// Reads the "source" and "target" connection strings from configuration
    /// and parses them into storage accounts.
    /// </summary>
    public StorageAccountMigrator()
    {
        var sourceCs = CloudConfigurationManager.GetSetting("source");
        sourceAccount = CloudStorageAccount.Parse(sourceCs);

        var targetCs = CloudConfigurationManager.GetSetting("target");
        targetAccount = CloudStorageAccount.Parse(targetCs);
    }

    /// <summary>
    /// Runs the blob and table migrations side by side and waits for both.
    /// </summary>
    /// <returns>The string "done" once both migrations have finished.</returns>
    public async Task<string> Start()
    {
        return await Task.Run(() =>
        {
            var tasks = new Task[]
            {
                MigrateBlobContainers(),
                MigrateTableStorage(),
            };

            // WaitAll (rather than WhenAll) is kept on purpose: failures keep
            // surfacing to the caller wrapped in an AggregateException.
            Task.WaitAll(tasks);
            return "done";
        });
    }

    /// <summary>Runs the table migration on a thread-pool thread.</summary>
    private Task MigrateTableStorage()
    {
        return Task.Run(() =>
        {
            CopyTableStorageFromSource();
            return "done";
        });
    }

    /// <summary>Lists every table of the source account, ordered by name, and copies each one.</summary>
    private void CopyTableStorageFromSource()
    {
        var source = sourceAccount.CreateCloudTableClient();
        var cloudTables = source.ListTables()
            .OrderBy(c => c.Name)
            .ToList();

        foreach (var table in cloudTables)
        {
            CopyTables(table);
        }
    }

    /// <summary>
    /// Creates a table with the same name in the target account, applies the
    /// source table's permissions to it and hands both tables to <see cref="CopyData"/>.
    /// </summary>
    /// <param name="table">The source table.</param>
    private void CopyTables(CloudTable table)
    {
        var target = targetAccount.CreateCloudTableClient();
        var targetTable = target.GetTableReference(table.Name);

        targetTable.CreateIfNotExists();
        targetTable.SetPermissions(table.GetPermissions());
        Console.WriteLine("Created Table Storage :" + table.Name);

        CopyData(table, targetTable);
    }

    /// <summary>Placeholder: table rows are not copied yet.</summary>
    private void CopyData(CloudTable table, CloudTable targetTable)
    {
        //TODO: Add Migration Code Here
    }

    /// <summary>Runs the blob container migration on a thread-pool thread.</summary>
    /// <returns>A task that yields "done" when every container has been processed.</returns>
    public Task<string> MigrateBlobContainers()
    {
        return Task.Run(() =>
        {
            CopyBlobContainersFromSource();
            return "done";
        });
    }

    /// <summary>Lists every container of the source account, ordered by name, and copies each one.</summary>
    private void CopyBlobContainersFromSource()
    {
        var source = sourceAccount.CreateCloudBlobClient();
        var cloudBlobContainers = source.ListContainers()
            .OrderBy(c => c.Name)
            .ToList();

        foreach (var cloudBlobContainer in cloudBlobContainers)
        {
            CopyBlobContainer(cloudBlobContainer);
        }
    }

    /// <summary>
    /// Ensures the target container exists, then concurrently copies the blobs
    /// missing from the target and re-copies the blobs whose source is newer.
    /// </summary>
    /// <param name="sourceContainer">The container to migrate.</param>
    private void CopyBlobContainer(CloudBlobContainer sourceContainer)
    {
        var targetContainer = MakeContainer(sourceContainer);
        var targetBlobs = ListAllBlobs(targetContainer);
        Trace.WriteLine(sourceContainer.Name + " Created");

        Trace.WriteLine(sourceContainer.Name + " List all blobs");
        var sourceBlobs = ListAllBlobs(sourceContainer);

        // Both tasks only read the two snapshot lists taken above.
        var missingBlobTask = Task.Run(() =>
            AddMissingBlobs(sourceContainer, sourceBlobs, targetBlobs, targetContainer));
        var updateBlobs = Task.Run(() =>
            UpdateBlobs(sourceContainer, sourceBlobs, targetBlobs, targetContainer));

        Task.WaitAll(missingBlobTask, updateBlobs);
    }

    /// <summary>
    /// Returns a flat listing of the container's blobs. Items that are not
    /// <see cref="ICloudBlob"/> are dropped instead of failing a hard cast.
    /// </summary>
    private static List<ICloudBlob> ListAllBlobs(CloudBlobContainer container)
    {
        // NOTE(review): BlobListingDetails.All presumably also returns snapshots and
        // uncommitted blobs, which share a name with their base blob - confirm
        // that this is intended.
        return container.ListBlobs(null, true, BlobListingDetails.All)
            .OfType<ICloudBlob>()
            .ToList();
    }

    /// <summary>
    /// Indexes blobs by name for constant-time lookups. When several entries
    /// share a name, the first one listed wins.
    /// </summary>
    private static Dictionary<string, ICloudBlob> IndexByName(IEnumerable<ICloudBlob> blobs)
    {
        var byName = new Dictionary<string, ICloudBlob>(StringComparer.Ordinal);
        foreach (var blob in blobs)
        {
            if (!byName.ContainsKey(blob.Name))
            {
                byName.Add(blob.Name, blob);
            }
        }

        return byName;
    }

    /// <summary>
    /// Re-copies every source blob that already exists in the target but has a
    /// more recent LastModified than its target counterpart.
    /// </summary>
    private void UpdateBlobs(CloudBlobContainer sourceContainer, IEnumerable<ICloudBlob> sourceBlobs, IEnumerable<ICloudBlob> targetBlobs, CloudBlobContainer targetContainer)
    {
        var targetsByName = IndexByName(targetBlobs);

        var updatedBlobs = sourceBlobs
            .Where(sb =>
            {
                ICloudBlob tb;

                // Blobs absent from the target are handled by AddMissingBlobs.
                return targetsByName.TryGetValue(sb.Name, out tb)
                    && tb.Properties.LastModified < sb.Properties.LastModified;
            })
            .ToList();

        Console.WriteLine(targetContainer.Name + " |> " + "Updating :" + updatedBlobs.Count + " blobs");

        Trace.WriteLine(sourceContainer.Name + " Start update all blobs");
        Parallel.ForEach(updatedBlobs, blob =>
        {
            TryCopyBlobToTargetContainer(blob, targetContainer, sourceContainer);
        });
        Trace.WriteLine(sourceContainer.Name + " End update all blobs");
    }

    /// <summary>Copies every source blob whose name does not exist in the target container.</summary>
    private void AddMissingBlobs(CloudBlobContainer sourceContainer, IEnumerable<ICloudBlob> sourceBlobs, IEnumerable<ICloudBlob> targetBlobs, CloudBlobContainer targetContainer)
    {
        var targetsByName = IndexByName(targetBlobs);

        var missingBlobs = sourceBlobs
            .Where(b => !targetsByName.ContainsKey(b.Name))
            .ToList();

        Console.WriteLine(targetContainer.Name + " |> " + "Adding missing :" + missingBlobs.Count + " blobs");

        Trace.WriteLine(sourceContainer.Name + " Start copy missing blobs");
        Parallel.ForEach(missingBlobs, blob =>
        {
            TryCopyBlobToTargetContainer(blob, targetContainer, sourceContainer);
        });
        Trace.WriteLine(sourceContainer.Name + " End copy missing blobs");
    }

    /// <summary>
    /// Creates (if needed) a container with the same name in the target
    /// account and mirrors the source container's permissions and metadata.
    /// </summary>
    /// <param name="sourceContainer">The container to mirror.</param>
    /// <returns>A reference to the target container.</returns>
    private CloudBlobContainer MakeContainer(CloudBlobContainer sourceContainer)
    {
        var target = targetAccount.CreateCloudBlobClient();
        var targetContainer = target.GetContainerReference(sourceContainer.Name);

        Trace.WriteLine(sourceContainer.Name + " Started");
        targetContainer.CreateIfNotExists();

        var blobContainerPermissions = sourceContainer.GetPermissions();
        if (blobContainerPermissions != null)
        {
            targetContainer.SetPermissions(blobContainerPermissions);
        }

        Trace.WriteLine(sourceContainer.Name + " Set Permissions");

        // The source container comes from ListContainers() called without listing
        // details, so its Metadata is not populated; fetch it before copying.
        sourceContainer.FetchAttributes();
        foreach (var meta in sourceContainer.Metadata)
        {
            // Indexer assignment: overwriting is safe, Add would throw on an existing key.
            targetContainer.Metadata[meta.Key] = meta.Value;
        }

        targetContainer.SetMetadata();

        Trace.WriteLine(sourceContainer.Name + " Set Metadata");

        return targetContainer;
    }

    /// <summary>
    /// Starts a copy of one block blob into the target container. Blobs that
    /// are not block blobs are skipped, and storage failures are traced rather
    /// than rethrown, so one bad blob does not abort the surrounding parallel loop.
    /// </summary>
    /// <param name="item">The source blob.</param>
    /// <param name="targetContainer">The container receiving the copy.</param>
    /// <param name="sourceContainer">The container the blob lives in; used to sign the read URI.</param>
    private void TryCopyBlobToTargetContainer(ICloudBlob item, CloudBlobContainer targetContainer, CloudBlobContainer sourceContainer)
    {
        var blob = item as CloudBlockBlob;
        if (blob == null)
        {
            // A hard cast here would throw InvalidCastException, which the catch
            // below does not handle.
            Trace.WriteLine(item.Name + " skipped: only block blobs are copied");
            return;
        }

        try
        {
            var blobRef = targetContainer.GetBlockBlobReference(blob.Name);

            var source = new Uri(GetShareAccessUri(blob.Name, SharedAccessValidityInMinutes, sourceContainer));
            var result = blobRef.StartCopyFromBlob(source);
            Trace.WriteLine(blob.Properties.LastModified.ToString() + " |>" + blob.Name + " :" + result);
        }
        catch (StorageException ex)
        {
            Trace.WriteLine(blob.Name + " copy failed: " + ex.Message);
        }
    }

    /// <summary>
    /// Builds an absolute URI for a blob with a read-only Shared Access
    /// Signature appended.
    /// </summary>
    /// <param name="blobname">Name of the blob inside <paramref name="container"/>.</param>
    /// <param name="validityPeriodInMinutes">Minutes from now until the signature expires.</param>
    /// <param name="container">The container holding the blob.</param>
    /// <returns>The blob URI followed by the SAS token.</returns>
    private string GetShareAccessUri(string blobname, int validityPeriodInMinutes, CloudBlobContainer container)
    {
        var policy = new SharedAccessBlobPolicy
        {
            Permissions = SharedAccessBlobPermissions.Read,
            SharedAccessStartTime = null,
            // UTC avoids any dependence on the machine's local time zone.
            SharedAccessExpiryTime = DateTimeOffset.UtcNow.AddMinutes(validityPeriodInMinutes)
        };

        var blob = container.GetBlockBlobReference(blobname);
        var sas = blob.GetSharedAccessSignature(policy);
        return blob.Uri.AbsoluteUri + sas;
    }
}
The Storage Account Migrator starts by listing all the containers from the source Windows Azure Storage Account. For each container, it creates a matching container in the target Windows Azure Storage Account and then attempts to copy each blob within Windows Azure. This requires a Shared Access Signature so that the target account is able to read the blob from the source account and create its copy.
This code is far from perfect, but it gives you an example that you can use when developing your own solution.