using System;
using System.Collections.Generic;
using System.Text;
using System.Diagnostics;
using System.Threading;
using System.ComponentModel;
using System.Collections.Concurrent;
namespace FASTER.core
{
///
/// A logically composes multiple into a single storage device. It is assumed
/// that some are used as caches while there is one that is considered the commit point, i.e. when a write is completed
/// on the device, it is considered persistent. Reads are served from the closest device with available data. Writes are issued in parallel to
/// all devices
///
class TieredStorageDevice : StorageDeviceBase
{
private readonly IList devices;
private readonly int commitPoint;
///
/// Constructs a new TieredStorageDevice composed of the given devices.
///
///
/// The index of an IDevice in . When a write has been completed on the device,
/// the write is considered persistent. It is guaranteed that the callback in
/// will not be called until the write is completed on the commit point device.
///
///
/// List of devices to be used. The list should be given in order of hot to cold. Read is served from the
/// device with smallest index in the list that has the requested data
///
public TieredStorageDevice(int commitPoint, IList devices) : base(ComputeFileString(devices, commitPoint), 512, ComputeCapacity(devices))
{
Debug.Assert(commitPoint >= 0 && commitPoint < devices.Count, "commit point is out of range");
this.devices = devices;
this.commitPoint = commitPoint;
}
///
/// Constructs a new TieredStorageDevice composed of the given devices.
///
///
/// The index of an IDevice in devices. When a write has been completed on the device,
/// the write is considered persistent. It is guaranteed that the callback in
/// will not be called until the write is completed on commit point device and all previous tiers.
///
///
/// List of devices to be used. The list should be given in order of hot to cold. Read is served from the
/// device with smallest index in the list that has the requested data
///
public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(commitPoint, (IList)devices)
{
}
public override void Initialize(long segmentSize, LightEpoch epoch)
{
base.Initialize(segmentSize, epoch);
foreach (IDevice devices in devices)
{
devices.Initialize(segmentSize, epoch);
}
}
public override void Close()
{
foreach (IDevice device in devices)
{
device.Close();
}
}
public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult)
{
// This device is epoch-protected and cannot be stale while the operation is in flight
IDevice closestDevice = devices[FindClosestDeviceContaining(segmentId)];
// We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not
// be present for certain devices.
closestDevice.ReadAsync(segmentId, sourceAddress, destinationAddress, readLength, callback, asyncResult);
}
public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult)
{
int startTier = FindClosestDeviceContaining(segmentId);
Debug.Assert(startTier <= commitPoint, "Write should not elide the commit point");
var countdown = new CountdownEvent(commitPoint + 1); // number of devices to wait on
// Issue writes to all tiers in parallel
for (int i = startTier; i < devices.Count; i++)
{
if (i <= commitPoint)
{
// All tiers before the commit point (incluisive) need to be persistent before the callback is invoked.
devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) =>
{
// The last tier to finish invokes the callback
if (countdown.Signal())
{
callback(e, n, o);
countdown.Dispose();
}
}, asyncResult);
}
else
{
// Otherwise, simply issue the write without caring about callbacks
devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) => { }, null);
}
}
}
public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result)
{
int startTier = FindClosestDeviceContaining(segment);
var countdown = new CountdownEvent(devices.Count);
for(int i = startTier; i < devices.Count; i++)
{
devices[i].RemoveSegmentAsync(segment, r =>
{
if (countdown.Signal())
{
callback(r);
countdown.Dispose();
}
}, result);
}
}
private static long ComputeCapacity(IList devices)
{
long result = 0;
// The capacity of a tiered storage device is the sum of the capacity of its tiers
foreach (IDevice device in devices)
{
// Unless the last tier device has unspecified storage capacity, in which case the tiered storage also has unspecified capacity
if (device.Capacity == Devices.CAPACITY_UNSPECIFIED)
{
Debug.Assert(device == devices[devices.Count - 1], "Only the last tier storage of a tiered storage device can have unspecified capacity");
return Devices.CAPACITY_UNSPECIFIED;
}
result = Math.Max(result, device.Capacity);
}
return result;
}
private static string ComputeFileString(IList devices, int commitPoint)
{
StringBuilder result = new StringBuilder();
foreach (IDevice device in devices)
{
string formatString = "{0}, file name {1}, capacity {2} bytes;";
string capacity = device.Capacity == Devices.CAPACITY_UNSPECIFIED ? "unspecified" : device.Capacity.ToString();
result.AppendFormat(formatString, device.GetType().Name, device.FileName, capacity);
}
result.AppendFormat("commit point: {0} at tier {1}", devices[commitPoint].GetType().Name, commitPoint);
return result.ToString();
}
private int FindClosestDeviceContaining(int segment)
{
// Can use binary search, but 1) it might not be faster than linear on a array assumed small, and 2) C# built in does not guarantee first element is returned on duplicates.
// Therefore we are sticking to the simpler approach at first.
for (int i = 0; i < devices.Count; i++)
{
if (devices[i].StartSegment <= segment) return i;
}
throw new ArgumentException("No such address exists");
}
}
}