Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data Dataset a
- dMap :: (StaticSerialise a, StaticSerialise b) => Closure (a -> b) -> Dataset a -> Dataset b
- dFilter :: StaticSerialise a => Closure (a -> Bool) -> Dataset a -> Dataset a
- dConcatMap :: (StaticSerialise a, StaticSerialise b) => Closure (a -> [b]) -> Dataset a -> Dataset b
- dGroupedAggr :: (StaticHashable k, StaticSerialise k, StaticSerialise b) => Int -> Closure (a -> k) -> Aggr a b -> Dataset a -> Dataset (k, b)
- dDistinct :: StaticHashable a => Int -> Dataset a -> Dataset a
- dDistinctBy :: StaticHashable b => Int -> Closure (a -> b) -> Dataset a -> Dataset a
- dCoalesce :: Typeable a => Int -> Dataset a -> Dataset a
- dPipe :: (StaticSerialise a, StaticSerialise b) => Closure (ConduitT a b (ResourceT IO) ()) -> Dataset a -> Dataset b
- dPartition :: (StaticSerialise a, StaticHashable k) => Int -> Closure (a -> k) -> Dataset a -> Dataset a
- module Control.Distributed.Dataset.Aggr
- dAggr :: (StaticSerialise a, StaticSerialise b) => Aggr a b -> Dataset a -> DD b
- dFetch :: StaticSerialise a => Dataset a -> DD (ConduitT () a (ResourceT IO) ())
- dToList :: StaticSerialise a => Dataset a -> DD [a]
- data DD a
- runDD :: Backend -> ShuffleStore -> DD a -> IO a
- runDDWith :: LogLevel -> Backend -> ShuffleStore -> DD a -> IO a
- data Backend
- data ShuffleStore
- data Partition a
- mkPartition :: Typeable a => Closure (ConduitT () a (ResourceT IO) ()) -> Partition a
- dExternal :: StaticSerialise a => [Partition a] -> Dataset a
- class (Typeable a, Serialise a) => StaticSerialise a where
- staticSerialise :: Closure (Dict (Typeable a, Serialise a))
- class (Typeable a, Eq a, Hashable a) => StaticHashable a where
- initDistributedFork :: IO ()
- liftIO :: MonadIO m => IO a -> m a
- (&) :: a -> (a -> b) -> b
- data Closure a
- cap :: Typeable a => Closure (a -> b) -> Closure a -> Closure b
- cpure :: Closure (Dict (Serializable a)) -> a -> Closure a
- data Dict a where
Documentation
Represents a partitioned multiset that can be transformed in a distributed fashion.
Transformations
dMap :: (StaticSerialise a, StaticSerialise b) => Closure (a -> b) -> Dataset a -> Dataset b Source #
Returns a new Dataset that contains the result of applying the given function to each element.
dFilter :: StaticSerialise a => Closure (a -> Bool) -> Dataset a -> Dataset a Source #
Returns a new Dataset that only contains elements where the given function returns true.
dConcatMap :: (StaticSerialise a, StaticSerialise b) => Closure (a -> [b]) -> Dataset a -> Dataset b Source #
Returns a new Dataset by first applying a function to all elements of this Dataset, and then flattening the results.
:: (StaticHashable k, StaticSerialise k, StaticSerialise b) | |
=> Int | Target number of partitions |
-> Closure (a -> k) | Grouping key |
-> Aggr a b | |
-> Dataset a | |
-> Dataset (k, b) |
Apply an aggregation to all rows sharing the same key.
dDistinct :: StaticHashable a => Int -> Dataset a -> Dataset a Source #
Removes a new dataset with duplicate rows removed.
:: StaticHashable b | |
=> Int | Target number of partitions |
-> Closure (a -> b) | |
-> Dataset a | |
-> Dataset a |
Removes a new dataset with rows with the duplicate keys removed.
Low-level transformations
dCoalesce :: Typeable a => Int -> Dataset a -> Dataset a Source #
Coalesce partitions together to get the specified number of partitions.
dPipe :: (StaticSerialise a, StaticSerialise b) => Closure (ConduitT a b (ResourceT IO) ()) -> Dataset a -> Dataset b Source #
Transforms a Dataset
by passing every partition through the given Conduit.
dPartition :: (StaticSerialise a, StaticHashable k) => Int -> Closure (a -> k) -> Dataset a -> Dataset a Source #
Re-partition the dataset using the given function so that the items with the same k
will
end up in the same partition.
Aggregations
Execution
dAggr :: (StaticSerialise a, StaticSerialise b) => Aggr a b -> Dataset a -> DD b Source #
Apply an aggregation to all items on a Dataset, and fetch the result.
dFetch :: StaticSerialise a => Dataset a -> DD (ConduitT () a (ResourceT IO) ()) Source #
Returns a Conduit to fetch the results lazily to the driver.
Instances
Backend
is responsible for running your functions in a remote environment.
Should run the current binary in the target environment, put the given
string as standard input and return the executables answer on the standard
output.
|
BackendM is essentially IO
, but also has the ability to report the status of the
executor.
data ShuffleStore Source #
Provides a way to store intermediate temporary data.
Creating datasets
Represents some amount of data which to be transformed on a single executor.
mkPartition :: Typeable a => Closure (ConduitT () a (ResourceT IO) ()) -> Partition a Source #
Create a Partition
given a Source conduit.
dExternal :: StaticSerialise a => [Partition a] -> Dataset a Source #
Create a dataset from given Partition
's.
Class
class (Typeable a, Serialise a) => StaticSerialise a where Source #
Instances
class (Typeable a, Eq a, Hashable a) => StaticHashable a where Source #
Instances
StaticHashable Bool Source # | |
StaticHashable Char Source # | |
StaticHashable Double Source # | |
StaticHashable Float Source # | |
StaticHashable Int Source # | |
StaticHashable Integer Source # | |
StaticHashable () Source # | |
Defined in Control.Distributed.Dataset.Internal.Class | |
StaticHashable Text Source # | |
StaticHashable a => StaticHashable [a] Source # | |
Defined in Control.Distributed.Dataset.Internal.Class | |
(StaticHashable a, StaticHashable b) => StaticHashable (a, b) Source # | |
Defined in Control.Distributed.Dataset.Internal.Class | |
(StaticHashable a, StaticHashable b, StaticHashable c) => StaticHashable (a, b, c) Source # | |
Defined in Control.Distributed.Dataset.Internal.Class |
Re-exports
initDistributedFork :: IO () Source #
On distributed-fork, we run the same binary both in your machine (called
"driver") and in the remote environment (called "executor"). In order for the
program to act according to where it is, you should call this function as the
first thing in your main
:
Closure
Type of serializable closures. Abstractly speaking, a closure is a code
reference paired together with an environment. A serializable closure
includes a shareable code reference (i.e. a StaticPtr
). Closures can be
serialized only if all expressions captured in the environment are
serializable.
Instances
IsStatic Closure | |
Defined in Control.Distributed.Closure.Internal fromStaticPtr :: StaticPtr a -> Closure a Source # | |
StaticApply Closure | |
Defined in Control.Applicative.Static | |
StaticFunctor Closure | |
Typeable a => Binary (Closure a) | |
cap :: Typeable a => Closure (a -> b) -> Closure a -> Closure b Source #
Closure application. Note that Closure
is not a functor, let alone an
applicative functor, even if it too has a meaningful notion of application.
cpure :: Closure (Dict (Serializable a)) -> a -> Closure a Source #
A closure can be created from any serializable value. cpure
corresponds
to Control.Applicative's pure
, but restricted to
lifting serializable values only.
Values of type
capture a dictionary for a constraint of type Dict
pp
.
e.g.
Dict
::Dict
(Eq
Int
)
captures a dictionary that proves we have an:
instance Eq
'Int
Pattern matching on the Dict
constructor will bring this instance into scope.
Instances
HasDict a (Dict a) | |
a :=> (Read (Dict a)) | |
a :=> (Monoid (Dict a)) | |
a :=> (Enum (Dict a)) | |
a :=> (Bounded (Dict a)) | |
() :=> (Eq (Dict a)) | |
() :=> (Ord (Dict a)) | |
() :=> (Show (Dict a)) | |
() :=> (Semigroup (Dict a)) | |
a => Bounded (Dict a) | |
a => Enum (Dict a) | |
Defined in Data.Constraint succ :: Dict a -> Dict a Source # pred :: Dict a -> Dict a Source # toEnum :: Int -> Dict a Source # fromEnum :: Dict a -> Int Source # enumFrom :: Dict a -> [Dict a] Source # enumFromThen :: Dict a -> Dict a -> [Dict a] Source # enumFromTo :: Dict a -> Dict a -> [Dict a] Source # enumFromThenTo :: Dict a -> Dict a -> Dict a -> [Dict a] Source # | |
Eq (Dict a) | |
(Typeable p, p) => Data (Dict p) | |
Defined in Data.Constraint gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Dict p -> c (Dict p) Source # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c (Dict p) Source # toConstr :: Dict p -> Constr Source # dataTypeOf :: Dict p -> DataType Source # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c (Dict p)) Source # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (Dict p)) Source # gmapT :: (forall b. Data b => b -> b) -> Dict p -> Dict p Source # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Dict p -> r Source # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Dict p -> r Source # gmapQ :: (forall d. Data d => d -> u) -> Dict p -> [u] Source # gmapQi :: Int -> (forall d. Data d => d -> u) -> Dict p -> u Source # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Dict p -> m (Dict p) Source # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Dict p -> m (Dict p) Source # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Dict p -> m (Dict p) Source # | |
Ord (Dict a) | |
Defined in Data.Constraint | |
a => Read (Dict a) | |
Show (Dict a) | |
Semigroup (Dict a) | |
a => Monoid (Dict a) | |
NFData (Dict c) | |
Defined in Data.Constraint |