From 5d7281c396502e27fb8abf256741a75be671808e Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 22 Apr 2026 19:14:10 +0530 Subject: [PATCH 01/10] Add a "Find" module --- src/Streamly/Coreutils/Find.hs | 283 +++++++++++++++++++++++++++++++++ streamly-coreutils.cabal | 7 +- 2 files changed, 287 insertions(+), 3 deletions(-) create mode 100644 src/Streamly/Coreutils/Find.hs diff --git a/src/Streamly/Coreutils/Find.hs b/src/Streamly/Coreutils/Find.hs new file mode 100644 index 0000000..2603b07 --- /dev/null +++ b/src/Streamly/Coreutils/Find.hs @@ -0,0 +1,283 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# OPTIONS_GHC -Wno-unused-binds #-} +{-# OPTIONS_GHC -Wno-unused-imports #-} +-- | +-- Module : Streamly.Coreutils.Find +-- Copyright : (c) 2026 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Similar to GNU find. Not all options are implemented yet. +-- +-- Examples: +-- List the current directory recursively using the internal traversal variants. +-- +-- >>> :{ +-- main :: IO () +-- main = do +-- hSetBuffering stdout LineBuffering +-- let path = fromJust $ Path.fromString "." +-- #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +-- Stream.fold (Handle.writeChunks stdout) +-- $ findByteChunked path +-- #else +-- Stream.fold (Handle.writeWith 32000 stdout) +-- $ reEncode +-- $ Stream.unfoldEachEndBy 10 Array.reader +-- $ fmap Path.toArray +-- $ Stream.unfoldEach Unfold.fromList +-- $ findChunked id path +-- #endif +-- :} +-- +-- >>> :{ +-- main :: IO () +-- main = do +-- hSetBuffering stdout LineBuffering +-- let path = fromJust $ Path.fromString "." 
+-- Stream.fold (Handle.writeWith 32000 stdout) +-- $ reEncode +-- $ Stream.unfoldEachEndBy 10 Array.reader +-- $ fmap Path.toArray +-- $ Stream.unfoldEach Unfold.fromList +-- $ findChunked id path +-- :} +-- +-- >>> :{ +-- main :: IO () +-- main = do +-- hSetBuffering stdout LineBuffering +-- let path = fromJust $ Path.fromString "." +-- Stream.fold (Handle.writeWith 32000 stdout) +-- $ reEncode +-- $ Stream.unfoldEachEndBy 10 Array.reader +-- $ fmap Path.toArray +-- $ find id path +-- :} +-- +-- Compare the above example with GNU @find@ or rust @fd@. To compare listing +-- the current directory recursively, use the following commands: +-- +-- @ +-- time find > /dev/null # GNU find +-- time fd -u > /dev/null # Rust fd +-- time hfind > /dev/null # This, Haskell implementation +-- @ + +module Streamly.Coreutils.Find + ( + find + , findChunked +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) + , findByteChunked +#endif + + -- * Options + , FindOptions + ) +where + +import Data.Maybe (fromJust) +import Data.Word (Word8) +import Streamly.Data.Array (Array) +import Streamly.Data.Stream (Stream) +import Streamly.Data.Unfold (Unfold) +import Streamly.FileSystem.DirIO (ReadOptions) +import Streamly.FileSystem.Path (Path, OsWord) +import System.IO (stdout, hSetBuffering, BufferMode(LineBuffering)) + +import qualified Streamly.Data.Stream.Prelude as Stream +import qualified Streamly.Data.Array as Array +import qualified Streamly.FileSystem.DirIO as DirIO +import qualified Streamly.Internal.Data.Array as GArray (compactMax') +import qualified Streamly.Internal.Data.Stream as Stream + (unfoldEachEndBy, concatIterateDfs, concatIterateBfs, concatIterateBfsRev) +import qualified Streamly.Data.StreamK as StreamK +import qualified Streamly.Internal.Data.StreamK as StreamK + (concatIterateWith, mergeIterateWith) +import qualified Streamly.Data.Unfold as Unfold +import qualified Streamly.Internal.Data.Unfold as Unfold + (either, nil) +import qualified 
Streamly.Internal.FileSystem.DirIO as Dir + (readEitherChunks, readEitherPaths, eitherReaderPaths) +import qualified Streamly.FileSystem.Handle as Handle +import qualified Streamly.FileSystem.Path as Path +import qualified Streamly.Internal.FileSystem.Path as Path (toArray) +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +import qualified Streamly.Internal.FileSystem.Posix.ReadDir as Dir + (readEitherByteChunks) +#else +import qualified Streamly.Unicode.Stream as Stream +#endif + +-- +-- Running on a sample directory tree the concurrent rust "fd" tool took 150 ms +-- (real time). On the same tree the fastest variant using Haskell streamly +-- below took 94 ms. The time taken by other variants on the same tree is noted +-- in the comments. The fastest serial implementation using Haskell streamly +-- takes similar time as the concurrent rust "fd". +-- +-- The code for directory traversal is just a few lines. This file is bigger +-- because we have implemented it in around 27 possible ways. To try other +-- variants just uncomment the relevant line and comment the currently enabled +-- line. + +newtype FindOptions = FindOptions {findConcurrent :: Bool} + +defaultConfig :: FindOptions +defaultConfig = FindOptions False + +concurrent :: Bool -> FindOptions -> FindOptions +concurrent opt cfg = cfg {findConcurrent = opt} + +{-# INLINE recReadOpts #-} +recReadOpts :: ReadOptions -> ReadOptions +{-# INLINE reEncode #-} +reEncode :: Stream IO OsWord -> Stream IO Word8 +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +recReadOpts = id + +reEncode = + Stream.encodeUtf8 + . Stream.decodeUtf16le +#else +recReadOpts = + DirIO.followSymlinks True + . DirIO.ignoreSymlinkLoops False + . DirIO.ignoreMissing True + . DirIO.ignoreInaccessible True + +reEncode = id +#endif + +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +-- Fastest implementation, only works for posix as of now. 
+findByteChunked :: Path -> Stream IO (Array Word8) +findByteChunked path = + Stream.catRights + -- Serial + -- $ Stream.concatIterateDfs streamDirMaybe -- 154 ms + -- $ Stream.concatIterateBfs streamDirMaybe -- 154 ms + -- $ Stream.concatIterateBfsRev streamDirMaybe -- 154 ms + + -- Serial using stream append and interleave + -- $ concatIterateWith StreamK.append -- 154 ms + -- $ mergeIterateWith StreamK.interleave -- 154 ms + + -- Concurrent + -- XXX To reduce concurrency overhead, perform buffering in each worker + -- and post the buffer or return [Path] and then unfold it. + $ Stream.parConcatIterate id streamDir -- 94 ms + -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 94 ms + -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 154 ms + + $ Stream.fromPure (Left [path]) + + where + + concatIterateWith f = + StreamK.toStream + . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + mergeIterateWith f = + StreamK.toStream + . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + -- cfg = Stream.eager False . Stream.maxBuffer 2000 . Stream.maxThreads 2 + streamDir :: Either [Path] b -> Stream IO (Either [Path] (Array Word8)) + streamDir = either (Dir.readEitherByteChunks recReadOpts) (const Stream.nil) + + streamDirMaybe :: Either [Path] b -> Maybe (Stream IO (Either [Path] (Array Word8))) + streamDirMaybe = either (Just . 
Dir.readEitherByteChunks recReadOpts) (const Nothing) +#endif + +-- Faster than the find implementation below +findChunked :: (FindOptions -> FindOptions) -> Path -> Stream IO [Path] +findChunked _opts path = + Stream.catRights + + -- Serial using streams + -- $ Stream.concatIterateDfs streamDirMaybe -- 264 ms + -- $ Stream.concatIterateBfs streamDirMaybe -- 264 ms + -- $ Stream.concatIterateBfsRev streamDirMaybe -- 264 ms + + -- Serial using stream append and interleave + -- $ concatIterateWith StreamK.append -- 164 ms + -- $ mergeIterateWith StreamK.interleave -- 194 ms + + -- Concurrent + $ Stream.parConcatIterate id streamDir -- 124 ms + -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 134 ms + -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 174 ms + + $ Stream.fromPure (Left [path]) + + where + + concatIterateWith f = + StreamK.toStream + . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + mergeIterateWith f = + StreamK.toStream + . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + streamDir :: Either [Path] b -> Stream IO (Either [Path] [Path]) + streamDir = either (Dir.readEitherChunks recReadOpts) (const Stream.nil) + + streamDirMaybe :: Either [Path] b -> Maybe (Stream IO (Either [Path] [Path])) + streamDirMaybe = either (Just . 
Dir.readEitherChunks recReadOpts) (const Nothing) + +find :: (FindOptions -> FindOptions) -> Path -> Stream IO Path +find _opts path = + Stream.catRights + + -- Serial using unfolds + -- $ Stream.unfoldIterateDfs unfoldDir -- 284 ms + -- May fail with too many open files + -- $ Stream.unfoldIterateBfs unfoldDir + -- $ Stream.unfoldIterateBfsRev unfoldDir -- 344 ms + + -- Serial using streams + -- $ Stream.concatIterateDfs streamDirMaybe -- 274 ms + -- $ Stream.concatIterateBfs streamDirMaybe -- 274 ms + -- $ Stream.concatIterateBfsRev streamDirMaybe -- 264 ms + + -- Serial using stream append and interleave + -- $ concatIterateWith StreamK.append -- 204 ms + -- $ mergeIterateWith StreamK.interleave -- 304 ms + + -- Concurrent + $ Stream.parConcatIterate id streamDir -- 174 ms + -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 224 ms + -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 234 ms + + $ Stream.fromPure (Left path) + + where + + concatIterateWith f = + StreamK.toStream + . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + mergeIterateWith f = + StreamK.toStream + . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.fromStream + + streamDir :: Either Path b -> Stream IO (Either Path Path) + streamDir = either (Dir.readEitherPaths recReadOpts) (const Stream.nil) + + unfoldDir :: Unfold IO (Either Path b) (Either Path Path) + unfoldDir = Unfold.either (Dir.eitherReaderPaths recReadOpts) Unfold.nil + + streamDirMaybe :: Either Path b -> Maybe (Stream IO (Either Path Path)) + streamDirMaybe = either (Just . 
Dir.readEitherPaths recReadOpts) (const Nothing) diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index d728e9c..a1119e5 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -122,11 +122,12 @@ library , Streamly.Coreutils.Chmod , Streamly.Coreutils.Common , Streamly.Coreutils.Cp + , Streamly.Coreutils.Cut , Streamly.Coreutils.Directory , Streamly.Coreutils.Dirname , Streamly.Coreutils.FileTest + , Streamly.Coreutils.Find , Streamly.Coreutils.Ln - , Streamly.Coreutils.Cut , Streamly.Coreutils.Ls , Streamly.Coreutils.Mkdir , Streamly.Coreutils.Mv @@ -139,7 +140,6 @@ library , Streamly.Coreutils.Tail , Streamly.Coreutils.Touch , Streamly.Coreutils.Which - , Streamly.Coreutils.FileTest.Common if os(windows) exposed-modules: Streamly.Coreutils.FileTest.Windows @@ -148,7 +148,8 @@ library Streamly.Coreutils.FileTest.Posix , Streamly.Coreutils.Id other-modules: - Streamly.Coreutils.String + Streamly.Coreutils.FileTest.Common + , Streamly.Coreutils.String , Streamly.Coreutils.Uniq default-language: Haskell2010 From 38c6af4083daf594002bfd40996171489ce58741 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 22 Apr 2026 23:39:51 +0530 Subject: [PATCH 02/10] Update readme description/overview --- README.md | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2e91a9e..64836e2 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,27 @@ -# Streamly Coreutils +# Streamly Coreutils (Fast, Concurrent and Powerful) -This repository provides Haskell functions that reimplement common -GNU `coreutils` commands, utilizing the `streamly` library for -efficient, streaming data processing where applicable. 
The goal is to -offer a functional and highly performant alternative to traditional +This repository provides Haskell functions that reimplement common GNU +`coreutils` commands, utilizing the `streamly` library for efficient +and concurrent streaming data processing where applicable. The goal is +to offer a highly composable and performant alternative to traditional shell commands within Haskell applications, enabling complex data -transformations, system programming and scripting using a pure functional -paradigm. Where applicable, the implementation is designed to be -highly concurrent, for example, the `ls` equivalent can list directory +transformations, system programming and scripting using a pure +functional paradigm. Where applicable, the implementation is designed +to be concurrent; for example, the `find` equivalent can list directory contents concurrently for improved performance. +## Fast, Concurrent and Powerful + +How is it fast? For example, the serial implementation of `find` is +about as fast as the concurrent Rust `fd`, and the concurrent one is faster. How +is it concurrent? Concurrency comes for free using the Haskell streamly +library, so wherever possible the implementation is concurrent and if +you need concurrency somewhere it can be made concurrent trivially. How +is it powerful? For example, the `find` implementation has many choices +like bfs, dfs, interleaved, concurrent unordered, concurrent ordered, +concurrent interleaved; all of these are trivial to implement thanks to +Haskell streamly. 
+ ## Implemented Commands Currently, this library provides implementations for the From 75ee5035e272c90a37ebe8be0760e22ea36ac25a Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 23 Apr 2026 00:09:27 +0530 Subject: [PATCH 03/10] Add options for different types of traversal --- src/Streamly/Coreutils/Find.hs | 230 +++++++++++++++++++++------------ 1 file changed, 150 insertions(+), 80 deletions(-) diff --git a/src/Streamly/Coreutils/Find.hs b/src/Streamly/Coreutils/Find.hs index 2603b07..72725ed 100644 --- a/src/Streamly/Coreutils/Find.hs +++ b/src/Streamly/Coreutils/Find.hs @@ -22,7 +22,7 @@ -- let path = fromJust $ Path.fromString "." -- #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -- Stream.fold (Handle.writeChunks stdout) --- $ findByteChunked path +-- $ findByteChunked id path -- #else -- Stream.fold (Handle.writeWith 32000 stdout) -- $ reEncode @@ -71,6 +71,14 @@ module Streamly.Coreutils.Find ( find , findChunked + , serialDfs + , serialBfs + , serialBfsRev + , serialAppend + , serialInterleaved + , parallelUnordered + , parallelInterleaved + , parallelOrdered #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) , findByteChunked #endif @@ -94,7 +102,11 @@ import qualified Streamly.Data.Array as Array import qualified Streamly.FileSystem.DirIO as DirIO import qualified Streamly.Internal.Data.Array as GArray (compactMax') import qualified Streamly.Internal.Data.Stream as Stream - (unfoldEachEndBy, concatIterateDfs, concatIterateBfs, concatIterateBfsRev) + ( unfoldEachEndBy + , concatIterate + , bfsConcatIterate + , altBfsConcatIterate + ) import qualified Streamly.Data.StreamK as StreamK import qualified Streamly.Internal.Data.StreamK as StreamK (concatIterateWith, mergeIterateWith) @@ -125,13 +137,44 @@ import qualified Streamly.Unicode.Stream as Stream -- variants just uncomment the relevant line and comment the currently enabled -- line. 
-newtype FindOptions = FindOptions {findConcurrent :: Bool} +data FindTraversal + = FindSerialDfs + | FindSerialBfs + | FindSerialBfsRev + | FindSerialAppend + | FindSerialInterleaved + | FindParallelUnordered + | FindParallelInterleaved + | FindParallelOrdered + +newtype FindOptions = FindOptions {findTraversal :: FindTraversal} defaultConfig :: FindOptions -defaultConfig = FindOptions False +defaultConfig = FindOptions FindSerialDfs + +serialDfs :: FindOptions -> FindOptions +serialDfs cfg = cfg {findTraversal = FindSerialDfs} + +serialBfs :: FindOptions -> FindOptions +serialBfs cfg = cfg {findTraversal = FindSerialBfs} + +serialBfsRev :: FindOptions -> FindOptions +serialBfsRev cfg = cfg {findTraversal = FindSerialBfsRev} + +serialAppend :: FindOptions -> FindOptions +serialAppend cfg = cfg {findTraversal = FindSerialAppend} + +serialInterleaved :: FindOptions -> FindOptions +serialInterleaved cfg = cfg {findTraversal = FindSerialInterleaved} + +parallelUnordered :: FindOptions -> FindOptions +parallelUnordered cfg = cfg {findTraversal = FindParallelUnordered} + +parallelInterleaved :: FindOptions -> FindOptions +parallelInterleaved cfg = cfg {findTraversal = FindParallelInterleaved} -concurrent :: Bool -> FindOptions -> FindOptions -concurrent opt cfg = cfg {findConcurrent = opt} +parallelOrdered :: FindOptions -> FindOptions +parallelOrdered cfg = cfg {findTraversal = FindParallelOrdered} {-# INLINE recReadOpts #-} recReadOpts :: ReadOptions -> ReadOptions @@ -155,37 +198,48 @@ reEncode = id #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -- Fastest implementation, only works for posix as of now. 
-findByteChunked :: Path -> Stream IO (Array Word8) -findByteChunked path = - Stream.catRights - -- Serial - -- $ Stream.concatIterateDfs streamDirMaybe -- 154 ms - -- $ Stream.concatIterateBfs streamDirMaybe -- 154 ms - -- $ Stream.concatIterateBfsRev streamDirMaybe -- 154 ms - - -- Serial using stream append and interleave - -- $ concatIterateWith StreamK.append -- 154 ms - -- $ mergeIterateWith StreamK.interleave -- 154 ms - - -- Concurrent - -- XXX To reduce concurrency overhead, perform buffering in each worker - -- and post the buffer or return [Path] and then unfold it. - $ Stream.parConcatIterate id streamDir -- 94 ms - -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 94 ms - -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 154 ms - - $ Stream.fromPure (Left [path]) +findByteChunked :: (FindOptions -> FindOptions) -> Path -> Stream IO (Array Word8) +findByteChunked f path = + Stream.catRights $ + case findTraversal (f defaultConfig) of + FindSerialDfs -> + Stream.concatIterate streamDirMaybe -- 154 ms + $ Stream.fromPure (Left [path]) + FindSerialBfs -> + Stream.bfsConcatIterate streamDirMaybe -- 154 ms + $ Stream.fromPure (Left [path]) + FindSerialBfsRev -> + Stream.altBfsConcatIterate streamDirMaybe -- 154 ms + $ Stream.fromPure (Left [path]) + FindSerialAppend -> + concatIterateWith StreamK.append -- 154 ms + $ Stream.fromPure (Left [path]) + FindSerialInterleaved -> + mergeIterateWith StreamK.interleave -- 154 ms + $ Stream.fromPure (Left [path]) + FindParallelUnordered -> + -- XXX To reduce concurrency overhead, perform buffering in + -- each worker and post the buffer or return [Path] and + -- then unfold it. 
+ Stream.parConcatIterate id streamDir -- 94 ms + $ Stream.fromPure (Left [path]) + FindParallelInterleaved -> + Stream.parConcatIterate (Stream.interleaved True) streamDir -- 94 ms + $ Stream.fromPure (Left [path]) + FindParallelOrdered -> + Stream.parConcatIterate (Stream.ordered True) streamDir -- 154 ms + $ Stream.fromPure (Left [path]) where - concatIterateWith f = + concatIterateWith combine = StreamK.toStream - . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.concatIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream - mergeIterateWith f = + mergeIterateWith combine = StreamK.toStream - . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.mergeIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream -- cfg = Stream.eager False . Stream.maxBuffer 2000 . Stream.maxThreads 2 @@ -198,35 +252,44 @@ findByteChunked path = -- Faster than the find implementation below findChunked :: (FindOptions -> FindOptions) -> Path -> Stream IO [Path] -findChunked _opts path = - Stream.catRights - - -- Serial using streams - -- $ Stream.concatIterateDfs streamDirMaybe -- 264 ms - -- $ Stream.concatIterateBfs streamDirMaybe -- 264 ms - -- $ Stream.concatIterateBfsRev streamDirMaybe -- 264 ms - - -- Serial using stream append and interleave - -- $ concatIterateWith StreamK.append -- 164 ms - -- $ mergeIterateWith StreamK.interleave -- 194 ms - - -- Concurrent - $ Stream.parConcatIterate id streamDir -- 124 ms - -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 134 ms - -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 174 ms - - $ Stream.fromPure (Left [path]) +findChunked f path = + Stream.catRights $ + case findTraversal (f defaultConfig) of + FindSerialDfs -> + Stream.concatIterate streamDirMaybe -- 264 ms + $ Stream.fromPure (Left [path]) + FindSerialBfs -> + Stream.bfsConcatIterate streamDirMaybe -- 264 ms + $ Stream.fromPure (Left [path]) + FindSerialBfsRev -> 
+ Stream.altBfsConcatIterate streamDirMaybe -- 264 ms + $ Stream.fromPure (Left [path]) + FindSerialAppend -> + concatIterateWith StreamK.append -- 164 ms + $ Stream.fromPure (Left [path]) + FindSerialInterleaved -> + mergeIterateWith StreamK.interleave -- 194 ms + $ Stream.fromPure (Left [path]) + FindParallelUnordered -> + Stream.parConcatIterate id streamDir -- 124 ms + $ Stream.fromPure (Left [path]) + FindParallelInterleaved -> + Stream.parConcatIterate (Stream.interleaved True) streamDir -- 134 ms + $ Stream.fromPure (Left [path]) + FindParallelOrdered -> + Stream.parConcatIterate (Stream.ordered True) streamDir -- 174 ms + $ Stream.fromPure (Left [path]) where - concatIterateWith f = + concatIterateWith combine = StreamK.toStream - . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.concatIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream - mergeIterateWith f = + mergeIterateWith combine = StreamK.toStream - . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.mergeIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream streamDir :: Either [Path] b -> Stream IO (Either [Path] [Path]) @@ -236,41 +299,48 @@ findChunked _opts path = streamDirMaybe = either (Just . 
Dir.readEitherChunks recReadOpts) (const Nothing) find :: (FindOptions -> FindOptions) -> Path -> Stream IO Path -find _opts path = - Stream.catRights - - -- Serial using unfolds - -- $ Stream.unfoldIterateDfs unfoldDir -- 284 ms - -- May fail with too many open files - -- $ Stream.unfoldIterateBfs unfoldDir - -- $ Stream.unfoldIterateBfsRev unfoldDir -- 344 ms - - -- Serial using streams - -- $ Stream.concatIterateDfs streamDirMaybe -- 274 ms - -- $ Stream.concatIterateBfs streamDirMaybe -- 274 ms - -- $ Stream.concatIterateBfsRev streamDirMaybe -- 264 ms - - -- Serial using stream append and interleave - -- $ concatIterateWith StreamK.append -- 204 ms - -- $ mergeIterateWith StreamK.interleave -- 304 ms - - -- Concurrent - $ Stream.parConcatIterate id streamDir -- 174 ms - -- $ Stream.parConcatIterate (Stream.interleaved True) streamDir -- 224 ms - -- $ Stream.parConcatIterate (Stream.ordered True) streamDir -- 234 ms - - $ Stream.fromPure (Left path) +find f path = + Stream.catRights $ + case findTraversal (f defaultConfig) of + FindSerialDfs -> + -- Stream.unfoldIterateDfs unfoldDir -- 284 ms + Stream.concatIterate streamDirMaybe -- 274 ms + $ Stream.fromPure (Left path) + FindSerialBfs -> + -- May fail with too many open files: + -- Stream.unfoldIterateBfs unfoldDir + Stream.bfsConcatIterate streamDirMaybe -- 274 ms + $ Stream.fromPure (Left path) + FindSerialBfsRev -> + -- Stream.unfoldIterateBfsRev unfoldDir -- 344 ms + Stream.altBfsConcatIterate streamDirMaybe -- 264 ms + $ Stream.fromPure (Left path) + FindSerialAppend -> + concatIterateWith StreamK.append -- 204 ms + $ Stream.fromPure (Left path) + FindSerialInterleaved -> + mergeIterateWith StreamK.interleave -- 304 ms + $ Stream.fromPure (Left path) + FindParallelUnordered -> + Stream.parConcatIterate id streamDir -- 174 ms + $ Stream.fromPure (Left path) + FindParallelInterleaved -> + Stream.parConcatIterate (Stream.interleaved True) streamDir -- 224 ms + $ Stream.fromPure (Left path) + 
FindParallelOrdered -> + Stream.parConcatIterate (Stream.ordered True) streamDir -- 234 ms + $ Stream.fromPure (Left path) where - concatIterateWith f = + concatIterateWith combine = StreamK.toStream - . StreamK.concatIterateWith f (StreamK.fromStream . streamDir) + . StreamK.concatIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream - mergeIterateWith f = + mergeIterateWith combine = StreamK.toStream - . StreamK.mergeIterateWith f (StreamK.fromStream . streamDir) + . StreamK.mergeIterateWith combine (StreamK.fromStream . streamDir) . StreamK.fromStream streamDir :: Either Path b -> Stream IO (Either Path Path) From bfd1fe573be2a433ea2e0dbb57fc4501ec8804eb Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 23 Apr 2026 01:14:06 +0530 Subject: [PATCH 04/10] Add an hfd executable to use the "Find" module --- app/hfd.hs | 141 +++++++++++++++++++++++++++++++++++++++ streamly-coreutils.cabal | 12 ++++ 2 files changed, 153 insertions(+) create mode 100644 app/hfd.hs diff --git a/app/hfd.hs b/app/hfd.hs new file mode 100644 index 0000000..fce7789 --- /dev/null +++ b/app/hfd.hs @@ -0,0 +1,141 @@ +{-# LANGUAGE CPP #-} + +module Main (main) where + +import System.IO (stdout) + +import Options.Applicative + ( Parser + , ParserInfo + , briefDesc + , execParser + , fullDesc + , header + , help + , helper + , info + , long + , metavar + , optional + , progDesc + , strArgument + , (<**>) + ) +import qualified Options.Applicative as OA +import qualified Streamly.Data.Stream.Prelude as Stream +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +import qualified Streamly.Data.Array as Array +import qualified Streamly.Data.Unfold as Unfold +import qualified Streamly.Internal.Data.Stream as Stream (unfoldEachEndBy) +import qualified Streamly.Unicode.Stream as Unicode +#endif +import qualified Streamly.FileSystem.Handle as Handle +import qualified Streamly.FileSystem.Path as Path + +import Streamly.Coreutils.Find + ( FindOptions + , parallelInterleaved + , 
parallelOrdered + , parallelUnordered + , serialAppend + , serialBfs + , serialBfsRev + , serialDfs + , serialInterleaved + ) +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +import Streamly.Coreutils.Find (findByteChunked) +#else +import Streamly.Coreutils.Find (findChunked) +#endif + +data Config = Config + { cfgTraversal :: FindOptions -> FindOptions + , cfgRoot :: FilePath + } + +data Traversal + = TraversalDfs + | TraversalBfs + | TraversalBfsRev + | TraversalAppend + | TraversalInterleaved + | TraversalParallel + | TraversalParallelInterleaved + | TraversalParallelOrdered + +toTraversalConfig :: Traversal -> FindOptions -> FindOptions +toTraversalConfig traversal = + case traversal of + TraversalDfs -> serialDfs + TraversalBfs -> serialBfs + TraversalBfsRev -> serialBfsRev + TraversalAppend -> serialAppend + TraversalInterleaved -> serialInterleaved + TraversalParallel -> parallelUnordered + TraversalParallelInterleaved -> parallelInterleaved + TraversalParallelOrdered -> parallelOrdered + +mkConfig :: Traversal -> Maybe FilePath -> Config +mkConfig traversal mPath = + Config + { cfgTraversal = toTraversalConfig traversal + , cfgRoot = maybe "." 
id mPath + } + +traversalParser :: Parser Traversal +traversalParser = + OA.flag' TraversalBfs + (long "bfs" <> help "Breadth-first traversal") + OA.<|> OA.flag' TraversalBfsRev + (long "bfs-rev" <> help "Reverse breadth-first traversal") + OA.<|> OA.flag' TraversalAppend + (long "append" <> help "Serial append traversal") + OA.<|> OA.flag' TraversalInterleaved + (long "interleaved" <> help "Serial interleaved traversal") + OA.<|> OA.flag' TraversalParallel + (long "parallel" <> help "Parallel unordered traversal") + OA.<|> OA.flag' TraversalParallelInterleaved + (long "parallel-interleaved" <> help "Parallel interleaved traversal") + OA.<|> OA.flag' TraversalParallelOrdered + (long "parallel-ordered" <> help "Parallel ordered traversal") + OA.<|> OA.flag' TraversalDfs + (long "dfs" <> help "Depth-first traversal") + OA.<|> OA.pure TraversalDfs + +configParser :: Parser Config +configParser = + mkConfig + <$> traversalParser + <*> optional + (strArgument + (metavar "PATH" <> help "Root path to search")) + +parserInfo :: ParserInfo Config +parserInfo = + info + (configParser <**> helper) + (fullDesc + <> briefDesc + <> progDesc "A basic fd-like driver for Streamly.Coreutils.Find." 
+ <> header "hfd") + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +#endif + +main :: IO () +main = do + cfg <- execParser parserInfo + path <- Path.fromString (cfgRoot cfg) +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) + Stream.fold (Handle.writeChunks stdout) + $ findByteChunked (cfgTraversal cfg) path +#else + Stream.fold (Handle.writeWith 32000 stdout) + $ Unicode.encodeUtf8 + $ Unicode.decodeUtf16le + $ Stream.unfoldEachEndBy 10 Array.reader + $ fmap Path.toArray + $ Stream.unfoldEach Unfold.fromList + $ findChunked (cfgTraversal cfg) path +#endif diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index a1119e5..e2205fa 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -154,6 +154,18 @@ library default-language: Haskell2010 +executable hfd + import: compile-options, default-extensions + main-is: hfd.hs + hs-source-dirs: app + build-depends: + base + , optparse-applicative >= 0.17 && < 0.19 + , streamly + , streamly-core + , streamly-coreutils + default-language: Haskell2010 + ------------------------------------------------------------------------------- -- Benchmarks ------------------------------------------------------------------------------- From 1d38fdf74f2285b8655f538149bd15f455be6040 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 23 Apr 2026 01:35:12 +0530 Subject: [PATCH 05/10] Use fusion-plugin and optimization options for the lib and exe --- streamly-coreutils.cabal | 56 +++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index e2205fa..17055e6 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -36,6 +36,11 @@ extra-doc-files: , design/proposal.md , design/design-notes.md +flag fusion-plugin + description: Use fusion plugin for best performance + manual: True + default: True + common compile-options default-language: Haskell2010 @@ -98,8 +103,47 @@ common 
default-extensions -- UndecidableInstances -- Does not show any perf impact -- UnboxedTuples -- interferes with (#.) +common exe-dependencies + build-depends: + base + , optparse-applicative >= 0.17 && < 0.19 + , streamly + , streamly-core + , streamly-coreutils + +common perf-options + ghc-options: -O2 + -fdicts-strict + -fspec-constr-recursive=16 + -fmax-worker-args=16 + -Wall + -Wcompat + -Wunrecognised-warning-flags + -Widentities + -Wincomplete-record-updates + -Wincomplete-uni-patterns + -Wredundant-constraints + -Wnoncanonical-monad-instances + if impl(ghc >= 9.8) + ghc-options: -Wno-x-partial + + if flag(fusion-plugin) + ghc-options: -fplugin Fusion.Plugin + build-depends: + fusion-plugin >= 0.2.6 && < 0.3 + +common exe-options + import: exe-dependencies, perf-options + default-language: Haskell2010 + hs-source-dirs: app + +common exe-options-threaded + import: exe-options + ghc-options: -threaded + -with-rtsopts=-N + library - import: compile-options, default-extensions + import: compile-options, default-extensions, perf-options build-depends: base >= 4.8 && < 5 , directory >= 1.2.2 && < 1.4 @@ -155,16 +199,8 @@ library default-language: Haskell2010 executable hfd - import: compile-options, default-extensions + import: compile-options, default-extensions, exe-options-threaded main-is: hfd.hs - hs-source-dirs: app - build-depends: - base - , optparse-applicative >= 0.17 && < 0.19 - , streamly - , streamly-core - , streamly-coreutils - default-language: Haskell2010 ------------------------------------------------------------------------------- -- Benchmarks From 8ea03959ad08e4be12a30fa8be437685b703387e Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 23 Apr 2026 05:31:28 +0530 Subject: [PATCH 06/10] Implement maxResults --- app/hfd.hs | 53 +++++++++--------- src/Streamly/Coreutils/Find.hs | 98 ++++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 34 deletions(-) diff --git a/app/hfd.hs b/app/hfd.hs index fce7789..7918a96 100644 --- 
a/app/hfd.hs +++ b/app/hfd.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - module Main (main) where import System.IO (stdout) @@ -17,23 +15,20 @@ import Options.Applicative , long , metavar , optional + , option , progDesc , strArgument , (<**>) ) import qualified Options.Applicative as OA import qualified Streamly.Data.Stream.Prelude as Stream -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -import qualified Streamly.Data.Array as Array -import qualified Streamly.Data.Unfold as Unfold -import qualified Streamly.Internal.Data.Stream as Stream (unfoldEachEndBy) -import qualified Streamly.Unicode.Stream as Unicode -#endif import qualified Streamly.FileSystem.Handle as Handle import qualified Streamly.FileSystem.Path as Path import Streamly.Coreutils.Find ( FindOptions + , findByteChunked + , maxResults , parallelInterleaved , parallelOrdered , parallelUnordered @@ -43,15 +38,11 @@ import Streamly.Coreutils.Find , serialDfs , serialInterleaved ) -#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -import Streamly.Coreutils.Find (findByteChunked) -#else -import Streamly.Coreutils.Find (findChunked) -#endif data Config = Config { cfgTraversal :: FindOptions -> FindOptions , cfgRoot :: FilePath + , cfgMaxResults :: Maybe Int } data Traversal @@ -76,11 +67,12 @@ toTraversalConfig traversal = TraversalParallelInterleaved -> parallelInterleaved TraversalParallelOrdered -> parallelOrdered -mkConfig :: Traversal -> Maybe FilePath -> Config -mkConfig traversal mPath = +mkConfig :: Traversal -> Maybe Int -> Maybe FilePath -> Config +mkConfig traversal mMaxResults mPath = Config { cfgTraversal = toTraversalConfig traversal , cfgRoot = maybe "." 
id mPath + , cfgMaxResults = mMaxResults } traversalParser :: Parser Traversal @@ -107,6 +99,11 @@ configParser :: Parser Config configParser = mkConfig <$> traversalParser + <*> optional + (option (OA.eitherReader parsePositiveInt) + (long "max-results" + <> metavar "N" + <> help "Stop after emitting N results")) <*> optional (strArgument (metavar "PATH" <> help "Root path to search")) @@ -120,22 +117,20 @@ parserInfo = <> progDesc "A basic fd-like driver for Streamly.Coreutils.Find." <> header "hfd") -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -#endif - main :: IO () main = do cfg <- execParser parserInfo path <- Path.fromString (cfgRoot cfg) -#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) + let applyConfig opts = + maybe id maxResults (cfgMaxResults cfg) $ + cfgTraversal cfg opts Stream.fold (Handle.writeChunks stdout) - $ findByteChunked (cfgTraversal cfg) path -#else - Stream.fold (Handle.writeWith 32000 stdout) - $ Unicode.encodeUtf8 - $ Unicode.decodeUtf16le - $ Stream.unfoldEachEndBy 10 Array.reader - $ fmap Path.toArray - $ Stream.unfoldEach Unfold.fromList - $ findChunked (cfgTraversal cfg) path -#endif + $ findByteChunked applyConfig path + +parsePositiveInt :: String -> Either String Int +parsePositiveInt str = + case reads str of + [(n, "")] + | n > 0 -> Right n + | otherwise -> Left "N must be positive" + _ -> Left "N must be an integer" diff --git a/src/Streamly/Coreutils/Find.hs b/src/Streamly/Coreutils/Find.hs index 72725ed..802c47f 100644 --- a/src/Streamly/Coreutils/Find.hs +++ b/src/Streamly/Coreutils/Find.hs @@ -85,9 +85,12 @@ module Streamly.Coreutils.Find -- * Options , FindOptions + , maxResults ) where +import Data.Function ((&)) +import Data.Functor.Identity (runIdentity) import Data.Maybe (fromJust) import Data.Word (Word8) import Streamly.Data.Array (Array) @@ -100,12 +103,17 @@ import System.IO (stdout, hSetBuffering, BufferMode(LineBuffering)) import qualified Streamly.Data.Stream.Prelude as Stream import qualified 
Streamly.Data.Array as Array import qualified Streamly.FileSystem.DirIO as DirIO -import qualified Streamly.Internal.Data.Array as GArray (compactMax') +import qualified Streamly.Internal.Data.Array as GArray + ( compactMax' + , read + , unsafeSliceOffLen + ) import qualified Streamly.Internal.Data.Stream as Stream ( unfoldEachEndBy , concatIterate , bfsConcatIterate , altBfsConcatIterate + , postscanlMaybe ) import qualified Streamly.Data.StreamK as StreamK import qualified Streamly.Internal.Data.StreamK as StreamK @@ -125,6 +133,11 @@ import qualified Streamly.Internal.FileSystem.Posix.ReadDir as Dir import qualified Streamly.Unicode.Stream as Stream #endif +import Streamly.Internal.Data.Scanl (Step(..), Scanl(..)) +import qualified Streamly.Internal.Data.Scanl as Scanl +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Array as Array + -- -- Running on a sample directory tree the concurrent rust "fd" tool took 150 ms -- (real time). On the same tree the fastest variant using Haskell streamly @@ -147,10 +160,17 @@ data FindTraversal | FindParallelInterleaved | FindParallelOrdered -newtype FindOptions = FindOptions {findTraversal :: FindTraversal} +data FindOptions = FindOptions + { findTraversal :: FindTraversal + , findMaxResults :: Maybe Int + } defaultConfig :: FindOptions -defaultConfig = FindOptions FindSerialDfs +defaultConfig = + FindOptions + { findTraversal = FindSerialDfs + , findMaxResults = Nothing + } serialDfs :: FindOptions -> FindOptions serialDfs cfg = cfg {findTraversal = FindSerialDfs} @@ -176,6 +196,9 @@ parallelInterleaved cfg = cfg {findTraversal = FindParallelInterleaved} parallelOrdered :: FindOptions -> FindOptions parallelOrdered cfg = cfg {findTraversal = FindParallelOrdered} +maxResults :: Int -> FindOptions -> FindOptions +maxResults n cfg = cfg {findMaxResults = Just (max 0 n)} + {-# INLINE recReadOpts #-} recReadOpts :: ReadOptions -> ReadOptions {-# INLINE reEncode #-} @@ -196,12 +219,70 
@@ recReadOpts = reEncode = id #endif +data Counts = Counts !Int !Int deriving Show + +{-# INLINE countStep #-} +countStep :: Monad m => Counts -> Word8 -> m (Step Counts (Either Int Int)) +countStep (Counts l c) ch = + let l1 = if ch == 10 then l - 1 else l + in if l1 == 0 + then return $ Done $ Left (c + 1) + else return $ Partial $ Counts l1 (c + 1) + +{-# INLINE countExtract #-} +countExtract :: Monad m => Counts -> m (Either a Int) +countExtract (Counts l _) = return $ Right l + +{-# INLINE count #-} +count :: Monad m => Int -> Fold.Fold m Word8 (Either Int Int) +count l = Fold.foldtM' countStep (return $ Partial (Counts l 0 )) countExtract + +-- XXX Scanl is an awkward abstraction for the case when we are emitting every +-- element and just need to transform the elements using the state. We need a +-- smapM instead for this case. In the scan we are forced to use a Maybe and +-- then catMaybes unnecessarily to store the elements, because only in the +-- initial state do we not have an element. 
+-- +{-# INLINE scanStep #-} +scanStep :: Monad m => + (Int, Maybe (Array Word8)) + -> Array Word8 + -> m (Step (Int, Maybe (Array Word8)) (Maybe (Array Word8))) +scanStep (n, _) arr = do + r <- Array.read arr & Stream.fold (count n) + case r of + Left len -> return $ Done $ Just (Array.unsafeSliceOffLen 0 len arr) + Right cnt -> + if cnt /= 0 + then return $ Partial (cnt, Just arr) + else return $ Done (Just arr) + +{-# INLINE scanExtract #-} +scanExtract :: Monad m => (Int, Maybe (Array Word8)) -> m (Maybe (Array Word8)) +scanExtract (_, arr) = return arr + +{-# INLINE scanFinal #-} +scanFinal :: Monad m => (Int, Maybe (Array Word8)) -> m (Maybe (Array Word8)) +scanFinal (_, arr) = return arr + +{-# INLINE takeN #-} +takeN :: Int -> Stream IO (Array Word8) -> Stream IO (Array Word8) +takeN n + | n <= 0 = const Stream.nil + | otherwise = + Stream.postscanlMaybe + (Scanl + scanStep + (return (Partial (n, Nothing))) + scanExtract + scanFinal) + #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -- Fastest implementation, only works for posix as of now. findByteChunked :: (FindOptions -> FindOptions) -> Path -> Stream IO (Array Word8) findByteChunked f path = - Stream.catRights $ - case findTraversal (f defaultConfig) of + transform $ Stream.catRights $ + case findTraversal opts of FindSerialDfs -> Stream.concatIterate streamDirMaybe -- 154 ms $ Stream.fromPure (Left [path]) @@ -232,6 +313,11 @@ findByteChunked f path = where + {-# INLINE transform #-} + transform s = maybe s (\n -> takeN n s) (findMaxResults opts) + + opts = f defaultConfig + concatIterateWith combine = StreamK.toStream . StreamK.concatIterateWith combine (StreamK.fromStream . 
streamDir) @@ -253,6 +339,7 @@ findByteChunked f path = -- Faster than the find implementation below findChunked :: (FindOptions -> FindOptions) -> Path -> Stream IO [Path] findChunked f path = + -- XXX implement maxResults Stream.catRights $ case findTraversal (f defaultConfig) of FindSerialDfs -> @@ -300,6 +387,7 @@ findChunked f path = find :: (FindOptions -> FindOptions) -> Path -> Stream IO Path find f path = + -- XXX implement maxResults Stream.catRights $ case findTraversal (f defaultConfig) of FindSerialDfs -> From 0e229db9963ba2daee64e5a8c9b5f1ae3dd8de9d Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 23 Apr 2026 05:33:44 +0530 Subject: [PATCH 07/10] Disable hfd executable on Windows for now --- streamly-coreutils.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index 17055e6..1a898e1 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -201,6 +201,8 @@ library executable hfd import: compile-options, default-extensions, exe-options-threaded main-is: hfd.hs + if os(windows) + buildable: false ------------------------------------------------------------------------------- -- Benchmarks From 3ca64b168e02f239a2b767bf62c7af6e5d7e1cea Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 29 Apr 2026 15:48:33 +0530 Subject: [PATCH 08/10] Update time, optparse-applicative bounds --- streamly-coreutils.cabal | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index 1a898e1..95200d3 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -36,6 +36,10 @@ extra-doc-files: , design/proposal.md , design/design-notes.md +source-repository head + type: git + location: https://github.com/composewell/streamly-coreutils + flag fusion-plugin description: Use fusion plugin for best performance manual: True @@ -106,7 +110,7 @@ common default-extensions common exe-dependencies build-depends: base 
- , optparse-applicative >= 0.17 && < 0.19 + , optparse-applicative >= 0.17 && < 0.20 , streamly , streamly-core , streamly-coreutils @@ -153,7 +157,7 @@ library , streamly >= 0.11 && < 0.12 , streamly-core >= 0.3 && < 0.4 , streamly-process >= 0.4 && < 0.5 - , time >= 1.9 && < 1.15 + , time >= 1.9 && < 1.16 , unix-compat >= 0.5.4 && < 0.8 if !os(windows) build-depends: unix >= 2.7.0 && < 2.9 @@ -202,7 +206,7 @@ executable hfd import: compile-options, default-extensions, exe-options-threaded main-is: hfd.hs if os(windows) - buildable: false + buildable: False ------------------------------------------------------------------------------- -- Benchmarks From 7949f4683ffe07249f4ce8a2e8def8f7003128ba Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 29 Apr 2026 16:54:55 +0530 Subject: [PATCH 09/10] Disable the module level doctests in find --- src/Streamly/Coreutils/Find.hs | 74 ++++++++++++++++------------------ 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/src/Streamly/Coreutils/Find.hs b/src/Streamly/Coreutils/Find.hs index 802c47f..ef797d2 100644 --- a/src/Streamly/Coreutils/Find.hs +++ b/src/Streamly/Coreutils/Find.hs @@ -15,48 +15,42 @@ -- Examples: -- List the current directory recursively using the internal traversal variants. -- --- >>> :{ --- main :: IO () --- main = do --- hSetBuffering stdout LineBuffering --- let path = fromJust $ Path.fromString "." --- #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) --- Stream.fold (Handle.writeChunks stdout) --- $ findByteChunked id path --- #else --- Stream.fold (Handle.writeWith 32000 stdout) --- $ reEncode --- $ Stream.unfoldEachEndBy 10 Array.reader --- $ fmap Path.toArray --- $ Stream.unfoldEach Unfold.fromList --- $ findChunked id path --- #endif --- :} +-- > main :: IO () +-- > main = do +-- > hSetBuffering stdout LineBuffering +-- > let path = fromJust $ Path.fromString "." 
+-- > #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +-- > Stream.fold (Handle.writeChunks stdout) +-- > $ findByteChunked id path +-- > #else +-- > Stream.fold (Handle.writeWith 32000 stdout) +-- > $ reEncode +-- > $ Stream.unfoldEachEndBy 10 Array.reader +-- > $ fmap Path.toArray +-- > $ Stream.unfoldEach Unfold.fromList +-- > $ findChunked id path +-- > #endif -- --- >>> :{ --- main :: IO () --- main = do --- hSetBuffering stdout LineBuffering --- let path = fromJust $ Path.fromString "." --- Stream.fold (Handle.writeWith 32000 stdout) --- $ reEncode --- $ Stream.unfoldEachEndBy 10 Array.reader --- $ fmap Path.toArray --- $ Stream.unfoldEach Unfold.fromList --- $ findChunked id path --- :} +-- > main :: IO () +-- > main = do +-- > hSetBuffering stdout LineBuffering +-- > let path = fromJust $ Path.fromString "." +-- > Stream.fold (Handle.writeWith 32000 stdout) +-- > $ reEncode +-- > $ Stream.unfoldEachEndBy 10 Array.reader +-- > $ fmap Path.toArray +-- > $ Stream.unfoldEach Unfold.fromList +-- > $ findChunked id path -- --- >>> :{ --- main :: IO () --- main = do --- hSetBuffering stdout LineBuffering --- let path = fromJust $ Path.fromString "." --- Stream.fold (Handle.writeWith 32000 stdout) --- $ reEncode --- $ Stream.unfoldEachEndBy 10 Array.reader --- $ fmap Path.toArray --- $ find id path --- :} +-- > main :: IO () +-- > main = do +-- > hSetBuffering stdout LineBuffering +-- > let path = fromJust $ Path.fromString "." +-- > Stream.fold (Handle.writeWith 32000 stdout) +-- > $ reEncode +-- > $ Stream.unfoldEachEndBy 10 Array.reader +-- > $ fmap Path.toArray +-- > $ find id path -- -- Compare the above example with GNU @find@ or rust @fd@. 
To compare listing -- the current directory recursively, use the following commands: From a73bc35ea89c3dcd4fff2615affab6d08fe6051a Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 29 Apr 2026 17:19:20 +0530 Subject: [PATCH 10/10] Disable fusion-plugin by default --- .github/workflows/haskell.yml | 10 ++++++---- streamly-coreutils.cabal | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index 232f285..8bbe3ad 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -209,19 +209,21 @@ jobs: #pack_options: >- #CABAL_PROJECT=cabal.project.d/master + # haddock fails with fusion-plugin - name: ci runner: ubuntu-latest command: cabal ghc_version: 9.4.7 - #pack_options: >- - #CABAL_PROJECT=cabal.project.d/master + pack_options: >- + DISABLE_DOCS=y + # haddock fails with fusion-plugin - name: ci runner: ubuntu-latest command: cabal ghc_version: 9.2.8 - #pack_options: >- - #CABAL_PROJECT=cabal.project.d/master + pack_options: >- + DISABLE_DOCS=y - name: no-docs runner: ubuntu-latest diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index 95200d3..bd30795 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -40,10 +40,12 @@ source-repository head type: git location: https://github.com/composewell/streamly-coreutils +-- Keep the default False: haddock fails with the plugin in some cases, and it +-- is unclear whether the Hackage haddock build would work correctly. flag fusion-plugin description: Use fusion plugin for best performance manual: True - default: True + default: False common compile-options default-language: Haskell2010