-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Library for reading Blast XML output
--   
--   This library contains a data structure and functions for reading
--   output from the BLAST sequence alignment program.
@package blastxml
@version 0.3.2


-- | This module implements a hierarchical data structure for BLAST
--   results.
--   
--   BLAST is a tool for searching in (biological) sequences for
--   similarity. This library is tested against NCBI-blast version 2.2.14.
--   There exist several independent versions of BLAST, so expect some
--   incompatibilities if you're using a different BLAST version.
--   
--   For parsing BLAST results, the XML format (blastall -m 7) is by far
--   the most robust choice, and is implemented in the
--   <a>Bio.Alignment.BlastXML</a> module.
--   
--   The format is straightforward (and non-recursive). For more
--   information on BLAST, check
--   <a>http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/information3.html</a>
module Bio.BlastData

-- | The Aux field in the BLAST output includes match information that
--   depends on the BLAST flavor (blastn, blastx, or blastp). This data
--   structure captures those variations.
data Aux

-- | blastn
Strands :: !Strand -> !Strand -> Aux

-- | blastx
Frame :: !Strand -> !Int -> Aux

-- | A <a>BlastResult</a> is the root of the hierarchy.
data BlastResult
BlastResult :: !ByteString -> !ByteString -> !ByteString -> !Integer -> [BlastRecord] -> BlastResult
[blastprogram, blastversion, blastdate] :: BlastResult -> !ByteString
[blastreferences] :: BlastResult -> !ByteString
[database] :: BlastResult -> !ByteString
[dbsequences, dbchars] :: BlastResult -> !Integer
[results] :: BlastResult -> [BlastRecord]

-- | Each query sequence generates a <a>BlastRecord</a>
data BlastRecord
BlastRecord :: !SeqLabel -> !Int -> [BlastHit] -> BlastRecord
[query] :: BlastRecord -> !SeqLabel
[qlength] :: BlastRecord -> !Int
[hits] :: BlastRecord -> [BlastHit]

-- | Each match between a query and a target sequence (or subject) is a
--   <a>BlastHit</a>.
data BlastHit
BlastHit :: !ByteString -> !SeqLabel -> !Int -> [BlastMatch] -> BlastHit
[hitId] :: BlastHit -> !ByteString
[subject] :: BlastHit -> !SeqLabel
[slength] :: BlastHit -> !Int
[matches] :: BlastHit -> [BlastMatch]

-- | A <a>BlastHit</a> may contain multiple separate matches (typically
--   when an indel causes a frameshift that blastx is unable to bridge).
data BlastMatch
BlastMatch :: !Double -> !Double -> (Int, Int) -> !Int -> !ByteString -> !Aux -> BlastMatch
[bits] :: BlastMatch -> !Double
[e_val] :: BlastMatch -> !Double
[identity] :: BlastMatch -> (Int, Int)
[q_from, q_to, h_from, h_to] :: BlastMatch -> !Int
[qseq, hseq] :: BlastMatch -> !ByteString
[aux] :: BlastMatch -> !Aux
instance GHC.Show.Show Bio.BlastData.BlastResult
instance GHC.Show.Show Bio.BlastData.BlastRecord
instance GHC.Show.Show Bio.BlastData.BlastHit
instance GHC.Show.Show Bio.BlastData.BlastMatch
instance GHC.Classes.Eq Bio.BlastData.Aux
instance GHC.Show.Show Bio.BlastData.Aux


-- | Parse blast XML output.
--   
--   If you use a recent version of NCBI BLAST and specify XML output
--   (blastall -m 7), this module should be able to parse the result into a
--   hierarchical <a>BlastResult</a> structure.
--   
--   While the process may consume a bit of memory, the parsing is lazy,
--   and file sizes of several gigabytes can be parsed (see e.g. the xml2x
--   tool for an example). To parse XML, we use <a>TagSoup</a>.
module Bio.BlastXML

-- | Parse BLAST results in XML format
readXML :: FilePath -> IO BlastResult
parseXML :: ByteString -> IO BlastResult
