Skip to content

kaptive.alignment

Alignment

Similar to mappy.Alignment but with additional attributes and methods.

Source code in kaptive/alignment.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class Alignment:
    """
    Similar to `mappy.Alignment` but with additional attributes and methods.
    """

    def __init__(
            self, q: str = None, q_len: int | None = 0, q_st: int | None = 0,
            q_en: int | None = 0, strand: str = None, ctg: str = None,
            ctg_len: int | None = 0, r_st: int | None = 0, r_en: int | None = 0,
            mlen: int | None = 0, blen: int | None = 0, mapq: int | None = 0,
            tags: dict = None):
        self.q = q or 'unknown'  # Query sequence name
        self.q_len = q_len  # Query sequence length
        self.q_st = q_st  # Query start coordinate (0-based)
        self.q_en = q_en  # Query end coordinate (0-based)
        self.strand = strand or 'unknown'  # ‘+’ if query/target on the same strand; ‘-’ if opposite
        self.ctg = ctg or 'unknown'  # Target sequence name
        self.ctg_len = ctg_len  # Target sequence length
        self.r_st = r_st  # Target start coordinate on the original strand (0-based)
        self.r_en = r_en  # Target end coordinate on the original strand (0-based)
        self.mlen = mlen  # Number of matching bases in the alignment
        self.blen = blen  # Number bases, including gaps, in the alignment
        self.mapq = mapq  # Mapping quality (0-255 with 255 for missing)
        self.tags = tags or {}  # {tag: value} pairs

    @classmethod
    def from_paf_line(cls, line: str):
        """
        Parse a line in PAF format and return an Alignment object.
        """
        if len(line := line.split('\t')) < 12:
            raise AlignmentError(f"Line has < 12 columns: {line}")
        try:
            return Alignment(  # Parse standard fields
                q=line[0], q_len=int(line[1]), q_st=int(line[2]), q_en=int(line[3]), strand=line[4], ctg=line[5],
                ctg_len=int(line[6]), r_st=int(line[7]), r_en=int(line[8]), mlen=int(line[9]), blen=int(line[10]),
                mapq=int(line[11]),
                tags={(x := t.split(":", 2))[0]: int(x[2]) if x[1] == "i" else float(x[2]) if x[1] == "f" else x[2] for
                      t in line[12:]}
            )
        except Exception as e:
            raise AlignmentError(f"Error parsing PAF line: {line}") from e

    def __repr__(self):
        return f'{self.q}:{self.q_st}-{self.q_en} {self.ctg}:{self.r_st}-{self.r_en} {self.strand}'

    def __len__(self):
        return self.num_bases

    def __getattr__(self, item):
        if item in self.__dict__:  # First check attributes
            return self.__dict__[item]
        elif item in self.tags:  # Then check tags
            return self.tags[item]
        else:
            raise AttributeError(f"{self.__class__.__name__} object has no attribute {item}")

    @property
    def partial(self) -> bool:
        if self.q_len > self.ctg_len:
            return True
        if (self.r_en == self.ctg_len or self.r_st == 0) and (self.q_en - self.q_st) < self.q_len:
            return True
        ref_start_diff = self.r_st
        ref_end_diff = self.ctg_len - self.r_en
        query_start_diff = self.q_st if self.strand == '+' else self.q_len - self.q_en
        query_end_diff = self.q_len - self.q_en if self.strand == '+' else self.q_st
        if query_start_diff > ref_start_diff or query_end_diff > ref_end_diff:
            return True
        return False

from_paf_line(line) classmethod

Parse a line in PAF format and return an Alignment object.

Source code in kaptive/alignment.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
@classmethod
def from_paf_line(cls, line: str):
    """
    Parse a line in PAF format and return an Alignment object.

    Columns 1-12 are the standard PAF fields. Any further columns are parsed as
    ``name:type:value`` tags, with type ``i`` converted to int, ``f`` to float and
    every other type kept as a string.

    :param line: One tab-separated PAF record; a trailing line ending is ignored.
    :return: An instance of ``cls`` populated from the record.
    :raises AlignmentError: If there are fewer than 12 columns or a field cannot be parsed.
    """
    # Drop the line ending so it cannot end up inside the last (string) tag value
    fields = line.rstrip('\r\n').split('\t')
    if len(fields) < 12:
        raise AlignmentError(f"Line has < 12 columns: {fields}")
    try:
        tags = {}
        for field in fields[12:]:
            # maxsplit=2 keeps any ':' inside the value; too few parts raises ValueError
            name, kind, value = field.split(":", 2)
            tags[name] = int(value) if kind == "i" else float(value) if kind == "f" else value
        parsed = dict(  # Parse standard fields
            q=fields[0], q_len=int(fields[1]), q_st=int(fields[2]), q_en=int(fields[3]), strand=fields[4],
            ctg=fields[5], ctg_len=int(fields[6]), r_st=int(fields[7]), r_en=int(fields[8]),
            mlen=int(fields[9]), blen=int(fields[10]), mapq=int(fields[11]), tags=tags
        )
    except ValueError as e:
        raise AlignmentError(f"Error parsing PAF line: {fields}") from e
    return cls(**parsed)  # `cls` rather than a hard-coded class so subclasses get their own type back

cull(keep, alignments, overlap_fraction=0.1)

Yield the alignments that do not conflict with the `keep` alignment.

Source code in kaptive/alignment.py
91
92
93
94
95
96
97
def cull(keep: Alignment, alignments: Iterable[Alignment],
         overlap_fraction: float = 0.1) -> Generator[Alignment]:
    """
    Yield alignments that do not conflict with the `keep` alignment.

    An alignment is yielded when it lies on a different contig from `keep`, or when its
    target-coordinate overlap with `keep` (as computed by `range_overlap`), divided by
    its own `blen`, is below `overlap_fraction`.
    """
    for candidate in alignments:
        if candidate.ctg != keep.ctg:  # Different contig, so it cannot conflict
            yield candidate
            continue
        shared = range_overlap((candidate.r_st, candidate.r_en), (keep.r_st, keep.r_en), skip_sort=True)
        if shared / candidate.blen < overlap_fraction:
            yield candidate

cull_filtered(pred, alignments)

Yield the alignments that match the predicate, followed by the non-matching alignments that do not overlap any of them (conflicts among the non-matching alignments are removed first). E.g. `list(cull_filtered(lambda i: i.q in query_genes, alignments))`

Source code in kaptive/alignment.py
109
110
111
112
113
114
115
116
117
118
119
120
def cull_filtered(pred: callable, alignments: Iterable[Alignment]) -> Generator[Alignment]:
    """
    Yield the alignments matching `pred`, then the remaining alignments that survive culling.

    Alignments are split by the predicate. The non-matching ones are first passed through
    `cull_all`, then culled against each matching alignment in turn with `cull`. Matching
    alignments are yielded as they are processed and are not culled against each other.
    E.g. list(cull_filtered(lambda i: i.q in query_genes, alignments))
    """
    matched, unmatched = [], []
    for aln in alignments:
        if pred(aln):
            matched.append(aln)
        else:
            unmatched.append(aln)
    survivors = cull_all(unmatched)  # Remove conflicts among the non-matching alignments
    for kept in matched:
        # Drop non-matching alignments that overlap this matching one
        survivors = list(cull(kept, survivors))
        yield kept
    yield from survivors

group_alns(alignments, key='q')

Group alignments by a key

Source code in kaptive/alignment.py
86
87
88
def group_alns(alignments: Iterable[Alignment], key: str = 'q') -> Generator[tuple[str, Generator[Alignment]]]:
    """
    Group alignments by a key.

    The alignments are sorted by the attribute named `key` and then grouped on it, so each
    distinct value is yielded once as a ``(value, group_iterator)`` pair, in sorted order.
    """
    def key_of(aln):
        return getattr(aln, key)

    # groupby only merges adjacent items, hence the sort on the same key first
    ordered = sorted(alignments, key=key_of)
    for value, members in groupby(ordered, key=key_of):
        yield value, members