Source code for outrigger.io.core

from ..common import EXON_START, EXON_STOP, JUNCTION_START, JUNCTION_STOP, \
    JUNCTION_ID, CHROM, STRAND


[docs]def add_exons_and_junction_ids(junction_reads):
    """Given junction locations, add exon locations and junction ids

    This assumes that the junction starts are one nucleotide after an exon
    (i.e. where the intron starts not where the exon ends) and similarly for
    the junction stops, that the junctions are one nucleotide before the exon.

    Parameters
    ----------
    junction_reads : pandas.DataFrame
        A tidy table of junction reads, with columns {chrom}, {start}, {stop}
        and {strand}.

    Returns
    -------
    reads_with_exons : pandas.DataFrame
        The same table, with the columns {exon_start}, {exon_stop} and
        {junction_id} added.
    """.format(chrom=CHROM, start=JUNCTION_START, stop=JUNCTION_STOP,
               strand=STRAND, exon_start=EXON_START, exon_stop=EXON_STOP,
               junction_id=JUNCTION_ID)

    # From STAR, exon_cols start one base pair down from the end of the intron
    junction_reads[EXON_START] = junction_reads[JUNCTION_STOP] + 1

    # From STAR, exon_cols stop one base pair up from the start of the intron
    junction_reads[EXON_STOP] = junction_reads[JUNCTION_START] - 1

    junction_reads[JUNCTION_ID] = 'junction:' + \
                                  junction_reads[CHROM].astype(str) + ':' + \
                                  junction_reads[JUNCTION_START].astype(str) \
                                  + '-' \
                                  + junction_reads[JUNCTION_STOP].astype(str) \
                                  + ':' \
                                  + junction_reads[STRAND].astype(str)
    return junction_reads