# Source code for outrigger.tests.validate.test_check_splice_sites

import collections
import os

import pandas as pd
import pandas.util.testing as pdt
import pytest


@pytest.fixture
def exon2_bed(negative_control_folder):
    """Path to ``exon2.bed`` under the negative control's ``index/se`` output.

    ``negative_control_folder`` is the base directory, supplied by the
    fixture of the same name. The returned path is
    ``<negative_control_folder>/outrigger_output/index/se/exon2.bed``.
    """
    return os.path.join(negative_control_folder, 'outrigger_output', 'index',
                        'se', 'exon2.bed')


@pytest.fixture
def do_not_have_mysql():
    """Report whether the ``mysql`` executable is missing from ``PATH``.

    Returns
    -------
    bool
        True when no directory listed in the ``PATH`` environment variable
        contains an entry named ``mysql`` (including when ``PATH`` is unset
        or empty); False when at least one directory does.
    """
    # An unset PATH is treated as "nothing found" instead of raising KeyError.
    search_dirs = os.environ.get("PATH", "").split(os.pathsep)
    # Empty entries are skipped so a stray ``mysql`` in the current working
    # directory is not mistaken for an installed client.
    found = any(os.path.exists(os.path.join(folder, 'mysql'))
                for folder in search_dirs if folder)
    return not found


@pytest.fixture
def genome_name():
    """Name of the genome (``'mm10'``) used by the tests in this module."""
    return 'mm10'


@pytest.fixture(params=['upstream', 'downstream'])
def direction(request):
    """Parametrized fixture taking the values ``'upstream'``, ``'downstream'``.

    A test that requests this fixture runs once for each value.
    """
    return request.param


@pytest.fixture
def simulated_chromsizes(negative_control_folder):
    """Path to the ``chromsizes`` file inside the negative control folder.

    ``negative_control_folder`` is the base directory, supplied by the
    fixture of the same name.
    """
    return os.path.join(negative_control_folder, 'chromsizes')


@pytest.fixture
def simulated_fasta(negative_control_folder):
    """Path to the ``genome.fasta`` file inside the negative control folder.

    ``negative_control_folder`` is the base directory, supplied by the
    fixture of the same name.
    """
    return os.path.join(negative_control_folder, 'genome.fasta')


@pytest.fixture(params=[
    # Marks are attached through ``pytest.param``; applying a mark object
    # directly to a parameter value is no longer supported by pytest.
    # NOTE(review): pytest evaluates the string condition as an expression.
    # The name presumably resolves to the module-level fixture function
    # (always truthy), which would skip this case unconditionally -- confirm.
    pytest.param('genome_name',
                 marks=pytest.mark.skipif('do_not_have_mysql')),
    'filename'])
def maybe_chromsizes(request, simulated_chromsizes, genome_name):
    """Chromosome-sizes input, given either as a genome name or a filename.

    Parametrized over ``'genome_name'`` and ``'filename'``.

    Returns
    -------
    The value of the ``genome_name`` fixture for the ``'genome_name'``
    parameter, or the value of the ``simulated_chromsizes`` fixture for the
    ``'filename'`` parameter. Any other parameter yields ``None``.
    """
    if request.param == 'genome_name':
        return genome_name
    if request.param == 'filename':
        return simulated_chromsizes
    return None


def test_splice_site_str_to_tuple():
    """A comma-separated splice site string becomes a tuple of its parts."""
    from outrigger.validate.check_splice_sites import splice_site_str_to_tuple

    observed = splice_site_str_to_tuple('GT/AG,AT/AC')
    expected = ('GT/AG', 'AT/AC')

    assert observed == expected


def test_maybe_read_chromsizes(maybe_chromsizes, genome_name):
    """Check the mapping returned by ``maybe_read_chromsizes``.

    When ``maybe_chromsizes`` is not the genome name, the expected result is
    the single entry ``{'simulated': (0, 1000)}``. Otherwise the expected
    result is built from the chromosome/length table embedded below, with
    each chromosome mapped to ``(0, length)``.
    """
    from outrigger.validate.check_splice_sites import maybe_read_chromsizes
    observed = maybe_read_chromsizes(maybe_chromsizes)

    if maybe_chromsizes != genome_name:
        expected = collections.OrderedDict({'simulated': (0, 1000)})
    else:
        # One "<chromosome><whitespace><length>" record per line.
        table = '''chr1	195471971
chr2	182113224
chrX	171031299
chr3	160039680
chr4	156508116
chr5	151834684
chr6	149736546
chr7	145441459
chr10	130694993
chr8	129401213
chr14	124902244
chr9	124595110
chr11	122082543
chr13	120421639
chr12	120129022
chr15	104043685
chr16	98207768
chr17	94987271
chrY	91744698
chr18	90702639
chr19	61431566
chr5_JH584299_random	953012
chrX_GL456233_random	336933
chrY_JH584301_random	259875
chr1_GL456211_random	241735
chr4_GL456350_random	227966
chr4_JH584293_random	207968
chr1_GL456221_random	206961
chr5_JH584297_random	205776
chr5_JH584296_random	199368
chr5_GL456354_random	195993
chr4_JH584294_random	191905
chr5_JH584298_random	184189
chrY_JH584300_random	182347
chr7_GL456219_random	175968
chr1_GL456210_random	169725
chrY_JH584303_random	158099
chrY_JH584302_random	155838
chr1_GL456212_random	153618
chrUn_JH584304	114452
chrUn_GL456379	72385
chr4_GL456216_random	66673
chrUn_GL456393	55711
chrUn_GL456366	47073
chrUn_GL456367	42057
chrUn_GL456239	40056
chr1_GL456213_random	39340
chrUn_GL456383	38659
chrUn_GL456385	35240
chrUn_GL456360	31704
chrUn_GL456378	31602
chrUn_GL456389	28772
chrUn_GL456372	28664
chrUn_GL456370	26764
chrUn_GL456381	25871
chrUn_GL456387	24685
chrUn_GL456390	24668
chrUn_GL456394	24323
chrUn_GL456392	23629
chrUn_GL456382	23158
chrUn_GL456359	22974
chrUn_GL456396	21240
chrUn_GL456368	20208
chrM	16299
chr4_JH584292_random	14945
chr4_JH584295_random	1976'''
        records = [line.split() for line in table.split('\n')]
        expected = collections.OrderedDict(
            [(chrom, (0, int(length))) for chrom, length in records])
    pdt.assert_dict_equal(observed, expected)


def test_read_splice_sites(exon2_bed, direction, simulated_fasta,
                           simulated_chromsizes, negative_control_folder):
    """Compare ``read_splice_sites`` output with the stored expected CSV.

    The expected values are read from
    ``<negative_control_folder>/exon2_<direction>_splice_sites.csv``, whose
    first column is used as the index. The test runs once per value of the
    ``direction`` fixture.
    """
    from outrigger.validate.check_splice_sites import read_splice_sites

    observed = read_splice_sites(exon2_bed, simulated_chromsizes,
                                 simulated_fasta, direction)

    csv_path = os.path.join(negative_control_folder,
                            'exon2_{}_splice_sites.csv'.format(direction))
    # ``read_csv(squeeze=True)`` was deprecated in pandas 1.4 and removed in
    # 2.0; squeezing the column axis afterwards is the documented replacement.
    expected = pd.read_csv(csv_path, index_col=0, header=None,
                           names=None).squeeze('columns')
    # Drop the automatic integer labels so only values and index are compared.
    expected.name = None
    expected.index.name = None

    pd.testing.assert_series_equal(observed, expected)