# Source code for outrigger.tests.validate.test_check_splice_sites
import collections
import os
import pandas as pd
import pandas.util.testing as pdt
import pytest
@pytest.fixture
def exon2_bed(negative_control_folder):
    """Return ``outrigger_output/index/se/exon2.bed`` under the control folder.

    ``negative_control_folder`` is a fixture supplied from outside this
    module; the result is only a path string, nothing is opened here.
    """
    path_parts = (negative_control_folder, 'outrigger_output', 'index',
                  'se', 'exon2.bed')
    return os.path.join(*path_parts)
@pytest.fixture
def do_not_have_mysql():
    """Report whether the ``mysql`` executable is missing from ``PATH``.

    Returns
    -------
    bool
        True when no directory listed in ``PATH`` contains a file named
        ``mysql``; False as soon as one is found.
    """
    # An unset PATH is treated as "nothing to search" instead of raising
    # KeyError, and empty entries are skipped so the current directory is
    # not probed by accident.
    search_dirs = [directory
                   for directory in os.environ.get("PATH", "").split(os.pathsep)
                   if directory]
    # Probe for 'mysql' (not 'msql') and negate the result so the returned
    # value matches the fixture's name: True means mysql is NOT available.
    found = any(os.path.exists(os.path.join(directory, 'mysql'))
                for directory in search_dirs)
    return not found
@pytest.fixture
def genome_name():
    """Return the genome name used by the chromsizes tests (``'mm10'``)."""
    name = 'mm10'
    return name
@pytest.fixture(params=['upstream', 'downstream'])
def direction(request):
    """Parametrized fixture: ``'upstream'`` first, then ``'downstream'``."""
    current_direction = request.param
    return current_direction
@pytest.fixture
def simulated_chromsizes(negative_control_folder):
    """Return the path of the ``chromsizes`` file in the control folder."""
    filename = 'chromsizes'
    return os.path.join(negative_control_folder, filename)
@pytest.fixture
def simulated_fasta(negative_control_folder):
    """Return the path of the ``genome.fasta`` file in the control folder."""
    filename = 'genome.fasta'
    return os.path.join(negative_control_folder, filename)
def _mysql_client_missing():
    """Return True when no directory on ``PATH`` holds a file named mysql."""
    search_dirs = os.environ.get('PATH', '').split(os.pathsep)
    return not any(os.path.exists(os.path.join(directory, 'mysql'))
                   for directory in search_dirs if directory)


# Marks must be attached with ``pytest.param``; calling the mark on the value
# (``pytest.mark.skipif(...)('genome_name')``) no longer yields the value on
# pytest >= 4.  The condition is a real boolean because a fixture name inside
# a string condition evaluates to the (always truthy) fixture object.
@pytest.fixture(params=[
    pytest.param('genome_name',
                 marks=pytest.mark.skipif(_mysql_client_missing(),
                                          reason='mysql not found on PATH')),
    'filename'])
def maybe_chromsizes(request, simulated_chromsizes, genome_name):
    """Provide chromsizes either as a genome name or as a filename.

    Parameters
    ----------
    request
        pytest request object; ``request.param`` is ``'genome_name'`` or
        ``'filename'``.
    simulated_chromsizes
        Value returned for the ``'filename'`` variant.
    genome_name
        Value returned for the ``'genome_name'`` variant, which is skipped
        when ``mysql`` cannot be found on ``PATH``.

    Returns
    -------
    The fixture value matching ``request.param``.

    Raises
    ------
    KeyError
        If ``request.param`` is neither of the two declared variants.
    """
    variants = {
        'genome_name': genome_name,
        'filename': simulated_chromsizes,
    }
    return variants[request.param]
def test_splice_site_str_to_tuple():
    """``'GT/AG,AT/AC'`` must be converted to ``('GT/AG', 'AT/AC')``."""
    from outrigger.validate.check_splice_sites import splice_site_str_to_tuple

    expected = ('GT/AG', 'AT/AC')
    observed = splice_site_str_to_tuple('GT/AG,AT/AC')
    assert observed == expected
def test_maybe_read_chromsizes(maybe_chromsizes, genome_name):
    """Check ``maybe_read_chromsizes`` for both fixture variants.

    When the fixture supplies the genome name, the result is compared with
    the hard-coded table below; otherwise it is compared with the single
    ``'simulated'`` entry.  Every expected value is a ``(0, length)`` tuple.
    """
    from outrigger.validate.check_splice_sites import maybe_read_chromsizes

    observed = maybe_read_chromsizes(maybe_chromsizes)

    if maybe_chromsizes == genome_name:
        # The literal's continuation lines start at column 0 on purpose:
        # they are part of the string, not of the code layout.
        table = '''chr1 195471971
chr2 182113224
chrX 171031299
chr3 160039680
chr4 156508116
chr5 151834684
chr6 149736546
chr7 145441459
chr10 130694993
chr8 129401213
chr14 124902244
chr9 124595110
chr11 122082543
chr13 120421639
chr12 120129022
chr15 104043685
chr16 98207768
chr17 94987271
chrY 91744698
chr18 90702639
chr19 61431566
chr5_JH584299_random 953012
chrX_GL456233_random 336933
chrY_JH584301_random 259875
chr1_GL456211_random 241735
chr4_GL456350_random 227966
chr4_JH584293_random 207968
chr1_GL456221_random 206961
chr5_JH584297_random 205776
chr5_JH584296_random 199368
chr5_GL456354_random 195993
chr4_JH584294_random 191905
chr5_JH584298_random 184189
chrY_JH584300_random 182347
chr7_GL456219_random 175968
chr1_GL456210_random 169725
chrY_JH584303_random 158099
chrY_JH584302_random 155838
chr1_GL456212_random 153618
chrUn_JH584304 114452
chrUn_GL456379 72385
chr4_GL456216_random 66673
chrUn_GL456393 55711
chrUn_GL456366 47073
chrUn_GL456367 42057
chrUn_GL456239 40056
chr1_GL456213_random 39340
chrUn_GL456383 38659
chrUn_GL456385 35240
chrUn_GL456360 31704
chrUn_GL456378 31602
chrUn_GL456389 28772
chrUn_GL456372 28664
chrUn_GL456370 26764
chrUn_GL456381 25871
chrUn_GL456387 24685
chrUn_GL456390 24668
chrUn_GL456394 24323
chrUn_GL456392 23629
chrUn_GL456382 23158
chrUn_GL456359 22974
chrUn_GL456396 21240
chrUn_GL456368 20208
chrM 16299
chr4_JH584292_random 14945
chr4_JH584295_random 1976'''
        expected = collections.OrderedDict()
        for row in table.split('\n'):
            chrom, length = row.split()
            expected[chrom] = (0, int(length))
    else:
        expected = collections.OrderedDict({'simulated': (0, 1000)})

    pdt.assert_dict_equal(observed, expected)
def test_read_splice_sites(exon2_bed, direction, simulated_fasta,
                           simulated_chromsizes, negative_control_folder):
    """Compare ``read_splice_sites`` output with the stored CSV.

    The expected series is loaded from
    ``exon2_<direction>_splice_sites.csv`` in ``negative_control_folder``,
    using the first CSV column as the index.  Its name and index name are
    cleared before the comparison.
    """
    from outrigger.validate.check_splice_sites import read_splice_sites

    test = read_splice_sites(exon2_bed, simulated_chromsizes, simulated_fasta,
                             direction)

    # Named ``csv_path`` so the stdlib ``csv`` module name is not shadowed.
    csv_path = os.path.join(negative_control_folder,
                            'exon2_{}_splice_sites.csv'.format(direction))
    # ``read_csv(squeeze=True)`` was deprecated in pandas 1.4 and removed in
    # 2.0; squeezing the parsed frame along its columns is the documented
    # replacement and also works on older pandas.
    true = pd.read_csv(csv_path, index_col=0, header=None,
                       names=None).squeeze('columns')
    true.name = None
    true.index.name = None
    pdt.assert_series_equal(test, true)