# Source code for outrigger.tests.test_commandline


import filecmp
import os

import pandas as pd
import pandas.util.testing as pdt
import pytest


class TestSubcommand(object):
    """Checks on the ``Subcommand`` base class from ``outrigger.commandline``."""

    def test___init__(self, tmpdir):
        """Keyword arguments become attributes and every folder is created."""
        from outrigger.commandline import Subcommand

        output = os.path.join(tmpdir.strpath, 'blue_ivy')
        kwargs = {'asdf': "beyonce", 'jkl': 1234, 'output': output}

        subcommand = Subcommand(**kwargs)

        # Each keyword passed to the constructor must be readable back
        # unchanged as an attribute of the instance
        for name in kwargs:
            assert getattr(subcommand, name) == kwargs[name]

        # Every path listed in ``folders`` must exist after construction
        for path in subcommand.folders:
            assert os.path.exists(path)
def assert_directories_equal(dir1, dir2, ignore=None,
                             sortables=('exon', 'junction')):
    """Assert that two directories hold equivalent contents.

    What is checked:

    * neither top-level directory has an entry the other lacks;
    * for every subdirectory common to both, neither side has an entry the
      other lacks, and every file common to both sides matches.

    How files are matched:

    * ``.csv`` and ``.bed`` files are parsed with pandas and compared as
      tables after sorting rows and columns, so row order does not matter;
    * any other file is compared by size in bytes only.

    Only the immediate subdirectories are inspected: files sitting directly in
    ``dir1``/``dir2`` and files nested more than one level deep are not
    content-compared here.

    Parameters
    ----------
    dir1, dir2 : str
        Paths of the two directories to compare.
    ignore : list of str, optional
        Names to skip, forwarded unchanged to ``filecmp.dircmp``.
    sortables : tuple of str, optional
        Currently unused; kept so existing callers keep working.

    Raises
    ------
    AssertionError
        If any of the checks above fails.
    """
    directory_comparison = filecmp.dircmp(dir1, dir2, ignore=ignore)
    # Printed so a failing run shows the whole diff in the captured output
    directory_comparison.report_full_closure()

    assert not directory_comparison.left_only, \
        'Only in {d}: {names}'.format(
            d=dir1, names=directory_comparison.left_only)
    assert not directory_comparison.right_only, \
        'Only in {d}: {names}'.format(
            d=dir2, names=directory_comparison.right_only)

    for subdir in directory_comparison.subdirs.values():
        print(subdir.common_files)
        assert not subdir.left_only, \
            'Only in {d}: {names}'.format(
                d=subdir.left, names=subdir.left_only)
        assert not subdir.right_only, \
            'Only in {d}: {names}'.format(
                d=subdir.right, names=subdir.right_only)

        for filename in subdir.common_files:
            filename1 = os.path.join(subdir.left, filename)
            filename2 = os.path.join(subdir.right, filename)

            # If the files are csv or bed tables, check that they're equal
            df1 = _read_sorted_table(filename1)
            if df1 is not None:
                df2 = _read_sorted_table(filename2)
                pd.testing.assert_frame_equal(df1, df2)
                continue

            # Otherwise, just use the file sizes
            size1 = os.stat(filename1).st_size
            size2 = os.stat(filename2).st_size
            assert size1 == size2, \
                '{f1} ({size1}) and {f2} ({size2}) have ' \
                'different sizes'.format(f1=filename1, f2=filename2,
                                         size1=size1, size2=size2)


def _read_sorted_table(filename):
    """Load a csv/bed file as a canonically ordered DataFrame, else None.

    Rows are sorted, the index is reset to 0..n-1 and the columns are put in
    sorted order, so two files holding the same records in a different order
    produce equal frames. Returns ``None`` for any other file extension.
    """
    if filename.endswith('.csv'):
        table = pd.read_csv(filename)
        table = table.sort_values(table.columns.tolist())
    elif filename.endswith('.bed'):
        # Bed files carry no header row; columns are addressed by position
        table = pd.read_table(filename, header=None)
        table = table.sort_values([3, 0, 1, 2], kind='mergesort')
    else:
        return None

    table = table.reset_index(drop=True)
    return table.sort_index(axis=1)
class TestCommandLine(object):
    """End-to-end tests that drive ``outrigger.commandline.CommandLine``.

    Each test builds an argument list, hands it to ``CommandLine`` and then
    either inspects the captured stdout/stderr or compares the produced output
    folder with an expected folder supplied by a fixture.
    """

    # File name of the annotation passed to ``--gtf`` by the index tests
    _GTF_BASENAME = 'gencode.vM10.annotation.subset.gtf'

    # Names skipped when comparing a freshly built index with the expected
    # one. This must stay a list: ``filecmp.dircmp`` concatenates it with
    # another list internally, so a tuple would raise ``TypeError``.
    _INDEX_IGNORE = ['psi', '.DS_Store', 'validated', 'splice_sites.csv',
                     # Databases get stored in a weird random way... we're
                     # still checking that the final gtfs are the same
                     'gencode.vM10.annotation.subset.gtf.db']

    def _index_arguments(self, input_flag, inputs, unprocessed_folder,
                         output_folder, n_jobs=None):
        """Build the argument list for an ``outrigger index`` invocation.

        ``input_flag`` is the option naming the input type (for example
        ``'--sj-out-tab'``) and ``inputs`` the file names that follow it.
        ``--n-jobs`` is appended only when ``n_jobs`` is given.
        """
        gtf = os.path.join(unprocessed_folder, 'gtf', self._GTF_BASENAME)
        arguments = ['index', input_flag]
        arguments.extend(inputs)
        arguments.extend(['--gtf', gtf, '--output', output_folder])
        if n_jobs is not None:
            arguments.extend(['--n-jobs', n_jobs])
        return arguments

    def _assert_index_matches(self, output_folder, expected_index):
        """Compare ``<output_folder>/index`` with the expected index folder."""
        # Pass a copy so the shared class-level list can never be mutated
        assert_directories_equal(os.path.join(output_folder, 'index'),
                                 expected_index, list(self._INDEX_IGNORE))

    def _check_index_then_psi(self, output_folder, unprocessed_folder,
                              sj_filenames, expected_output, n_jobs):
        """Run ``index`` then ``psi`` and compare the whole output folder."""
        from outrigger.commandline import CommandLine

        CommandLine(self._index_arguments(
            '--sj-out-tab', sj_filenames, unprocessed_folder, output_folder))
        CommandLine(['psi', '--output', output_folder, '--n-jobs', n_jobs])

        assert_directories_equal(output_folder, expected_output,
                                 ignore=['.DS_Store'])

    def test_no_arguments(self, capsys):
        """User passes no args: the usage line must be printed.

        No ``SystemExit`` is asserted here; the test only requires that the
        constructor returns and that usage text reached stdout or stderr.
        """
        from outrigger.commandline import CommandLine

        CommandLine()

        text = '[-h] [--version] {index,validate,psi} ...'
        out, err = capsys.readouterr()

        # The usage text may land on either stream, so check both together
        outerr = out + err
        assert 'usage' in outerr
        assert text in outerr

    def test_help(self, capsys):
        """User asks for help, should SystemExit and give helpful output."""
        from outrigger.commandline import CommandLine

        with pytest.raises(SystemExit):
            CommandLine(['--help'])

        out, _ = capsys.readouterr()
        assert 'outrigger' in out
        assert 'psi' in out
        assert 'validate' in out
        assert 'usage' in out

    def test_main_version(self, capsys):
        """``--version`` exits and reports the program name and version."""
        from outrigger.commandline import CommandLine
        from outrigger import __version__

        with pytest.raises(SystemExit):
            CommandLine(['--version'])

        out, err = capsys.readouterr()

        # Argparse for Python2 sends the version info to stderr, but Python3
        # argparse sends the info to stdout so we concatenate here
        outerr = out + err
        assert 'outrigger' in outerr
        assert __version__ in outerr

    def test_main_index(self, tmpdir, tasic2016_unprocessed, sj_filenames,
                        tasic2016_outrigger_output_index):
        """``index`` from SJ.out.tab files, single job."""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath
        CommandLine(self._index_arguments(
            '--sj-out-tab', sj_filenames, tasic2016_unprocessed,
            output_folder, n_jobs='1'))

        self._assert_index_matches(output_folder,
                                   tasic2016_outrigger_output_index)

    def test_main_index_reads_csv(self, tmpdir, tasic2016_unprocessed,
                                  tasic2016_outrigger_output,
                                  tasic2016_outrigger_output_index):
        """``index`` from a precomputed junction reads csv, single job."""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath
        reads_csv = os.path.join(tasic2016_outrigger_output,
                                 'junctions', 'reads.csv')
        CommandLine(self._index_arguments(
            '--junction-reads-csv', [reads_csv], tasic2016_unprocessed,
            output_folder, n_jobs='1'))

        self._assert_index_matches(output_folder,
                                   tasic2016_outrigger_output_index)

    def test_main_index_parallelized(self, tmpdir, tasic2016_unprocessed,
                                     sj_filenames,
                                     tasic2016_outrigger_output_index):
        """``index`` from SJ.out.tab files with ``--n-jobs -1``."""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath
        CommandLine(self._index_arguments(
            '--sj-out-tab', sj_filenames, tasic2016_unprocessed,
            output_folder, n_jobs='-1'))

        self._assert_index_matches(output_folder,
                                   tasic2016_outrigger_output_index)

    def test_main_index_bam(self, tmpdir, tasic2016_unprocessed,
                            bam_filenames, tasic2016_outrigger_output_bam):
        """``index`` from bam files, leaving ``--n-jobs`` unspecified."""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath
        CommandLine(self._index_arguments(
            '--bam', bam_filenames, tasic2016_unprocessed, output_folder))

        self._assert_index_matches(
            output_folder,
            os.path.join(tasic2016_outrigger_output_bam, 'index'))

    def test_main_validate(self, tmpdir, negative_control_folder,
                           negative_control_output):
        """``validate`` against the negative-control index."""
        from outrigger.commandline import CommandLine

        args = ['validate',
                '--genome', '{folder}/chromsizes'.format(
                    folder=negative_control_folder),
                '--fasta', '{folder}/genome.fasta'.format(
                    folder=negative_control_folder),
                '--output', tmpdir.strpath,
                '--index', os.path.join(negative_control_output, 'index')]
        CommandLine(args)

        assert_directories_equal(tmpdir.strpath, negative_control_output,
                                 ignore=['.DS_Store', 'junctions', 'gtf'])

    def test_main_psi(self, tmpdir, tasic2016_unprocessed,
                      tasic2016_outrigger_output, sj_filenames):
        """``index`` followed by ``psi`` with a single job."""
        self._check_index_then_psi(tmpdir.strpath, tasic2016_unprocessed,
                                   sj_filenames, tasic2016_outrigger_output,
                                   n_jobs='1')

    def test_main_psi_parallelized(self, tmpdir, tasic2016_unprocessed,
                                   tasic2016_outrigger_output, sj_filenames):
        """``index`` followed by ``psi`` with ``--n-jobs -1``."""
        self._check_index_then_psi(tmpdir.strpath, tasic2016_unprocessed,
                                   sj_filenames, tasic2016_outrigger_output,
                                   n_jobs='-1')

    def test_main_psi_bam(self, tmpdir, tasic2016_outrigger_output_index,
                          tasic2016_outrigger_output_bam, bam_filenames):
        """``psi`` computed from bam files against an existing index."""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath
        args = ['psi', '--output', output_folder, '--n-jobs', '1',
                '--index', tasic2016_outrigger_output_index, '--bam']
        args.extend(bam_filenames)
        CommandLine(args)

        assert_directories_equal(output_folder,
                                 tasic2016_outrigger_output_bam,
                                 ignore=['.DS_Store', 'index'])