# Source code for outrigger.tests.test_commandline


import filecmp
import os

import pandas as pd
import pandas.util.testing as pdt
import pytest


class TestSubcommand(object):
    """Tests for constructing ``outrigger.commandline.Subcommand``."""

    def test___init__(self, tmpdir):
        """Keyword arguments become attributes and listed folders exist.

        ``Subcommand`` is built from arbitrary keyword arguments plus an
        ``output`` path inside the temporary directory. Every keyword must
        then be readable back as an attribute with the same value, and every
        path in ``subcommand.folders`` must exist on disk.
        """
        from outrigger.commandline import Subcommand

        options = {
            'asdf': "beyonce",
            'jkl': 1234,
            'output': os.path.join(tmpdir.strpath, 'blue_ivy'),
        }

        subcommand = Subcommand(**options)

        # Each keyword should be stored unchanged on the instance
        for name in options:
            assert getattr(subcommand, name) == options[name]

        # Every folder the subcommand reports should be present on disk
        for folder in subcommand.folders:
            assert os.path.exists(folder)


def _assert_no_unmatched(comparison):
    """Fail if either side of a ``filecmp.dircmp`` has names the other lacks."""
    assert not comparison.left_only, \
        'Only in {folder}: {names}'.format(folder=comparison.left,
                                           names=comparison.left_only)
    assert not comparison.right_only, \
        'Only in {folder}: {names}'.format(folder=comparison.right,
                                           names=comparison.right_only)


def _read_sorted_table(filename):
    """Load a ``.csv`` or ``.bed`` table with rows and columns in fixed order.

    Row order is not meaningful for the comparison, so rows are sorted, the
    index is renumbered from zero and the columns are put in sorted order.
    """
    if filename.endswith('.csv'):
        table = pd.read_csv(filename)
        table = table.sort_values(table.columns.tolist())
    else:
        # BED files carry no header line and are tab-separated
        table = pd.read_csv(filename, sep='\t', header=None)
        table = table.sort_values([3, 0, 1, 2], kind='mergesort')
    return table.reset_index(drop=True).sort_index(axis=1)


def assert_directories_equal(dir1, dir2, ignore=None,
                             sortables=('exon', 'junction')):
    """Assert that two output directories hold equivalent contents.

    The top level of ``dir1`` and ``dir2`` must contain the same names, and
    every subdirectory they share must contain the same names as well. The
    files shared by those immediate subdirectories are then compared:

    - ``.csv`` files are read as tables with a header row, rows are sorted
      on every column, and the resulting frames must be equal.
    - ``.bed`` files are read as headerless tab-separated tables, rows are
      sorted on columns 3, 0, 1, 2, and the resulting frames must be equal.
    - any other file only needs to have the same size in bytes.

    Only one level of subdirectories is inspected: files located directly in
    ``dir1``/``dir2`` and anything nested deeper are not compared.

    Parameters
    ----------
    dir1, dir2 : str
        Paths of the two directories to compare.
    ignore : list of str, optional
        Names to skip; passed through to :class:`filecmp.dircmp`.
    sortables : tuple of str, optional
        Not used by the comparison; kept so existing callers keep working.

    Raises
    ------
    AssertionError
        If a name exists on only one side, or a shared file differs.
    """
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        # Older pandas releases only ship the helper in the private module
        from pandas.util.testing import assert_frame_equal

    directory_comparison = filecmp.dircmp(dir1, dir2, ignore=ignore)

    # Print the full difference report so a failing run shows what diverged
    directory_comparison.report_full_closure()
    _assert_no_unmatched(directory_comparison)

    for subdir in directory_comparison.subdirs.values():
        print(subdir.common_files)

        _assert_no_unmatched(subdir)
        for filename in subdir.common_files:
            filename1 = os.path.join(subdir.left, filename)
            filename2 = os.path.join(subdir.right, filename)

            # If the files are csv or bed tables, check that they're equal
            if filename.endswith(('.csv', '.bed')):
                assert_frame_equal(_read_sorted_table(filename1),
                                   _read_sorted_table(filename2))
                continue

            # Otherwise, just use the file sizes
            size1 = os.stat(filename1).st_size
            size2 = os.stat(filename2).st_size
            assert size1 == size2, \
                '{f1} ({size1}) and {f2} ({size2}) have ' \
                'different sizes'.format(f1=filename1, f2=filename2,
                                         size1=size1, size2=size2)


class TestCommandLine(object):
    """End-to-end checks of ``outrigger.commandline.CommandLine``.

    Each test builds an argument list and runs ``CommandLine`` on it. It then
    either inspects the captured output or compares the produced folder with
    a reference folder supplied by a fixture.
    """

    # Names skipped when comparing a freshly built index with the reference
    _INDEX_IGNORE = ('psi', '.DS_Store', 'validated', 'splice_sites.csv',
                     # Databases get stored in a weird random way... we're
                     # still checking that the final gtfs are the same
                     'gencode.vM10.annotation.subset.gtf.db')

    @staticmethod
    def _gtf_filename(tasic2016_unprocessed):
        """Return the path of the GTF subset inside the unprocessed data."""
        return os.path.join(tasic2016_unprocessed, 'gtf',
                            'gencode.vM10.annotation.subset.gtf')

    @staticmethod
    def _index_arguments(input_flag, inputs, gtf, output_folder, n_jobs=None):
        """Build the argument list for an ``index`` run.

        Parameters
        ----------
        input_flag : str
            Flag naming the kind of input, e.g. ``'--sj-out-tab'``.
        inputs : list of str
            Filenames that follow ``input_flag``.
        gtf : str
            Value for ``--gtf``.
        output_folder : str
            Value for ``--output``.
        n_jobs : str, optional
            Value for ``--n-jobs``; the flag is left out when this is None.

        Returns
        -------
        list of str
            Arguments ready to hand to ``CommandLine``.
        """
        arguments = ['index', input_flag]
        arguments.extend(inputs)
        arguments.extend(['--gtf', gtf, '--output', output_folder])
        if n_jobs is not None:
            arguments.extend(['--n-jobs', n_jobs])
        return arguments

    def _assert_index_equal(self, output_folder, expected_index):
        """Compare ``<output_folder>/index`` with a reference index folder."""
        # filecmp.dircmp concatenates ``ignore`` with a list, so pass a list
        assert_directories_equal(os.path.join(output_folder, 'index'),
                                 expected_index, list(self._INDEX_IGNORE))

    def test_no_arguments(self, capsys):
        """
        User passes no args, the usage message should be printed

        NOTE(review): this docstring used to say the call should fail with
        SystemExit, but ``CommandLine()`` is called outside ``pytest.raises``,
        so the test only passes when no exit is raised.
        """

        from outrigger.commandline import CommandLine

        CommandLine()

        text = '[-h] [--version] {index,validate,psi} ...'
        out, err = capsys.readouterr()

        # The usage text may land on either stream, so check both together
        outerr = out + err
        assert 'usage' in outerr
        assert text in outerr

    def test_help(self, capsys):
        """
        User asks for help, should SystemExit and give helpful output
        """
        from outrigger.commandline import CommandLine

        with pytest.raises(SystemExit):
            CommandLine(['--help'])

        out, err = capsys.readouterr()
        assert 'outrigger' in out
        assert 'psi' in out
        assert 'validate' in out
        assert 'usage' in out

    def test_main_version(self, capsys):
        """
        User asks for the version, should SystemExit and print the version
        """
        from outrigger.commandline import CommandLine
        from outrigger import __version__

        with pytest.raises(SystemExit):
            CommandLine(['--version'])

        out, err = capsys.readouterr()

        # Argparse for Python2 sends the version info to stderr, but Python3
        # argparse sends the info to stdout so we concatenate here
        outerr = out + err
        assert 'outrigger' in outerr
        assert __version__ in outerr

    def test_main_index(self, tmpdir, tasic2016_unprocessed, sj_filenames,
                        tasic2016_outrigger_output_index):
        """``index`` from SJ.out.tab files with ``--n-jobs 1``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        arguments = self._index_arguments(
            '--sj-out-tab', sj_filenames,
            self._gtf_filename(tasic2016_unprocessed), output_folder,
            n_jobs='1')
        CommandLine(arguments)

        self._assert_index_equal(output_folder,
                                 tasic2016_outrigger_output_index)

    def test_main_index_reads_csv(self, tmpdir, tasic2016_unprocessed,
                                  tasic2016_outrigger_output,
                                  tasic2016_outrigger_output_index):
        """``index`` from a junction reads csv with ``--n-jobs 1``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        reads_csv = os.path.join(tasic2016_outrigger_output, 'junctions',
                                 'reads.csv')
        arguments = self._index_arguments(
            '--junction-reads-csv', [reads_csv],
            self._gtf_filename(tasic2016_unprocessed), output_folder,
            n_jobs='1')
        CommandLine(arguments)

        self._assert_index_equal(output_folder,
                                 tasic2016_outrigger_output_index)

    def test_main_index_parallelized(self, tmpdir, tasic2016_unprocessed,
                                     sj_filenames,
                                     tasic2016_outrigger_output_index):
        """``index`` from SJ.out.tab files with ``--n-jobs -1``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        arguments = self._index_arguments(
            '--sj-out-tab', sj_filenames,
            self._gtf_filename(tasic2016_unprocessed), output_folder,
            n_jobs='-1')
        CommandLine(arguments)

        self._assert_index_equal(output_folder,
                                 tasic2016_outrigger_output_index)

    def test_main_index_bam(self, tmpdir, tasic2016_unprocessed,
                            bam_filenames, tasic2016_outrigger_output_bam):
        """``index`` from bam files, without an explicit ``--n-jobs``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        arguments = self._index_arguments(
            '--bam', bam_filenames,
            self._gtf_filename(tasic2016_unprocessed), output_folder)
        CommandLine(arguments)

        self._assert_index_equal(
            output_folder,
            os.path.join(tasic2016_outrigger_output_bam, 'index'))

    def test_main_validate(self, tmpdir, negative_control_folder,
                           negative_control_output):
        """``validate`` against the negative control index"""
        from outrigger.commandline import CommandLine

        args = ['validate',
                '--genome',
                os.path.join(negative_control_folder, 'chromsizes'),
                '--fasta',
                os.path.join(negative_control_folder, 'genome.fasta'),
                '--output', tmpdir.strpath,
                '--index', os.path.join(negative_control_output, 'index')]
        CommandLine(args)

        assert_directories_equal(tmpdir.strpath, negative_control_output,
                                 ignore=['.DS_Store', 'junctions', 'gtf'])

    def test_main_psi(self, tmpdir, tasic2016_unprocessed,
                      tasic2016_outrigger_output, sj_filenames):
        """``index`` followed by ``psi`` with ``--n-jobs 1``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        # ``psi`` reads the index from the output folder, so build it first
        arguments = self._index_arguments(
            '--sj-out-tab', sj_filenames,
            self._gtf_filename(tasic2016_unprocessed), output_folder)
        CommandLine(arguments)

        CommandLine(['psi', '--output', output_folder, '--n-jobs', '1'])

        assert_directories_equal(output_folder, tasic2016_outrigger_output,
                                 ignore=['.DS_Store'])

    def test_main_psi_parallelized(self, tmpdir, tasic2016_unprocessed,
                                   tasic2016_outrigger_output, sj_filenames):
        """``index`` followed by ``psi`` with ``--n-jobs -1``"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        # ``psi`` reads the index from the output folder, so build it first
        arguments = self._index_arguments(
            '--sj-out-tab', sj_filenames,
            self._gtf_filename(tasic2016_unprocessed), output_folder)
        CommandLine(arguments)

        CommandLine(['psi', '--output', output_folder, '--n-jobs', '-1'])

        assert_directories_equal(output_folder, tasic2016_outrigger_output,
                                 ignore=['.DS_Store'])

    def test_main_psi_bam(self, tmpdir, tasic2016_outrigger_output_index,
                          tasic2016_outrigger_output_bam, bam_filenames):
        """``psi`` from bam files against a pre-built reference index"""
        from outrigger.commandline import CommandLine

        output_folder = tmpdir.strpath

        args = ['psi', '--output', output_folder, '--n-jobs', '1',
                '--index', tasic2016_outrigger_output_index,
                '--bam']
        args.extend(bam_filenames)
        CommandLine(args)

        assert_directories_equal(output_folder,
                                 tasic2016_outrigger_output_bam,
                                 ignore=['.DS_Store', 'index'])