
split_csv

CLI module for splitting CSV data files.

The module is currently being reworked to integrate with Hugging Face datasets. Current design choices:

  • Focus only on train/test splits rather than train/val/test.

  • The splitter class receives a dict as input (sketched below).

  • save_to_disk writes the dataset to disk with both splits at once.
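To illustrate the dict-based interface, here is a minimal sketch of a splitter that takes a dict of column arrays and returns train/test index lists. RandomSplitter, test_fraction, and seed are illustrative names and not part of this module; only the get_split_indexes contract comes from the source below.

import numpy as np

class RandomSplitter:
    """Hypothetical splitter sketch: dict of column arrays in, train/test indexes out."""

    def __init__(self, test_fraction: float = 0.2, seed: int = 42) -> None:
        self.test_fraction = test_fraction
        self.seed = seed

    def get_split_indexes(self, columns: dict[str, np.ndarray]) -> tuple[list[int], list[int]]:
        # all columns have the same number of rows, so any one of them sizes the split
        n_rows = len(next(iter(columns.values())))
        order = np.random.default_rng(self.seed).permutation(n_rows)
        n_test = int(n_rows * self.test_fraction)
        return order[n_test:].tolist(), order[:n_test].tolist()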

Functions:

load_splitters_from_config_from_path

load_splitters_from_config_from_path(
    data_config_path: str,
) -> tuple[AbstractSplitter, list[str]]

Load the data config from a path and build the splitter it describes.

Parameters:

  • data_config_path (str) –

    Path to the data config file.

Returns:

  • tuple[AbstractSplitter, list[str]] –

    A tuple containing the splitter instance and the split input columns.

Source code in src/stimulus/cli/split_csv.py
def load_splitters_from_config_from_path(
    data_config_path: str,
) -> tuple[splitters.AbstractSplitter, list[str]]:
    """Load the data config from a path.

    Args:
        data_config_path: Path to the data config file.

    Returns:
        A tuple containing the splitter instance and split input columns.
    """
    with open(data_config_path) as file:
        data_config_dict = yaml.safe_load(file)
        data_config_obj = data_config_parser.IndividualSplitConfigDict(**data_config_dict)

    return data_config_parser.parse_individual_split_config(data_config_obj)
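A hypothetical invocation; "data_config.yaml" is a placeholder path whose contents must satisfy data_config_parser.IndividualSplitConfigDict (schema not shown here):

from stimulus.cli.split_csv import load_splitters_from_config_from_path

# placeholder config path; the YAML must match IndividualSplitConfigDict
splitter, split_columns = load_splitters_from_config_from_path("data_config.yaml")
print(type(splitter).__name__, split_columns)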

split_csv

split_csv(
    data_csv: str,
    config_yaml: str,
    out_path: str,
    *,
    force: bool = False
) -> None

Split the data according to the configuration.

Parameters:

  • data_csv (str) –

    Path to the input data file (parquet, with CSV fallback).

  • config_yaml (str) –

    Path to config YAML file.

  • out_path (str) –

    Path where the split dataset is saved via save_to_disk.

  • force (bool, default: False ) –

    Recompute the split even if a test split already exists.

Source code in src/stimulus/cli/split_csv.py
def split_csv(data_csv: str, config_yaml: str, out_path: str, *, force: bool = False) -> None:
    """Split the data according to the configuration.

    Args:
        data_csv: Path to the input data file (parquet, with CSV fallback).
        config_yaml: Path to config YAML file.
        out_path: Path where the split dataset is saved via save_to_disk.
        force: Recompute the split even if a test split already exists.
    """
    # create a splitter object from the config
    splitter, split_columns = load_splitters_from_config_from_path(config_yaml)
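    # load the data; parquet is tried first, with a fallback to CSV on ArrowInvalid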
    try:
        dataset_dict = datasets.load_dataset("parquet", data_files=data_csv)
    except pa.ArrowInvalid:
        logger.info("Data is not in parquet format, trying csv")
        dataset_dict = datasets.load_dataset("csv", data_files=data_csv)

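    # an existing "test" split is kept as-is when force=False, or merged back
    # into "train" and recomputed when force=True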
    if "test" in dataset_dict and not force:
        logger.info("Test split already exists and force was set to False. Skipping split.")
        dataset_dict.save_to_disk(out_path)
        return

    if "test" in dataset_dict and force:
        logger.info(
            "Test split already exists and force was set to True. Merging current test split into train and recalculating splits.",
        )
        dataset_dict["train"] = datasets.concatenate_datasets([dataset_dict["train"], dataset_dict["test"]])
        del dataset_dict["test"]

    dataset_dict_with_numpy_format = dataset_dict.with_format("numpy")
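    # materialize each requested split column as a numpy array, matching the
    # dict-based splitter interface described in the module notes above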
    column_data_dict = {}
    for col_name in split_columns:
        try:
            column_data_dict[col_name] = dataset_dict_with_numpy_format["train"][col_name]
        except KeyError as err:
            raise ValueError(
                f"Column '{col_name}' not found in dataset with columns {dataset_dict_with_numpy_format['train'].column_names}",
            ) from err

    if not column_data_dict:
        raise ValueError(
            f"No data columns were extracted for splitting. Input specified columns are {split_columns}, dataset has columns {dataset_dict_with_numpy_format['train'].column_names}",
        )
    train_indices, test_indices = splitter.get_split_indexes(column_data_dict)

    train_dataset = dataset_dict_with_numpy_format["train"].select(train_indices)
    test_dataset = dataset_dict_with_numpy_format["train"].select(test_indices)

    train_test_dataset_dict = datasets.DatasetDict({"train": train_dataset, "test": test_dataset})
    train_test_dataset_dict.save_to_disk(out_path)
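A minimal end-to-end sketch; the file paths are placeholders. Since the result is written with save_to_disk, it can be reloaded with datasets.load_from_disk:

import datasets

from stimulus.cli.split_csv import split_csv

# placeholder paths for the input table, the split config, and the output directory
split_csv("data.csv", "data_config.yaml", "split_dataset", force=False)

# save_to_disk wrote a DatasetDict directory, so load_from_disk returns both splits
dataset_dict = datasets.load_from_disk("split_dataset")
print(dataset_dict)  # DatasetDict with "train" and "test" splits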