Skip to content

Commit 337c178

Browse files
Issue-6: Fix query parameter validation
1 parent 6651785 commit 337c178

File tree

4 files changed

+29
-6
lines changed

4 files changed

+29
-6
lines changed

mbdiff/__main__.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,37 @@
11
import click
2-
from mbdiff.diff_query import DiffQuery
2+
from mbdiff.diff_query import DiffQuery, OPS, InvalidQuery
33
from mbdiff.diff import diff_file
44
from mbdiff.pretty_print import present_explanations, present_invalid
55

66

7+
def validate_query(ctx, param, value):
8+
if not value:
9+
raise click.BadParameter("Cannot be empty")
10+
value_sp = value.split()
11+
if len(value_sp) != 3:
12+
raise click.BadParameter(
13+
"""Query needs to be in format: "COLUMN OPERATOR VALUE" OPERATOR must be one of: <,>,=,<=,>="""
14+
)
15+
metric, op, value = value_sp
16+
if op not in OPS:
17+
raise click.BadParameter("Operator must be one of: <,>,=,<=,>=")
18+
return value_sp
19+
20+
21+
722
@click.command()
823
@click.argument("data", type=click.Path(exists=True))
924
@click.option("--min-support", default=0.1)
1025
@click.option("--min-risk", default=0.1)
1126
@click.option("--max-order", default=3)
12-
@click.option("--query")
27+
@click.option("--query", callback=validate_query, default=None)
1328
def main(data, min_support, min_risk, max_order, query):
14-
metric, op, value = query.split()
29+
metric, op, value = query
1530
query = DiffQuery(metric, op, value)
16-
explanations, invalid = diff_file(data, query, max_order, min_risk, min_support)
31+
try:
32+
explanations, invalid = diff_file(data, query, max_order, min_risk, min_support)
33+
except InvalidQuery as e:
34+
raise click.BadParameter(e)
1735
if explanations:
1836
explanations = sorted(explanations, key=lambda x: x[0], reverse=True)
1937
print("Explanations")
@@ -27,6 +45,5 @@ def main(data, min_support, min_risk, max_order, query):
2745
print("There were no invalid or below threshold attribute combinations")
2846

2947

30-
3148
if __name__ == "__main__":
3249
main()

mbdiff/diff.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from pandas import DataFrame, read_csv
2-
from mbdiff.diff_query import DiffQuery
2+
from mbdiff.diff_query import DiffQuery, InvalidQuery
33
from mbdiff.risk_ratio import risk_ratio
44
from mbdiff.attribute_mining import get_combs
55
from numpy import nan
@@ -14,6 +14,8 @@ def diff_file(
1414
):
1515
"""Given a tab delimited file and a distinguishing metric return explanations."""
1616
df = read_csv(path_to_df)
17+
if query.column not in df.columns:
18+
raise InvalidQuery("Query column is not present in the data")
1719
print("Outliers:")
1820
print(query.apply(df).to_string())
1921
return diff(df, query, max_order, min_risk, min_support)

mbdiff/diff_query.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,3 +54,6 @@ def fn(row):
5454
return "outlier" if self.row_matches(row) else "inlier"
5555

5656
df["outlier"] = df.apply(fn, axis=1)
57+
58+
class InvalidQuery(ValueError):
59+
pass

mbdiff/pretty_print.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ def present_explanations(explanations: List) -> str:
1515
pres_df.fillna("-", inplace=True)
1616
return tabulate(pres_df, headers="keys")
1717

18+
1819
def present_invalid(combinations: List[Dict]) -> str:
1920
"""Pretty print invalid attr combinations."""
2021
pres_df = DataFrame(combinations)

0 commit comments

Comments
 (0)