22from pandas import DataFrame , read_csv
33from mbdiff .diff_query import DiffQuery , InvalidQuery
44from mbdiff .risk_ratio import risk_ratio
5- from mbdiff .attribute_mining import get_combs
65from numpy import nan
76from mbdiff .apriori import explain as aprio_explain
87
@@ -35,10 +34,10 @@ def diff(
3534 ignored_cols .append (column )
3635 sel_cols = [col for col in df .columns if col not in ignored_cols ]
3736 df = df [sel_cols ]
38- return _apriori_diff (df , max_order , min_support , min_risk , ignored_cols )
37+ return _apriori_diff (df , max_order , min_support , min_risk )
3938
4039
41- def _apriori_diff (df : DataFrame , max_order : int , min_support : float , min_risk : float , ignored_cols : List ):
40+ def _apriori_diff (df : DataFrame , max_order : int , min_support : float , min_risk : float ):
4241 """Explanation mining from attributes close to macrobase DIFF, with use of Apriori."""
4342 explanations = aprio_explain (df , min_support , max_order )
4443 results , invalid = [], []
@@ -50,18 +49,3 @@ def _apriori_diff(df: DataFrame, max_order: int, min_support: float, min_risk: f
5049 else :
5150 results .append (res )
5251 return results , invalid
53-
54-
def _naive_impl(df: DataFrame, max_order: int, min_support: float, min_risk: float, ignored_cols: List):
    """Mine explanations with a naive, Apriori-like attribute search (no minimization).

    Every attribute combination produced by ``get_combs`` is scored with
    ``risk_ratio`` and placed in one of two buckets depending on that score.

    Args:
        df: Data frame to mine; passed to both ``get_combs`` and ``risk_ratio``.
        max_order: Forwarded unchanged to ``get_combs``.
        min_support: Forwarded unchanged to ``get_combs``.
        min_risk: Risk-ratio threshold. A combination is kept only when its
            ratio is strictly greater than this value.
        ignored_cols: Forwarded unchanged to ``get_combs``.

    Returns:
        A ``(results, invalid)`` tuple. ``results`` is a list of
        ``(risk_ratio, combination)`` pairs that passed the threshold;
        ``invalid`` is a list of the combinations whose ratio was NaN or
        not above ``min_risk``.
    """
    # Local import keeps this block self-contained without touching the
    # module-level import section.
    from math import isnan

    combinations = get_combs(df, max_order, min_support, ignored_cols)
    results, invalid = [], []
    for combination in combinations:
        rr = risk_ratio(combination, df)
        # NaN has to be detected by value. An identity test (`rr is nan`)
        # only matches the one `numpy.nan` object, so a NaN produced by
        # arithmetic (e.g. 0.0 / 0.0 on numpy floats) would slip through,
        # fail the `<=` comparison as well, and be reported as a valid result.
        if isnan(rr) or rr <= min_risk:
            invalid.append(combination)
        else:
            results.append((rr, combination))
    return results, invalid
0 commit comments