% dtlearn(Goal, Examples, Attributes, DT) % Given Examples and Attributes induces a decision tree DT for Goal. % Splits on the attribute that resuts in the smallest error % Works for noise-free features, with Boolean attributes. dtlearn(Goal, Examples, Atts , Val) <- all_examples_agree(Goal, Examples, Val). dtlearn(Goal, Examples, Atts, if(Cond,YT,NT)) <- examples_disagree(Goal, Examples) & select_split(Goal, Examples, Atts, Cond, Rem_Atts) & split(Examples, Cond, Yes, No) & dtlearn(Goal, Yes, Rem_Atts, YT) & dtlearn(Goal, No, Rem_Atts, NT). % all_examples_agree(Goal, Examples, Val) is true if all examples agree that % Val is the value of attribute Goal. all_examples_agree(_,[],_). all_examples_agree(Att,[Obj|Rest],Val) <- val(Obj,Att,Val) & all_examples_agree(Att,Rest,Val). % examples_disagree(Goal, Examples) is true if the Examples disagree % on a value of the Goal attribute. examples_disagree(Goal, [Ex|RExs]) <- val(Ex,Goal,Val) & examples_disagree_on_Val(Goal, RExs ,Val). % examples_disagree_on_Val(Goal, Examples, Val) is true if there is an % example in Examples where the Goal attribute has a value different % than Val. examples_disagree_on_Val(Goal, [Ex|RExs],Val) <- val(Ex,Goal,Val1) & Val \= Val1. examples_disagree_on_Val(Goal, [Ex|RExs],Val) <- val(Ex,Goal,Val) & examples_disagree_on_Val(Goal, RExs, Val). % split(Examples,Att,T,F) is true if T is % the examples in Examples with attribute Att % having value YesVal and F is the examples with % attribute Att having the other value. split([],_,[],[]). split([Obj|Rest],Cond,[Obj|Yes],No) <- val(Obj,Cond,true) & split(Rest,Cond,Yes,No). split([Obj|Rest],Cond,Yes,[Obj|No]) <- val(Obj,Cond,false) & split(Rest,Cond,Yes,No). % select_split(Goal, Examples, Atts, Cond, Rem_Atts) % is true if Cond is the best attribute in Atts to determine Goal from % Examples. select_split(Goal, Examples, [Att|Ratts], Cond, Rem_Atts) <- error(Att,Goal,Examples,Err) & find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts). %%% find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts) %%% Ratts is the set of attributes to check %%% Att is the current best attribute, with error Err. %%% Goal is the goal attribute %%% Examples is the set of all examples %%% Cond is the best attribute to split on %%% Rem_Atts is the list of remaining attributes %%% Note: this is notdeterministic. Retrying will find equally %%% good attributes to split on. find_best_split([],BAtt,_,_,_,BAtt,[]). find_best_split([Att|Ratts],BAtt,BErr,Goal,Examples,Cond,[Att|Rem_Atts]) <- error(Att,Goal,Examples,Err) & Err >= BErr & find_best_split(Ratts,BAtt,BErr,Goal,Examples,Cond,Rem_Atts). find_best_split([Att|Ratts],BAtt,BErr,Goal,Examples,Cond,[BAtt|Rem_Atts]) <- error(Att,Goal,Examples,Err) & Err =< BErr & find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts). %%% error(Att,Goal,Examples,Err) is true if Err is the error that will %%% arise when slpitting the Examples on attribute Att when predicting Goal error(Att,Goal,Examples,Err) <- count(Examples,Att,Goal,TT,TF,FT,FF) & Err is min(TT,TF) + min(FT,FF). %%% count(Examples,Att,Goal,TT,TF,FT,FF) is true if %%% TT is the number of Examples where Att=true, Goal=true %%% TF is the number of Examples where Att=true, Goal=false %%% FT is the number of Examples where Att=false, Goal=true %%% FF is the number of Examples where Att=false, Goal=false count([],Att,Goal,0,0,0,0). count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <- val(Ex,Att,true) & val(Ex,Goal,true) & count(Examples,Att,Goal,TT0,TF,FT,FF) & TT is TT0+1. count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <- val(Ex,Att,true) & val(Ex,Goal,false) & count(Examples,Att,Goal,TT,TF0,FT,FF) & TF is TF0+1. count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <- val(Ex,Att,false) & val(Ex,Goal,true) & count(Examples,Att,Goal,TT,TF,FT0,FF) & FT is FT0+1. count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <- val(Ex,Att,false) & val(Ex,Goal,false) & count(Examples,Att,Goal,TT,TF,FT,FF0) & FF is FF0+1. %%% load 'c:/david/book/code-br/code/cilog/ch11/dtlearn_bool.pl'. %%% load 'c:/david/book/code/cilog/ch11/dtlearn_bool.pl'.