From 20909f995d48cdb3f16c3499a871ffbbd3947164 Mon Sep 17 00:00:00 2001 From: luevard <99143550+saucepommefrite@users.noreply.github.com> Date: Tue, 13 Feb 2024 16:49:12 +0100 Subject: [PATCH] :sparkles: Random Blue corner and Red Corner --- __pycache__/analise.cpython-311.pyc | Bin 0 -> 3082 bytes __pycache__/cleanData.cpython-311.pyc | Bin 0 -> 3148 bytes __pycache__/models.cpython-311.pyc | Bin 0 -> 3297 bytes __pycache__/runModel.cpython-311.pyc | Bin 0 -> 3715 bytes cleanData.py | 42 ++++++++++++++++++-------- main.py | 13 +++++--- models.py | 13 ++++++++ runModel.py | 18 ++++++++++- 8 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 __pycache__/analise.cpython-311.pyc create mode 100644 __pycache__/cleanData.cpython-311.pyc create mode 100644 __pycache__/models.cpython-311.pyc create mode 100644 __pycache__/runModel.cpython-311.pyc diff --git a/__pycache__/analise.cpython-311.pyc b/__pycache__/analise.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e1757e77cb47e5277ecca418404646e112f0b87 GIT binary patch literal 3082 zcmcguO>7fK6yBfNT_;ZL1PBfUXi5sy{3L2qpr}z)LHJ1sIY3Lnt>iAd8*tdQt(kQo z!I2D#L=GG{R3ZlsoKmXf;3MjZBT_GG$%km9NR=uM+=AeQDz)#8f3U+(ZyoQxoq6-- z&G>!qXMSyOZ7J(3`vme4#s zp?gNckWof)LtaawRrFOi?6oD@s{Hmu1m=kw^*RzAz|-cXM5nC--DDd;H`^hgTWsZm z6mNNfo5u|vF)b@cO=~8}tPFK{hm}cB=2$9|PI{(KQ#W~+<(iD8CR3K_Bbz$e4E6ce z6E(qeLT)6*vaUJ9TWH2gg34)!t7gvP(eoMW1P-eL6LFbGT*sv8RQhT~5IBbCz-AV+ zYDU51Y4ED>zRCi5Ao((Usx2?&?Fsf{ zIoIbq-fPzCEJMA$*xr0T@I7-V8*p>Zv`OuOk&Kn|oV3pd&Yc^*G+?GpH^rR(>c*2c&m3ygBGP81#y#h^A6G&(Vw8yK5Kma75wKi1; zY^o6^qc$%|2;<5|Al1D32(`Y`@(KV3I+yEugR#4Ie0v=O<@H=#xBjqg!@5aneux3t~Q|Cxy zY`p~wqhUjoX-$&2r)VN>G!ao?gXKDWb9!EXKbRkYXtvY2X?j`L>2;W&0#j!w*j$~= zQm*KQ8c3;}3`6APOc zrq&`|_pUGYEwWOiuN>(soCZcYvgdJMaqy|JI(TYbm7{0mmlD#~f)-ByNy2x>?~LD` zn4c(!Uj^#AIblcqZvyHfL_ssu2rvR+%eJzjE`d`lqhp9BpeA$LoZgrw8=%%~vZ8GO z+xUR2{vO$o-w@YR^%Lhw-mvu*qdCz++lJ7bA{1%5WtjV z-b^vxb~5c#GnIBQ=L1(0DT|Zuz@543W?Zgaz`{KX^*BebIK6hSnS~mZ_EU_RSDkn( z#Ty7$(pgSC$4t}hFcfjY^_vxC`b_1k`6=IpBIY_*owQAN;W=8k=4EWh<;Eo_ac~fB zV6DSWd%cRom4NZE;3ipW)`4SJ2Y57`g40$m7>su|P9^b3yrW@Wa2Q%qMBm#83_~`p z-6iNLnD2p?tpb4{JoL?zlTVL6I~`ndg3QdiB%M%BD)8w)rMwjMwV)-kzi{^cj>TKg zySG0y9<>#}2)YlKx(}DT4+o@cEw=5K8;iG!ok47{6dNqZ1_QF?Ip*GlAA{`&iclqw zm&oxlISw6>laar_xHlljO5|9X9D@$>#};xW(p4s1s8oz^lt`>hVgZTacr=LYEs=d? zvM(U}guw0b`SHRyb~onvco&bd>%!Fi)l1Tb`-dn}8xkMygw%;;%S~N@yMZWRdgUt| zfXC`O5#1znT*t!eUBz3pbk57p(4$~n@XyNbmHdi#3X0WC+ z%k0WI2bohtO4Jg1N^yx_?ht**r6dJE^r3&k!QI2k2(kAt=W`|3-coKLD6|jHZ9&XRK=R|kRd@7i&zl1cvCE3nZ(6{o-3M4LFYBW!pf&4C8~mH zDmg_IEULz`QNR|&XQro4kl+HJQ}VYV0ST^l*^s4Js?6Ui6fH8mzzdsse&M6VU-7D- zOS05aUA;WN*il7VUh9jzU|A&HmOfgazrnAquXel=Eq%c-TJflFs;BD@nCz)h{V zpwD@2=|eX(jd85!yxz@84a2KP2j5cl=Zdscox!)IZlLz86D|GV;Cs6MfXSY^fABr+ zz~Fo8!NL1rR(YBDzz&U95-gc;Hx*rn#fsdO>9Xc9u2Mf965wwV%B>(h0+0A!v*twB^(B{6*S@(3u;l*O%gIK0bAx>#kx&=23B->0C;fWF5y}(BnF$aU@Df(xBJjV zA0Hq8etPB?ri{%QwJ2{2SejWZh(%4-EpujhdFhiGQI!RKQLuz_#uf>icLjsr6u{TS z??Hf3_{;+!W%SA)eX#x8&mMfXJF@RTVf##@m3 zY_f^i=r0*3Gx;pN$Lz6t?2EVRne)|{bK%GTh&3);{d(f-+_z`#_*~Ufz4AD`yH*>k zpPcK38Lh{r8?oso;-V9;;;E{*JGrw_k7pY3j2+K7BkAhq&QEHm_D3e{kqIX=QT_QY z_xO7Cx|15+P4E1=Hou=bZKqCmEk|Sg$B88>AtyXkzV-&@h(t;{r3;*Y9tBQ!n@h~# z*4hRj07%dscSF%oB_w%z+q=|;mc@0_Ae8F^!#1*lJF zf42Z|$G1Htw$_)kV{>8FH@?Byz()5_2tGOmnft)i>utq*yu`imKZSWe@6LT2W?$=@ zPtW(z%m8*Z_YUuQw78O23cLsoMunR;B!%3sw^cC-8Ocm8(Fj?XrNbLEBalH-q8 zc2>&E2Z`j%L}ovcd3N>L`*tEzPh4!!`au&hzMs0J!a1qT%hc?CYIZNNcjfPI)l-)m zsY?~Ma`E?}O6VXM{(STgqkleIo3D)4gOiQmB&1KKpDg`p>GAcQ>*Zy~%ROAWzf{p` z>*b}oce>%7w!PB_$+vfJ+9RjSHx9VK!~Fex<@WB!<$RqRYj9)bd4~&BMjxoagd>%Y zE9cAC@OhZ&)N$g|iy9n_tZop_fMZ!O@%vyQ!|=#5)Pkidx8Ufc9N1f}$51WeH#gvD z#X4cl4U%dL=L~p*;esf`vrNyomVc;YgB79AJ$m<&V;!XiPePS%?GSjwVR`_NSl24U zLr*j@g*K)L#dHl}pU_?=o~D9d&;?b2JHXv>^i+3O^RLLAp}WrC2QER6qAGuYvtXzD z+Wa?=riWn|2PN!Ib%criDpENe8C;g*81K`42A}0Up4SD#pY?mZ zpa+a#Ht6wwkcYA%j}PcQhL{ySKB!BEoRvL3q=${(Y%lOVk5RTy6@m7v63_uv20EyQ zfkxC`phIdO&|$TI4kZWn+4hp0d*rraszqbENHDdgbcIrFLBj;T4Bg6Lt*|&>B(zOF zI8!WW)Y9^|aRC-lt!OekH*j8K{2hXEn|^pMeWOjl(`5rz^vq}`R><7lDVT_9r39uF zGjl!9*O6mCAsB4_`g&s;+T9Kj~)Rs=#!bvg)7P`asZv1@R z^lO#t3Sjq`=pH|l7z(!WO@w9TOVA{@7ckuz&EPGoE^ z65ox)H`Ckca%8d+nFOIUvL}u0N@JUWvXrPuiEY`ICT(Hz0H^ym;X3RyP0qlv05BG- z2LTq+>*=fmtiFykM?L~WAHblOfdF28v2tI$(idO5=?LI^GJ@Kg%nb zvpZ(huLp5vkL9C^X#rQSE8&tNwt z9e+-kL$O>)q0~lIp5r(NowwV&gFd#~dpDhwjoP|{&f4wWK_hm1cTmi3?+!X`xA$rw q#9gYQCK=+ncoj8?+~ho&$a5Fi*E)ge1e?|gOk3A>Oq)hMr2YcTlFq{b literal 0 HcmV?d00001 diff --git a/__pycache__/runModel.cpython-311.pyc b/__pycache__/runModel.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2375691c86e9863a174b04aedaabae42c7053dae GIT binary patch literal 3715 zcmeHKO-vg{6rS~t*Y*MhyP;I2RbBFf9YT#r0mZ6N8&a%7>;t5bwvnvu?qY3f)@XL^ z1hhy*s#K2LVyUTW^+1WZLBRZkK>!Tr97cOkbpQ>t=DC&APxsjNum@8GwsF=yTViV(GR^Awx zDJzy`tQe|dvxWtItwQXq3@;O2BZg_GMH*FFHmIzEh2~h=rLvLD{uX=7WHR|Go%zDj ziIpi=^_oJo%&m%AUDZw7%H;E(-_4M!nXhPi`NOq!DiJ;vVN@|kS$Tjhbbw>7{K3vc z0LUI5^6`+1hq(G$Lw4nBkUc!=<53rna`kxQYgZnJ?BVM^zU~J32n-^X*$z?|1Ww4$ zAercA<5j_2B6qY?e*D8MfORgdEYqyP)3>HWIoAt(($jWHV|r`>o_iPQ;0x zv^(GJ8e>7(4 ztvZczUUaLNj?u{7qD>UTq!I{hbYwkHfWQ>edU1{D8V!*Z8YU}6MWrz)R{8#*y&!UP z#U@!+43-Z(Hxpqs8no8)))XTNCcnwx0)u`AZvjy8TN5}1V0_7}Y!TMS;S}iR+_zJVsmYzJ`+mm5 zV?G{p@z??G*}h-D_yj)BVIL2>K|ViJEiUEHOqEqzr4j(Aij@IXO#XLNv1vzup?BaR zml&Lv&EI`XR}B`C?hsu+t7}7=O~aba&nzf{GYiB{Zf%}fC*~dpw+=DazI&eh+k3lG zGA!Fr7xWT@59`b>E)yCq6YgRt0vChW$;AWwkU2Fy5KV4?f3L!01;HWN7}&e=Y}kuu z{do3uaNvxlG*I3$H_7W3*V&Zk6K+OwVKwBcGRN+>wm<7eC={J>i{mfsUSTet%5&-9ql|kHK zawq4sh0I=G7nAcC&~H=;7}9<;U&7@2KgoGcY}P0 lMd71E)P`;>TscH-=)*#aRa@}a>Z#7Bs{b&yZIs96KLLC35q$sv literal 0 HcmV?d00001 diff --git a/cleanData.py b/cleanData.py index d893b28..dd60264 100644 --- a/cleanData.py +++ b/cleanData.py @@ -7,33 +7,49 @@ from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, confusion_matrix, classification_report from models import * +import random from analise import * +columns = ['B_fighter','R_fighter','title_bout', + 'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att', 'B_avg_TOTAL_STR_landed', + 'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed', 'B_avg_opp_LEG_landed', + 'B_avg_opp_SIG_STR_att', 'B_avg_opp_TOTAL_STR_att', + + 'R_avg_BODY_landed', 'R_avg_HEAD_landed', 'R_avg_TD_att', 'R_avg_TOTAL_STR_landed', + 'R_avg_opp_BODY_att', 'R_avg_opp_HEAD_landed', 'R_avg_opp_LEG_landed', + 'R_avg_opp_SIG_STR_att', 'R_avg_opp_TOTAL_STR_att', + + 'B_age', 'R_age','date','Winner','weight_class','B_Stance','R_Stance'] + +def swap_values(row): + if random.random() > 0.5: + for column in columns: + if column.startswith('B_'): + opposite_column = 'R_' + column[2:] + row[column], row[opposite_column] = row[opposite_column], row[column] + if column.startswith('Winner'): + print(row[column]) + if row[column] == 0: + row[column] = 2 + elif row[column] == 2: + row[column] = 0 + print(row[column]) + return row + return row def getData(): df = pd.read_csv('archive/data.csv') - columns = ['B_fighter','R_fighter','title_bout', - 'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att', 'B_avg_TOTAL_STR_landed', - 'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed', 'B_avg_opp_LEG_landed', - 'B_avg_opp_SIG_STR_att', 'B_avg_opp_TOTAL_STR_att', - - 'R_avg_BODY_landed', 'R_avg_HEAD_landed', 'R_avg_TD_att', 'R_avg_TOTAL_STR_landed', - 'R_avg_opp_BODY_att', 'R_avg_opp_HEAD_landed', 'R_avg_opp_LEG_landed', - 'R_avg_opp_SIG_STR_att', 'R_avg_opp_TOTAL_STR_att', - - 'B_age', 'R_age','date','Winner','weight_class','B_Stance','R_Stance'] - - limit_date = '2001-04-01' df = df.loc[df['date'] > limit_date, columns] label_encoder = LabelEncoder() - # Convertir les chaînes de caractères en nombres for column in df.select_dtypes(include=['object']).columns: df[column] = label_encoder.fit_transform(df[column]) + df = df.apply(swap_values, axis=1) + median_values = df.median() df.fillna(median_values, inplace=True) diff --git a/main.py b/main.py index 01ea1ad..2ad39ce 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,12 @@ X,y=getData() X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30, random_state=50) startRandomForest(X_train,X_test,y_train,y_test) -startKNN(X_train,X_test,y_train,y_test) -startSVM(X_train,X_test,y_train,y_test) -startDecisionTree(X_train,X_test,y_train,y_test) -startLogisticRegression(X_train,X_test,y_train,y_test) \ No newline at end of file +#startKNN(X_train,X_test,y_train,y_test) +#startSVM(X_train,X_test,y_train,y_test) +#startDecisionTree(X_train,X_test,y_train,y_test) +#startLogisticRegression(X_train,X_test,y_train,y_test) + +#startLinearSVC(X_train,X_test,y_train,y_test) +#startNaiveBayes(X_train,X_test,y_train,y_test) + +# https://scikit-learn.org/stable/_static/ml_map.png \ No newline at end of file diff --git a/models.py b/models.py index 81380be..9c08d57 100644 --- a/models.py +++ b/models.py @@ -4,6 +4,9 @@ from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.linear_model import SGDClassifier from sklearn import svm +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB + def RandomForest(X_train, X_test, y_train): random_forest = RandomForestClassifier(n_estimators=100, @@ -35,3 +38,13 @@ def LogisticRegress(X_train, X_test, y_train): logistic = LogisticRegression() logistic.fit(X_train,y_train) return logistic.predict(X_test),logistic + +def Linearsvc(X_train, X_test, y_train): + svc = LinearSVC(C=1.0, dual=False, verbose=True, loss="squared_hinge", multi_class="crammer_singer") + svc.fit(X_train,y_train) + return svc.predict(X_test),svc + +def GaussianNaiveBayes(X_train, X_test, y_train): + gnb = GaussianNB() + gnb.fit(X_train, y_train) + return gnb.predict(X_test),gnb \ No newline at end of file diff --git a/runModel.py b/runModel.py index 04cfddf..a2146be 100644 --- a/runModel.py +++ b/runModel.py @@ -45,4 +45,20 @@ def startLogisticRegression(X_train,X_test,y_train,y_test): report(lr_ac, lr_matrix, lr_class_report) seeMatrix(lr_matrix, lr.classes_) #rocCurve(y_test, y_pred) - #seeRocCurve(rf, X_train, y_train, 10) \ No newline at end of file + #seeRocCurve(rf, X_train, y_train, 10) + +def startLinearSVC(X_train,X_test,y_train,y_test): + y_pred, svc = Linearsvc(X_train, X_test, y_train) + svc_ac, svc_matrix, svc_class_report = calculateMatrix(y_test, y_pred) + report(svc_ac, svc_matrix, svc_class_report) + seeMatrix(svc_matrix, svc.classes_) + #rocCurve(y_test, y_pred) + #seeRocCurve(rf, X_train, y_train, 10) + +def startNaiveBayes(X_train,X_test,y_train,y_test): + y_pred, gnb = GaussianNaiveBayes(X_train, X_test, y_train) + gnb_ac, gnb_matrix, gnb_class_report = calculateMatrix(y_test, y_pred) + report(gnb_ac, gnb_matrix, gnb_class_report) + seeMatrix(gnb_matrix, gnb.classes_) + #rocCurve(y_test, y_pred) + #seeRocCurve(rf, X_train, y_train, 10)