% Journal article: Nass, Swift and Al Dallal (2019), KnE Social Sciences 3(25), pp. 45-63.
% NOTE(review): the citation key below contains a space, which classic BibTeX
% does not accept in a key. It is left unchanged here so that existing citations
% keep resolving; consider renaming it to Nass_Swift_Al_Dallal_2019 together
% with every \cite that uses it.
@article{Nass_Swift_Al Dallal_2019,
  author   = {Nass, Latifa and Swift, Stephen and Al Dallal, Ammar},
  title    = {Indepth Analysis of Medical Dataset Mining: A Comparitive Analysis on a Diabetes Dataset Before and After Preprocessing},
  journal  = {KnE Social Sciences},
  volume   = {3},
  number   = {25},
  pages    = {45--63},
  year     = {2019},
  month    = sep,
  doi      = {10.18502/kss.v3i25.5190},
  url      = {https://knepublishing.com/index.php/KnE-Social/article/view/5190},
  abstract = {Most of the healthcare organizations and medical research institutions store their patient’s data digitally for future references and for planning their future treatments. This heterogeneous medical dataset is very difficult to analyze due to its complexity and volume of data, in addition to having missing values and noise which makes this mining a tedious task. Efficient classification of medical dataset is a major data mining problem then and now. Diagnosis, prediction of diseases and the precision of results can be improved if relationships and patterns from these complex medical datasets are extracted efficiently. This paper analyses some of the major classification algorithms such as C4.5 ( J48), SMO, Naïve Bayes, KNN Classification algorithms and Random Forest and the performance of these algorithms are compared using WEKA. Performance evaluation of these algorithms is based on Accuracy, Sensitivity and Specificity and Error rate. The medical data set used in this study are Heart-Statlog Medical Data Set which holds medical data related to heart disease and Pima Diabetes Dataset which holds data related to Diabetics. This study contributes in finding the most suitable algorithm for classifying medical data and also reveals the importance of preprocessing in improving the classification performance. Comparative study of various performances of machine learning algorithms is done through graphical representation of the results.},
  keywords = {Data Mining, Health Care, Classification Algorithms, Accuracy, Sensitivity, Specificity, Error Rate},
}