% Conference paper published in the ICCS 2006 proceedings (Springer).
% The doi field stores the bare DOI (no resolver prefix); styles add the URL part.
@inproceedings{Rodriguez2006_ICCS,
  author    = {Rodriguez, D. and Cuadrado, J. J. and Sicilia, M. A. and Ruiz, R.},
  editor    = {Alexandrov, Vassil N. and van Albada, Geert Dick and Sloot, Peter M. A. and Dongarra, Jack},
  title     = {Segmentation of Software Engineering Datasets Using the {M5} Algorithm},
  booktitle = {Computational Science -- {ICCS} 2006},
  year      = {2006},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {789--796},
  abstract  = {This paper reports an empirical study that uses clustering techniques to derive segmented models from software engineering repositories, focusing on the improvement of the accuracy of estimates. In particular, we used two datasets obtained from the International Software Benchmarking Standards Group (ISBSG) repository and created clusters using the M5 algorithm. Each cluster is associated with a linear model. We then compare the accuracy of the estimates so generated with the classical multivariate linear regression and least median squares. Results show that there is an improvement in the accuracy of the results when using clustering. Furthermore, these techniques can help us to understand the datasets better; such techniques provide some advantages to project managers while keeping the estimation process within reasonable complexity.},
  isbn      = {978-3-540-34386-8},
  doi       = {10.1007/11758549_106},
}