% Symposium paper (Hirano and Nakayama, 2012) on running RLS-based adaptive
% filters on a GPU. Entry originates from an institutional-repository export;
% the citation key is kept as exported so existing citations keep resolving.
% The paper's abstract is stored in the `abstract` field (ignored by standard
% styles) rather than `note`, which would be printed in the bibliography.
% NOTE(review): the month value comes from the export -- confirm against the
% proceedings before relying on it.
@inproceedings{oai:kanazawa-u.repo.nii.ac.jp:00008996,
  author    = {Hirano, Akihiro and Nakayama, Kenji},
  title     = {Efficient Implementation of {RLS}-Based Adaptive Filters on {nVIDIA} {GeForce} Graphics Processing Unit},
  booktitle = {第27回信号処理シンポジウム講演論文集 = Proc. of 27th SIP Symposium},
  pages     = {241--245},
  year      = {2012},
  month     = jan,
  abstract  = {This paper presents efficient implementation of RLS-based adaptive filters with a large number of taps on nVIDIA GeForce graphics processing unit (GPU) and CUDA software development environment. Modification of the order and the combination of calculations reduces the number of accesses to slow off-chip memory. Assigning tasks into multiple threads also takes memory access order into account. Multiple shader processor arrays are used to handle a large matrix. For a 8192-tap case, a GPU program is almost 30-times faster than a CPU program. Real-time processing is possible for an 8kHz-sampling and 512-tap case by using 32 shader processors, which is only 25\% of GeForce 8800GTS.},
}