% SPAA '13 conference paper: parallel multiplication of sparse random matrices.
% Accented letters are wrapped as BibTeX special characters ({\c{c}}, {\'e})
% so that sorting and label generation treat each as a single letter.
@inproceedings{BBDGLST13,
  author    = {Ballard, G. and Bulu{\c{c}}, A. and Demmel, J. and Grigori, L. and Lipshitz, B. and Schwartz, O. and Toledo, S.},
  title     = {Communication optimal parallel multiplication of sparse random matrices},
  booktitle = {Proceedings of the 25th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '13},
  year      = {2013},
  isbn      = {978-1-4503-1572-2},
  location  = {Montr{\'e}al, Qu{\'e}bec, Canada},
  pages     = {222--231},
  numpages  = {10},
  url       = {http://doi.acm.org/10.1145/2486159.2486196},
  doi       = {10.1145/2486159.2486196},
  acmid     = {2486196},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {communication-avoiding algorithms, communication-cost lower bounds, random graphs, sparse matrix multiplication},
}

% SPAA '13 conference paper: Gaussian elimination with partial pivoting
% using a shape-morphing data layout.
% Accents in the location are wrapped as BibTeX special characters ({\'e}).
@inproceedings{BDLST13,
  author    = {Ballard, G. and Demmel, J. and Lipshitz, B. and Schwartz, O. and Toledo, S.},
  title     = {Communication efficient {Gaussian} elimination with partial pivoting using a shape morphing data layout},
  booktitle = {Proceedings of the 25th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '13},
  year      = {2013},
  isbn      = {978-1-4503-1572-2},
  location  = {Montr{\'e}al, Qu{\'e}bec, Canada},
  pages     = {232--240},
  numpages  = {9},
  url       = {http://doi.acm.org/10.1145/2486159.2486198},
  doi       = {10.1145/2486159.2486198},
  acmid     = {2486198},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {cache oblivious algorithms, communication-avoiding algorithms, matrix data layouts, matrix factorization},
}

% IPDPS '13 conference paper: blocked Aasen's algorithm with a dynamic
% scheduler on multicore architectures.
% The page range uses an en-dash (--); the booktitle carries the "\&" that the
% export dropped, matching the IPDPS '11 entry elsewhere in this file.
@inproceedings{BB+13,
  author    = {Ballard, G. and Becker, D. and Demmel, J. and Dongarra, J. and Druinsky, A. and Peled, I. and Schwartz, O. and Toledo, S. and Yamazaki, I.},
  title     = {Implementing a Blocked {Aasen's} Algorithm with a Dynamic Scheduler on Multicore Architectures},
  booktitle = {Proceedings of the 27th IEEE International Parallel \& Distributed Processing Symposium},
  series    = {IPDPS '13},
  year      = {2013},
  month     = may,
  pages     = {895--907},
  doi       = {10.1109/IPDPS.2013.98},
  issn      = {1530-2075},
}

% Journal of the ACM article (vol. 59, no. 6, article 32): graph expansion
% and communication costs of fast matrix multiplication.
@article{BDHS12,
  author     = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title      = {Graph expansion and communication costs of fast matrix multiplication},
  journal    = {J. ACM},
  volume     = {59},
  number     = {6},
  articleno  = {32},
  pages      = {32:1--32:23},
  numpages   = {23},
  month      = dec,
  year       = {2012},
  issue_date = {December 2012},
  issn       = {0004-5411},
  doi        = {10.1145/2395116.2395121},
  url        = {http://doi.acm.org/10.1145/2395116.2395121},
  acmid      = {2395121},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Communication-avoiding algorithms, I/O-complexity, fast matrix multiplication}
}

% SC '12 conference paper: implementation and performance of
% communication-avoiding parallel Strassen.
% The proper noun is protected as a whole word ({Strassen}) rather than by a
% single braced letter, which can disturb kerning and hyphenation.
@inproceedings{LBDS12,
  author    = {B. Lipshitz and G. Ballard and J. Demmel and O. Schwartz},
  title     = {Communication-avoiding parallel {Strassen}: Implementation and performance},
  booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '12},
  year      = {2012},
  isbn      = {978-1-4673-0804-5},
  location  = {Salt Lake City, Utah},
  pages     = {101:1--101:11},
  articleno = {101},
  numpages  = {11},
  url       = {http://dl.acm.org/citation.cfm?id=2388996.2389133},
  acmid     = {2389133},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}

% Paper in "Design and Analysis of Algorithms" (LNCS 7659): graph expansion
% analysis for fast rectangular matrix multiplication.
% The page range uses an en-dash (--) instead of a single hyphen.
@inproceedings{BDHLS12-RMM,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Graph Expansion Analysis for Communication Costs of Fast Rectangular Matrix Multiplication},
  booktitle = {Design and Analysis of Algorithms},
  editor    = {G. Even and D. Rawitz},
  series    = {Lecture Notes in Computer Science},
  volume    = {7659},
  year      = {2012},
  pages     = {13--36},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-34861-7},
  doi       = {10.1007/978-3-642-34862-4_2},
  url       = {http://dx.doi.org/10.1007/978-3-642-34862-4_2},
}

% SPAA '12 conference paper: communication-optimal parallel algorithm for
% Strassen's matrix multiplication.
% The proper noun is protected as a whole word ({Strassen's}) rather than by a
% single braced letter.
@inproceedings{BDHLS12-CAPS,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Communication-optimal parallel algorithm for {Strassen's} matrix multiplication},
  booktitle = {Proceedings of the 24th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '12},
  year      = {2012},
  isbn      = {978-1-4503-1213-4},
  location  = {Pittsburgh, Pennsylvania, USA},
  pages     = {193--204},
  numpages  = {12},
  url       = {http://doi.acm.org/10.1145/2312005.2312044},
  doi       = {10.1145/2312005.2312044},
  acmid     = {2312044},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {communication-avoiding algorithms, fast matrix multiplication, parallel algorithms},
}

% SPAA '12 brief announcement: strong scaling of matrix multiplication and
% memory-independent communication lower bounds.
@inproceedings{BDHLS12-SS,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Brief announcement: strong scaling of matrix multiplication algorithms and memory-independent communication lower bounds},
  booktitle = {Proceedings of the 24th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '12},
  location  = {Pittsburgh, Pennsylvania, USA},
  year      = {2012},
  pages     = {77--79},
  numpages  = {3},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1213-4},
  doi       = {10.1145/2312005.2312021},
  url       = {http://doi.acm.org/10.1145/2312005.2312021},
  acmid     = {2312021},
  keywords  = {communication-avoiding algorithms, fast matrix multiplication, strong scaling},
}

% PPoPP '12 conference paper: communication-avoiding successive band reduction.
% "Symposium" is capitalized in the booktitle so the venue name is cased like
% the other proceedings titles in this file.
@inproceedings{BDK12,
  author    = {G. Ballard and J. Demmel and N. Knight},
  title     = {Communication avoiding successive band reduction},
  booktitle = {Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '12},
  year      = {2012},
  isbn      = {978-1-4503-1160-1},
  location  = {New Orleans, Louisiana, USA},
  pages     = {35--44},
  numpages  = {10},
  url       = {http://doi.acm.org/10.1145/2145816.2145822},
  doi       = {10.1145/2145816.2145822},
  acmid     = {2145822},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {band reduction, communication avoiding algorithms, symmetric eigenvalue problem},
}

% SIAM Journal on Matrix Analysis and Applications article (vol. 32, no. 3):
% minimizing communication in numerical linear algebra.
% The page range uses an en-dash (--) instead of a single hyphen.
@article{BDHS11b,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Minimizing Communication in Numerical Linear Algebra},
  journal   = {SIAM Journal on Matrix Analysis and Applications},
  volume    = {32},
  number    = {3},
  pages     = {866--901},
  year      = {2011},
  publisher = {SIAM},
  keywords  = {linear algebra algorithms; bandwidth; latency; communication-avoiding; lower bound},
  url       = {http://link.aip.org/link/?SML/32/866/1},
  doi       = {10.1137/090769156},
}

% SPAA '11 conference paper: graph expansion and communication costs of fast
% matrix multiplication.
% NOTE(review): the ": regular submission" suffix in the title looks like a
% digital-library export artifact -- confirm before removing.
@inproceedings{BDHS11a,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Graph expansion and communication costs of fast matrix multiplication: regular submission},
  booktitle = {Proceedings of the 23rd ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '11},
  location  = {San Jose, California, USA},
  year      = {2011},
  pages     = {1--12},
  numpages  = {12},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0743-7},
  doi       = {10.1145/1989493.1989495},
  url       = {http://doi.acm.org/10.1145/1989493.1989495},
  acmid     = {1989495},
  keywords  = {communication avoiding algorithms, fast matrix multiplication, i/o-complexity},
}

% SPAA '11 brief announcement: communication bounds for heterogeneous
% architectures.
@inproceedings{BDG11,
  author    = {Ballard, G. and Demmel, J. and Gearhart, A.},
  title     = {Brief announcement: communication bounds for heterogeneous architectures},
  booktitle = {Proceedings of the 23rd ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '11},
  location  = {San Jose, California, USA},
  year      = {2011},
  pages     = {257--258},
  numpages  = {2},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0743-7},
  doi       = {10.1145/1989493.1989531},
  url       = {http://doi.acm.org/10.1145/1989493.1989531},
  acmid     = {1989531},
  keywords  = {communication-avoiding, heterogeneity},
}

% IPDPS '11 conference paper: communication-avoiding QR decomposition on GPUs.
% The acronym is protected as a whole word ({GPUs}) rather than with a
% mid-word brace group.
@inproceedings{ABDK11,
  author    = {Anderson, M. and Ballard, G. and Demmel, J. and Keutzer, K.},
  title     = {Communication-Avoiding {QR} Decomposition for {GPUs}},
  booktitle = {Proceedings of the 2011 IEEE International Parallel \& Distributed Processing Symposium},
  series    = {IPDPS '11},
  year      = {2011},
  isbn      = {978-0-7695-4385-7},
  pages     = {48--58},
  numpages  = {11},
  url       = {http://dx.doi.org/10.1109/IPDPS.2011.15},
  doi       = {10.1109/IPDPS.2011.15},
  acmid     = {2059520},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}

% Workshop paper from the 2011 IEEE IPDPS Workshops and PhD Forum: computing
% tensor eigenvalues on a GPU.
% Typed as @inproceedings with the venue in booktitle (it is a proceedings,
% not a journal). The placeholder volume "0" and the trailing ", " in the
% venue name are dropped, the page range uses an en-dash, and the acronym in
% the title is brace-protected.
@inproceedings{BKP11,
  author    = {G. Ballard and T. Kolda and T. Plantenga},
  title     = {Efficiently Computing Tensor Eigenvalues on a {GPU}},
  booktitle = {2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and PhD Forum},
  year      = {2011},
  pages     = {1340--1348},
  issn      = {1530-2075},
  url       = {http://www.computer.org/portal/web/csdl/doi/10.1109/IPDPS.2011.287},
  doi       = {10.1109/IPDPS.2011.287},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}

% SIAM Journal on Scientific Computing article (vol. 32, no. 6):
% communication-optimal parallel and sequential Cholesky decomposition.
% The page range uses an en-dash (--); the proper noun is protected as a whole
% word ({Cholesky}) rather than by a single braced letter.
@article{BDHS10,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Communication-optimal Parallel and Sequential {Cholesky} Decomposition},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {32},
  number    = {6},
  pages     = {3495--3523},
  year      = {2010},
  publisher = {SIAM},
  keywords  = {Cholesky decomposition; bandwidth; latency; communication avoiding; algorithm; lower bound},
  url       = {http://link.aip.org/link/?SCE/32/3495/1},
  doi       = {10.1137/090760969},
}

% SPAA '09 conference paper (extended abstract): communication-optimal
% parallel and sequential Cholesky decomposition.
% The booktitle ordinal is 21st: this file lists SPAA '11 as the 23rd,
% SPAA '12 as the 24th and SPAA '13 as the 25th, so SPAA '09 is the 21st.
% The proper noun is protected as a whole word ({Cholesky}).
@inproceedings{BDHS09,
  author    = {Ballard, G. and Demmel, J. and Holtz, O. and Schwartz, O.},
  title     = {Communication-optimal parallel and sequential {Cholesky} decomposition: extended abstract},
  booktitle = {Proceedings of the 21st ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '09},
  year      = {2009},
  isbn      = {978-1-60558-606-9},
  location  = {Calgary, AB, Canada},
  pages     = {245--252},
  numpages  = {8},
  url       = {http://doi.acm.org/10.1145/1583991.1584054},
  doi       = {10.1145/1583991.1584054},
  acmid     = {1584054},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Cholesky decomposition, algorithm, bandwidth, communication avoiding, latency, lower bound},
}


Last updated 14 August 2013.