Skip to content

Commit

Permalink
first draft for JOSS paper
Browse files Browse the repository at this point in the history
  • Loading branch information
Saransh-cpp committed Aug 16, 2024
1 parent 13a6370 commit dc3287f
Show file tree
Hide file tree
Showing 3 changed files with 268 additions and 0 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/paper.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Builds a draft PDF of the JOSS paper and uploads it as a workflow artifact.
name: Draft PDF
# Runs on every push; workflow_dispatch additionally allows manual runs.
on: [push, workflow_dispatch]

jobs:
  paper:
    runs-on: ubuntu-latest
    name: Paper Draft
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build draft PDF
        uses: openjournals/openjournals-draft-action@master
        with:
          journal: joss
          # Path to the paper source within the repository.
          paper-path: paper/paper.md
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: paper
          # PDF file uploaded as the "paper" artifact.
          path: paper/paper.pdf
163 changes: 163 additions & 0 deletions paper/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
% NumPy reference paper. Authors are given in the unambiguous "Last, First"
% form. The compound surname "Fernandez del Rio" is wrapped in braces so that
% "del" is not parsed as a name particle with "Rio" alone as the family name.
@article{harris:2020,
  author    = {Harris, Charles R. and
               Millman, K. Jarrod and
               van der Walt, St{\'{e}}fan J. and
               Gommers, Ralf and
               Virtanen, Pauli and
               Cournapeau, David and
               Wieser, Eric and
               Taylor, Julian and
               Berg, Sebastian and
               Smith, Nathaniel J. and
               Kern, Robert and
               Picus, Matti and
               Hoyer, Stephan and
               van Kerkwijk, Marten H. and
               Brett, Matthew and
               Haldane, Allan and
               {Fern{\'{a}}ndez del R{\'{i}}o}, Jaime and
               Wiebe, Mark and
               Peterson, Pearu and
               G{\'{e}}rard-Marchant, Pierre and
               Sheppard, Kevin and
               Reddy, Tyler and
               Weckesser, Warren and
               Abbasi, Hameer and
               Gohlke, Christoph and
               Oliphant, Travis E.},
  title     = {Array programming with {NumPy}},
  journal   = {Nature},
  volume    = {585},
  number    = {7825},
  pages     = {357--362},
  year      = {2020},
  month     = sep,
  publisher = {Springer Science and Business Media {LLC}},
  doi       = {10.1038/s41586-020-2649-2},
  url       = {https://doi.org/10.1038/s41586-020-2649-2}
}

% coffea software release (Zenodo DOI plus source repository).
@software{Gray:2023,
  author  = {Gray, Lindsey and
             Smith, Nicholas and
             Novak, Andrzej and
             Fackeldey, Peter and
             Tovar, Benjamin and
             Chen, Yi-Mu and
             Watts, Gordon and
             Krommydas, Iason},
  title   = {{coffea}},
  version = {0.7.21},
  year    = {2023},
  month   = mar,
  doi     = {10.5281/zenodo.7733568},
  url     = {https://github.com/CoffeaTeam/coffea}
}

% Awkward Array software record (Zenodo DOI).
@software{Pivarski:2018,
  author = {Pivarski, Jim and
            Osborne, Ianna and
            Ifrim, Ioana and
            Schreiner, Henry and
            Hollands, Angus and
            Biswas, Anish and
            Das, Pratyush and
            Roy Choudhury, Santam and
            Smith, Nicholas and
            Goyal, Manasvi},
  title  = {{Awkward Array}},
  year   = {2018},
  month  = oct,
  doi    = {10.5281/zenodo.4341376}
}

% Dask conference paper. The page range belongs in `pages` (with an en-dash
% "--"), not in `number`, otherwise styles drop it or print it as an issue.
% NOTE(review): organization = {Citeseer} looks like an indexer artifact
% rather than the proceedings publisher -- confirm before submission.
@inproceedings{rocklin:2015,
  author       = {Rocklin, Matthew},
  title        = {{Dask}: Parallel Computation with Blocked Algorithms and Task Scheduling},
  booktitle    = {Proceedings of the 14th {Python} in Science Conference},
  pages        = {130--136},
  year         = {2015},
  organization = {Citeseer}
}

% Numba workshop paper. Acronyms and proper nouns in the title are braced so
% that sentence-casing styles do not lower-case them.
@inproceedings{lam:2015,
  author    = {Lam, Siu Kwan and Pitrou, Antoine and Seibert, Stanley},
  title     = {{Numba}: A {LLVM}-Based {Python} {JIT} Compiler},
  booktitle = {Proceedings of the Second Workshop on the {LLVM} Compiler Infrastructure in {HPC}},
  pages     = {1--6},
  year      = {2015}
}

% SymPy reference paper. Accented letters are written as brace-wrapped
% groups (e.g. {\v{C}}) so that each accent counts as a single letter for
% sorting and labels; proper nouns in the title are brace-protected.
@article{Meurer:2017,
  author   = {Meurer, Aaron and
              Smith, Christopher P. and
              Paprocki, Mateusz and
              {\v{C}}ert{\'{i}}k, Ond{\v{r}}ej and
              Kirpichev, Sergey B. and
              Rocklin, Matthew and
              Kumar, AMiT and
              Ivanov, Sergiu and
              Moore, Jason K. and
              Singh, Sartaj and
              Rathnayake, Thilina and
              Vig, Sean and
              Granger, Brian E. and
              Muller, Richard P. and
              Bonazzi, Francesco and
              Gupta, Harsh and
              Vats, Shivam and
              Johansson, Fredrik and
              Pedregosa, Fabian and
              Curry, Matthew J. and
              Terrel, Andy R. and
              Rou{\v{c}}ka, {\v{S}}t{\v{e}}p{\'{a}}n and
              Saboo, Ashutosh and
              Fernando, Isuru and
              Kulal, Sumith and
              Cimrman, Robert and
              Scopatz, Anthony},
  title    = {{SymPy}: Symbolic Computing in {Python}},
  journal  = {PeerJ Computer Science},
  volume   = {3},
  pages    = {e103},
  year     = {2017},
  month    = jan,
  issn     = {2376-5992},
  doi      = {10.7717/peerj-cs.103},
  url      = {https://doi.org/10.7717/peerj-cs.103},
  keywords = {Python, Computer algebra system, Symbolics},
  abstract = {
SymPy is an open source computer algebra system written in pure Python. It is built with a focus on extensibility and ease of use, through both interactive and programmatic applications. These characteristics have led SymPy to become a popular symbolic library for the scientific Python ecosystem. This paper presents the architecture of SymPy, a description of its features, and a discussion of select submodules. The supplementary material provide additional examples and further outline details of the architecture and features of SymPy.
}
}

% FLArE / Forward Physics Facility paper. Acronyms and proper nouns in the
% title are brace-protected against sentence-casing; the URL uses the
% https://doi.org/ resolver, matching the other entries in this file.
@article{Kling:2023,
  author    = {Kling, Felix and Kuo, Jui-Lin and Trojanowski, Sebastian and Tsai, Yu-Dai},
  title     = {{FLArE} up Dark Sectors with {EM} Form Factors at the {LHC} {Forward Physics Facility}},
  journal   = {Nuclear Physics B},
  volume    = {987},
  pages     = {116103},
  year      = {2023},
  month     = feb,
  publisher = {Elsevier BV},
  issn      = {0550-3213},
  doi       = {10.1016/j.nuclphysb.2023.116103},
  url       = {https://doi.org/10.1016/j.nuclphysb.2023.116103}
}

% Analysis Grand Challenge proceedings paper. Only the proper nouns in the
% title are braced (not the whole title), and the journal name is spelled
% out in full so the bibliography style decides on any abbreviation.
@article{Held:2024,
  author        = {Held, Alexander and Kauffman, Elliott and Shadura, Oksana and Wightman, Andrew},
  title         = {Physics Analysis for the {HL-LHC}: Concepts and Pipelines in Practice with the {Analysis Grand Challenge}},
  journal       = {EPJ Web of Conferences},
  volume        = {295},
  pages         = {06016},
  year          = {2024},
  doi           = {10.1051/epjconf/202429506016},
  eprint        = {2401.02766},
  archivePrefix = {arXiv},
  primaryClass  = {hep-ex}
}

% Particle Transformer conference paper, with its arXiv identifier.
@inproceedings{Qu:2022,
  author        = {Qu, Huilin and
                   Li, Congqiao and
                   Qian, Sitian},
  title         = {{Particle Transformer} for Jet Tagging},
  booktitle     = {{Proceedings of the 39th International Conference on Machine Learning}},
  pages         = {18281--18292},
  year          = {2022},
  eprint        = {2202.03772},
  archivePrefix = {arXiv},
  primaryClass  = {hep-ph}
}

% MadMiner paper. Only the software name in the title is braced (not the
% whole title), and the journal name is spelled out in full so the
% bibliography style decides on any abbreviation.
@article{Brehmer:2020,
  author        = {Brehmer, Johann and Kling, Felix and Espejo, Irina and Cranmer, Kyle},
  title         = {{MadMiner}: Machine Learning-Based Inference for Particle Physics},
  journal       = {Computing and Software for Big Science},
  volume        = {4},
  number        = {1},
  pages         = {3},
  year          = {2020},
  doi           = {10.1007/s41781-020-0035-2},
  eprint        = {1907.10621},
  archivePrefix = {arXiv},
  primaryClass  = {hep-ph},
  SLACcitation  = {%%CITATION = ARXIV:1907.10621;%%}
}

% scikit-hep/fastjet software release archived on Zenodo.
@software{aryan:2023,
  author    = {Roy, Aryan and Pivarski, Jim and Papageorgakis, Chris and Duarte, Javier and Gray, Lindsey and Schreiner, Henry and Kansal, Raghav and Feickert, Matthew and Lieret, Kilian and ssrothman},
  title     = {scikit-hep/fastjet},
  publisher = {Zenodo},
  year      = {2023},
  month     = jan,
  doi       = {10.5281/zenodo.7504167},
  url       = {https://doi.org/10.5281/zenodo.7504167}
}

% spyral-utils source repository. Without a `year` field the reference is
% rendered as undated ("n.d.").
% NOTE(review): year taken from the citation key -- confirm against the
% release actually used.
@software{spyral-utils:2024,
  author = {McCann, Gordon},
  title  = {{spyral-utils}},
  year   = {2024},
  url    = {https://github.com/ATTPC/spyral-utils}
}

% weaver-core source repository. Without a `year` field the reference is
% rendered as undated ("n.d.").
% NOTE(review): year taken from the citation key -- confirm against the
% release actually used.
@software{weaver-core:2024,
  author = {Qu, Huilin and Duarte, Javier and Chao, Stephen and sunwayihep},
  title  = {{weaver-core}},
  year   = {2024},
  url    = {https://github.com/hqucms/weaver-core}
}

% pylhe software record (Zenodo DOI plus source repository).
% NOTE(review): this entry has no `year` field, so it will be rendered as
% undated -- TODO add the release year once confirmed.
@software{pylhe,
  author = {Heinrich, Lukas and
            Feickert, Matthew and
            Rodrigues, Eduardo},
  title  = {{pylhe}},
  doi    = {10.5281/zenodo.1217031},
  url    = {https://github.com/scikit-hep/pylhe}
}
85 changes: 85 additions & 0 deletions paper/paper.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
---
# JOSS paper metadata: title, tags, authors, affiliations, date, bibliography.
title: "Vector: creating and manipulating jagged arrays of Lorentz vectors"
tags:
  - Python
  - vector algebra
  - high energy physics
authors:
  - name: Henry Schreiner
    orcid: 0000-0002-7833-783X
    equal-contrib: true
    affiliation: 1
  - name: Jim Pivarski
    orcid: 0000-0002-6649-343X
    equal-contrib: true
    corresponding: true
    affiliation: 1
  - name: Saransh Chopra
    orcid: 0000-0003-3046-7675
    equal-contrib: true
    affiliation: 1

affiliations:
  - name: Princeton University
    index: 1
date: 10 August 2024
bibliography: paper.bib
---

# Summary

Vector algebra is a crucial component of data analysis pipelines in high energy
physics, enabling physicists to transform raw data into meaningful results that
can be visualized. Given that high energy physics data is not uniform, the
vector algebra frameworks or libraries are expected to work readily on
non-uniform or jagged data, allowing users to perform operations on an entire
jagged array in a minimal number of passes. Furthermore, optimizing memory usage and
processing time has become essential with the increasing computational demands
at the LHC. Vector is a Python library for creating and manipulating 2D, 3D,
and Lorentz vectors, especially arrays of vectors, to solve common physics
problems in a NumPy-like [@harris:2020] way. The library enables physicists to
operate on high energy physics data in a high-level language without
compromising speed. The library is already in use at the LHC and is a part of
frameworks such as Coffea [@Gray:2023], employed by physicists across multiple
high energy physics experiments.

# Statement of need

Vector is currently the only Lorentz vector library that provides a Pythonic
interface on top of a C++ computational backend (through Awkward Array
[@Pivarski:2018]). Vector integrates seamlessly with the existing high energy physics
ecosystem and the broader scientific Python ecosystem, including libraries like
Dask [@rocklin:2015] and Numba [@lam:2015]. The library implements a variety of
backends for several purposes. Although vector was written with high energy
physics in mind, it is a general-purpose library that can be used for any
scientific or engineering application. The library houses five numerical
backends (three core backends plus two Numba implementations) for experimental
physicists and one symbolic backend for theoretical physicists. These backends
include a pure Python object backend for simple
computations, a SymPy [@Meurer:2017] backend for symbolic computations, a
NumPy backend for computations on regular data, an Awkward backend for
computations on jagged data, and implementations of the Object and the Awkward
backend in Numba for just-in-time compilable operations. Support for JAX and
Dask is also provided through the Awkward backend, which enables vector
functionalities to support automatic differentiation and parallel computing.

## Impact

Vector has become the de facto library for vector algebra in Python-based high
energy physics data analysis pipelines. The library has been installed over
2 million times and 314 GitHub repositories use it as a dependency at the time
of writing this paper. Along with being utilized directly in analysis pipelines
at the LHC and other experiments [@Kling:2023; @Held:2024; @Qu:2022], the library
is also used as a dependency in user-facing frameworks such as Coffea,
MadMiner [@Brehmer:2020], FastJet [@aryan:2023], Spyral [@spyral-utils:2024],
Weaver [@weaver-core:2024], and pylhe [@pylhe]. The library is also used in
multiple teaching materials for graduate courses and workshops. Finally, given
the generic nature of the library, it is also often used in applications outside
high energy physics.

# Acknowledgements

The work on vector was supported by NSF cooperative agreements OAC-1836650
(IRIS-HEP) and PHY-2323298 (IRIS-HEP). We would also like to thank the
contributors of vector and the Scikit-HEP community for their support.

# References

0 comments on commit dc3287f

Please sign in to comment.