-
-
Notifications
You must be signed in to change notification settings - Fork 199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comparing to wrk #617
Comments
Hi, I think it's simply because Put simply, Ideally, we can distribute all the work to each thread statically for this kind of application. You can check this hypothesis by using But we can implement a real-time TUI easily by using See https://emschwartz.me/async-rust-can-be-a-pleasure-to-work-with-without-send-sync-static/ |
This could potentially be improved by setting up tokio in a runtime-per-core fashion, couldn't it? Doing so would give it performance closer to a runtime such as https://github.com/compio-rs/compio - bonus points if you can pin the thread to the core by setting affinity. The way compio does this is that it has a dispatcher with a given set of threads, and each dispatched task gets allocated to the instance of the runtime on whatever thread receives it - see https://compio.rs/docs/compio/dispatcher for an example of this. Compio is a successor to projects such as monoio/glommio (in my view), and supports Windows as a bonus, which is why I am looking at it - but it's obviously not the only option, and tokio is very capable of such performance improvements. Tokio can be set up in a similar fashion by having a single-threaded runtime on each thread of a threadpool (for example with a rayon threadpool, setting up the instance via the The main performance loss when it comes to futexes etc. generally comes down to the issue of work-stealing, as described in the article you linked - so a setup like this may be a relatively easy route to improving the situation here if you don't want to manage the dispatch yourself to properly evenly distribute load (though it's not hard to build a basic round-robin dispatcher). If you wanted to avoid a dependency here, compio does this by creating a Vec<> of thread JoinHandles, and makes all threads listen on an async channel - they use flume for task dispatch throughput, but tokio's own unbounded channels to dispatch tasks would probably serve just fine here. |
@hatoo the performance gap is less important to me than consistency. I mean, if wrk gives results showing that framework A performs 50% better than framework B, I expect oha to give the same result (or nearly). |
Based on the command
{
"summary": {
"successRate": 1.0,
"total": 10.002132448,
"slowest": 0.012102818,
"fastest": 0.000011209,
"average": 0.0001493152938589139,
"requestsPerSec": 66487.72183905398,
"totalData": 0,
"sizePerRequest": 0,
"sizePerSec": 0.0
},
"responseTimeHistogram": {
"0.000011209": 2,
"0.0012203699": 664439,
"0.0024295308": 248,
"0.0036386917": 55,
"0.0048478526": 28,
"0.0060570135": 179,
"0.0072661744": 56,
"0.0084753353": 1,
"0.0096844962": 1,
"0.0108936571": 0,
"0.012102818": 4
},
"latencyPercentiles": {
"p10": 0.000056291,
"p25": 0.000088166,
"p50": 0.000170708,
"p75": 0.000196333,
"p90": 0.000204916,
"p95": 0.000211584,
"p99": 0.000259208,
"p99.9": 0.001067749,
"p99.99": 0.005996617
},
"rps": {
"mean": 66488.9445348776,
"stddev": 11061.71990612855,
"max": 200117.82937793774,
"min": 53453.523012592625,
"percentiles": {
"p10": 58920.9463964461,
"p25": 61416.20159398043,
"p50": 64900.08437010652,
"p75": 69390.87428897024,
"p90": 73052.14462083163,
"p95": 76854.12325878603,
"p99": 94786.61600970964,
"p99.9": 200109.47989645135,
"p99.99": 200117.82937793774
}
},
"details": {
"DNSDialup": {
"average": 0.00012242459999999998,
"fastest": 0.0000665,
"slowest": 0.000234041
},
"DNSLookup": {
"average": 0.000012754000000000002,
"fastest": 2.708e-6,
"slowest": 0.000022458
}
},
"statusCodeDistribution": {
"200": 665013
},
"errorDistribution": {
"aborted due to deadline": 6
}
}
{
"summary": {
"successRate": 1.0,
"total": 10.001754459,
"slowest": 0.00220447,
"fastest": 9.625e-6,
"average": 0.00004065595079742935,
"requestsPerSec": 242093.9256133362,
"totalData": 0,
"sizePerRequest": 0,
"sizePerSec": 0.0
},
"responseTimeHistogram": {
"0.000009625": 1,
"0.0002291095": 2410075,
"0.00044859399999999997": 8094,
"0.0006680785": 2169,
"0.000887563": 748,
"0.0011070475": 197,
"0.001326532": 50,
"0.0015460165": 13,
"0.001765501": 4,
"0.0019849855": 5,
"0.00220447": 1
},
"latencyPercentiles": {
"p10": 0.000015875,
"p25": 0.000021542,
"p50": 0.000036084,
"p75": 0.000048417,
"p90": 0.000066917,
"p95": 0.000081709,
"p99": 0.000138292,
"p99.9": 0.00049992,
"p99.99": 0.000906589
},
"rps": {
"mean": 242097.31066207887,
"stddev": 5207.43781862387,
"max": 254550.60466019373,
"min": 198427.09255277851,
"percentiles": {
"p10": 236192.6150775945,
"p25": 239009.2750405487,
"p50": 242308.7958092891,
"p75": 245443.68897663642,
"p90": 248328.68196278517,
"p95": 249928.86678410994,
"p99": 252729.1902214669,
"p99.9": 254414.52706948287,
"p99.99": 254550.60466019373
}
},
"details": {
"DNSDialup": {
"average": 0.0001030256,
"fastest": 0.000040042,
"slowest": 0.00022571
},
"DNSLookup": {
"average": 0.0000109418,
"fastest": 1.417e-6,
"slowest": 0.000022125
}
},
"statusCodeDistribution": {
"200": 2421357
},
"errorDistribution": {
"aborted due to deadline": 7
}
}
{
"summary": {
"successRate": 1.0,
"total": 10.002291605,
"slowest": 0.00905916,
"fastest": 0.000103333,
"average": 0.00040535336760578187,
"requestsPerSec": 24578.767517346343,
"totalData": 0,
"sizePerRequest": 0,
"sizePerSec": 0.0
},
"responseTimeHistogram": {
"0.000103333": 1,
"0.0009989157": 224180,
"0.0018944983999999998": 20234,
"0.0027900811": 1210,
"0.0036856638": 158,
"0.0045812464999999995": 40,
"0.005476829199999999": 2,
"0.0063724119": 6,
"0.007267994599999999": 2,
"0.0081635773": 0,
"0.00905916": 1
},
"latencyPercentiles": {
"p10": 0.000141792,
"p25": 0.000161833,
"p50": 0.000264083,
"p75": 0.00051525,
"p90": 0.000928416,
"p95": 0.001199416,
"p99": 0.001726999,
"p99.9": 0.002618248,
"p99.99": 0.004052497
},
"rps": {
"mean": 24579.56483432097,
"stddev": 1204.8223821470078,
"max": 28177.657311583807,
"min": 10021.887802961668,
"percentiles": {
"p10": 23145.246642660448,
"p25": 23842.17566190794,
"p50": 24632.55191735948,
"p75": 25381.23772759455,
"p90": 26056.258066792245,
"p95": 26393.759908404554,
"p99": 26886.61792831882,
"p99.9": 27770.695356076278,
"p99.99": 28177.657311583807
}
},
"details": {
"DNSDialup": {
"average": 0.0002408749,
"fastest": 0.000033,
"slowest": 0.000896541
},
"DNSLookup": {
"average": 0.0000261877,
"fastest": 1.375e-6,
"slowest": 0.00010025
}
},
"statusCodeDistribution": {
"200": 245834
},
"errorDistribution": {
"aborted due to deadline": 10
}
}
{
"summary": {
"successRate": 1.0,
"total": 10.002648437,
"slowest": 0.015493073,
"fastest": 0.000030833,
"average": 0.00012683846277234634,
"requestsPerSec": 78369.544319915,
"totalData": 0,
"sizePerRequest": 0,
"sizePerSec": 0.0
},
"responseTimeHistogram": {
"0.000030833": 1,
"0.0015770570000000002": 781369,
"0.0031232810000000003": 2134,
"0.004669505": 344,
"0.0062157290000000006": 33,
"0.007761953000000001": 7,
"0.009308176999999999": 1,
"0.010854401": 3,
"0.012400625": 0,
"0.013946849": 0,
"0.015493073000000001": 1
},
"latencyPercentiles": {
"p10": 0.000049709,
"p25": 0.000057875,
"p50": 0.000095209,
"p75": 0.00014475,
"p90": 0.000190291,
"p95": 0.000282958,
"p99": 0.000723792,
"p99.9": 0.002475373,
"p99.99": 0.004206205
},
"rps": {
"mean": 78375.9462269467,
"stddev": 10451.58276451326,
"max": 95623.57434512717,
"min": 25652.711190955175,
"percentiles": {
"p10": 65808.98960829696,
"p25": 73134.38106443692,
"p50": 80032.06884998479,
"p75": 85680.7369584364,
"p90": 89787.22829228407,
"p95": 91934.153538042,
"p99": 94954.27586408584,
"p99.9": 95513.08749559669,
"p99.99": 95623.57434512717
}
},
"details": {
"DNSDialup": {
"average": 0.0005560453999999999,
"fastest": 0.000057792,
"slowest": 0.001605665
},
"DNSLookup": {
"average": 0.000010800099999999998,
"fastest": 1.375e-6,
"slowest": 0.000022
}
},
"statusCodeDistribution": {
"200": 783893
},
"errorDistribution": {
"aborted due to deadline": 10
}
}
Running 10s test @ http://172.17.0.2:3000
2 threads and 10 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 107.99us 293.84us 10.89ms 96.77%
Req/Sec 66.05k 18.69k 75.80k 88.12%
1327895 requests in 10.10s, 92.69MB read
Requests/sec: 131475.81
Transfer/sec: 9.18MB
{
"summary": {
"successRate": 1.0,
"total": 10.001422443,
"slowest": 0.003776709,
"fastest": 0.000018708,
"average": 0.00008148780117765962,
"requestsPerSec": 121564.00821274659,
"totalData": 0,
"sizePerRequest": 0,
"sizePerSec": 0.0
},
"responseTimeHistogram": {
"0.000018708": 1,
"0.0003945081": 1212951,
"0.0007703082": 2618,
"0.0011461083000000002": 151,
"0.0015219084000000001": 45,
"0.0018977085": 12,
"0.0022735086000000002": 8,
"0.0026493087": 6,
"0.0030251088": 2,
"0.0034009089": 9,
"0.003776709": 1
},
"latencyPercentiles": {
"p10": 0.000038542,
"p25": 0.000051292,
"p50": 0.000072,
"p75": 0.000097875,
"p90": 0.000130875,
"p95": 0.000159583,
"p99": 0.000249625,
"p99.9": 0.000585916,
"p99.99": 0.000968209
},
"rps": {
"mean": 121570.43556402421,
"stddev": 11476.56738260729,
"max": 133215.9778302904,
"min": 31226.90719846694,
"percentiles": {
"p10": 117018.02077520668,
"p25": 120630.13340732458,
"p50": 123663.87239009385,
"p75": 126253.632543104,
"p90": 128092.65134764195,
"p95": 129180.71210892896,
"p99": 131663.60668838862,
"p99.9": 132223.11260008343,
"p99.99": 133215.9778302904
}
},
"details": {
"DNSDialup": {
"average": 0.00004298324592994573,
"fastest": 0.000018417,
"slowest": 0.000319417
},
"DNSLookup": {
"average": 2.703637395165278e-6,
"fastest": 9.16e-7,
"slowest": 0.000096959
}
},
"statusCodeDistribution": {
"200": 1215804
},
"errorDistribution": {
"aborted due to deadline": 9
}
} |
Hi @hatoo,
I'm considering using
oha
for https://github.com/the-benchmarker/web-frameworks, but I have some questions. Actually, we are using
wrk
. Using https://github.com/the-benchmarker/web-frameworks/blob/master/rust/actix/src/main.rs, I have some results with wrk
and with
oha http://172.17.0.2:3000 -j --no-tui -z 10s -c 10
(which uses the same options), I have
and with the more realistic test options I found in the README
oha http://172.17.0.2:3000 -j --no-tui -z 10s -c 10 --latency-correction --disable-keepalive
I have
How can you explain the variations between
wrk
and oha?
Regards,
The text was updated successfully, but these errors were encountered: