<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>SCALE-LLM 2024 Workshop on the Scaling Behavior of Large Language Models</title>
<!-- Setup all meta-information like description and titles -->
<meta
name="description"
content="The workshop on the scaling behavior of large language models invites researchers to submit projects that uncover scaling laws, with a specific foucs on inverse scaling laws, in large language models."
/>
<meta
name="keywords"
content="Scaling laws, mechanistic interpretability, workshop"
/>
<meta name="author" content="Scaling laws workshop 2024" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<!-- Load fonts Gothic A1 -->
<link
href="https://fonts.googleapis.com/css?family=Gothic+A1:400,700&display=swap"
rel="stylesheet"
/>
<!-- Load style.css -->
<link rel="stylesheet" href="style.css" />
</head>
<body>
<!-- Navigation bar -->
<nav>
<ul class="navbar">
<li><a href="#introduction">Introduction</a></li>
<li><a href="#main-workshop-topics">Workshop Topics</a></li>
<li><a href="#important-dates">Important dates</a></li>
<li><a href="#submission-instructions">Submissions</a></li>
<li><a href="#student-scholarship">Scholarship</a></li>
<li><a href="#invited-speakers">Invited Speakers</a></li>
<li><a href="#schedule">Schedule</a></li>
<li><a href="#papers">Papers</a></li>
<li><a href="#organizing-committee">Organizers</a></li>
<li><a href="#sponsors">Sponsors</a></li>
<li><a href="#correspondence">Contacts</a></li>
</ul>
</nav>
<!-- Header with a background color filling approx. 300px and that has a title of the workshop and the date as a byline -->
<header>
<h1 class="fade-in">Workshop on the Scaling Behavior of Large Language Models</h1>
<h1 class="fade-in">SCALE-LLM 2024</h1>
<h2 class="fade-in">Malta, 22 March 2024, co-located with <a href="https://2024.eacl.org/" style="color: #ffa500">EACL 2024</a></h2>
</header>
<!-- Content on white background with sections Overview, Schedule, Speakers and Organizing Committee -->
<main class="fade-in">
<section><h2 id="introduction">Introduction</h2></section>
<section class="markdown">
The purpose of this workshop is to provide a venue to share and discuss results of investigations into the scaling behavior of Large Language Models (LLMs). We are particularly interested in results displaying "interesting" scaling curves (e.g., inverse, U-shaped, or inverse U-shaped scaling curves) for a variety of tasks. These results, where the performance of the LLMs decreases with increasing model size or follows a non-monotonic trend, deviating from the expected "the bigger, the better" positive scaling laws, are of great scientific interest: they can reveal intrinsic limitations of current LLM architectures and training paradigms, and they point to novel research directions towards a better understanding of these models and of possible approaches to improve them.
Recently, there has been increasing interest in these phenomena from the research community, culminating in the Inverse Scaling Prize ([McKenzie et al. 2023](pdf/inverse_scaling_prize_paper.pdf)), which solicited tasks to be evaluated according to a standardized protocol in order to enable a systematic study. The SCALE-LLM Workshop will expand these efforts.
In contrast to the Inverse Scaling Prize, which focused on zero-shot tasks with a fixed format, we are also interested in, for example, few-shot and alternative prompting strategies (e.g. Chain-of-Thought), multi-step interactions (e.g. Tree-of-Thoughts, self-critique), hardening against prompt injection attacks (e.g. user input escaping, canary tokens), etc.
</section>
<section><h2 id="main-workshop-topics">Main Workshop Topics</h2></section>
<section class="markdown">
The workshop will provide focused discussions
on multiple topics in the general field of the scaling
behavior of Large Language Models, including,
but not limited to, the following:
1. Novel tasks that exhibit Inverse, U-shaped,
Inverse U-shaped or other types of scaling;
2. Scaling behavior of fine-tuned or purpose-built
models, in particular in-distribution
(w.r.t. the fine-tuning dataset) vs. out-of-distribution;
3. Scaling with adaptive prompting strategies,
e.g. allowing intermediate "reasoning" steps,
model self-critique or use of external tools;
4. Scaling w.r.t. additional dimensions, such as
the number of in-context/fine-tuning examples,
the number of "reasoning" steps, or the
intrinsic task complexity;
5. Scaling on non-English language tasks, in particular
low-resource languages, where models
might exhibit tradeoffs as high-resource language
training data overwhelms low-resource
language capabilities;
6. Scaling w.r.t. qualitative characteristics: internal
aspects (e.g. modularity, mechanistic
interpretability), calibration, uncertainty, effectiveness
of various techniques (pruning, defences
against adversarial attacks, etc.).
</section>
<section><h2 id="important-dates">Important dates</h2></section>
<section class="markdown">
* **Workshop paper submission deadline: ~~December 18, 2023~~ December 25, 2023 (extended)**
* EACL rejected paper submission deadline (ARR pre-reviewed): January
17, 2024
* Notification of acceptance: January 27, 2024
* Camera-ready papers due: February 6, 2024
* Workshop date: March 22, 2024
</section>
<section><h2 id="submission-instructions">Submission instructions</h2></section>
<section class="markdown">
We solicit short and long paper submissions of
no more than **4 and 8 pages**, respectively, plus unlimited pages for references and appendices.
We welcome novel research, system descriptions, and position papers.
Papers must contain "**Limitations**" and "**Ethics Statement**" sections, which will not count towards the page limit.
Upon acceptance, **one additional page** will be provided to address the reviewers' comments.
Paper submissions must use the official [ACL style templates](https://github.com/acl-org/acl-style-files)
and must follow the [ACL formatting guidelines](https://acl-org.github.io/ACLPUB/formatting.html).
All submissions must be anonymous. De-anonymized versions of the submitted papers **may** be released on pre-print servers such as arXiv;
however, we kindly ask the authors not to discuss these papers on social media during the review period.
Please **send your submissions** to our [OpenReview interface](https://openreview.net/group?id=eacl.org/EACL/2024/Workshop/SCALE-LLM).
We can also consider papers that were submitted to EACL via **ACL Rolling Review (ARR)** and rejected.
However, a paper may not be under review through ARR and SCALE-LLM simultaneously: a paper that has received or will receive
reviews through ARR may not be submitted for review to SCALE-LLM.
Keep in mind that ARR has stricter anonymity requirements regarding pre-print servers and social media,
so make sure you do not de-anonymize papers submitted through ARR by posting them on arXiv or social media.
Please refer to the [ARR instructions for authors](https://aclrollingreview.org/authors) for more information.
Accepted papers will be published in the Proceedings of the First Workshop on the Scaling Behavior of Large Language Models;
however, you can ask us not to publish your paper if you prefer (e.g. if the paper has already been submitted to, or published
in, another venue).
</section>
<section><h2 id="student-scholarship">Student scholarship</h2></section>
<section class="markdown">
Thanks to our Platinum sponsor Google, we can offer financial support to a limited number of students from low-income countries or
other disadvantaged financial situations who would like to participate in the SCALE-LLM workshop.
We may be able to cover the EACL virtual conference registration fee.
We will prioritize students who are authors of one of the accepted papers.
If you are interested in receiving financial support, please [contact us](#correspondence) before January 30, 2024, explaining your situation.
</section>
<section><h2 id="invited-speakers">Invited Speakers</h2></section>
<section class="markdown">
Ian McKenzie and Najoung Kim will each give a keynote talk.
### Ian McKenzie: Inverse Scaling: When Bigger Isn't Better
Abstract: Work on scaling laws has found that large language models (LMs) show predictable improvements to overall loss with increased scale (model size, training data, and compute). I'll discuss the phenomenon of "inverse scaling": that LMs may show worse task performance with increased scale, e.g., due to flaws in the training objective and data. We gathered empirical evidence of inverse scaling on 11 datasets collected by running a public contest, the Inverse Scaling Prize. Through analysis of the datasets, along with other examples found in the literature, we identified four potential causes of inverse scaling: (i) preference to repeat memorized sequences over following in-context instructions, (ii) imitation of undesirable patterns in the training data, (iii) tasks containing an easy distractor task which LMs could focus on, rather than the harder real task, and (iv) correct but misleading few-shot demonstrations of the task. Our tasks have helped drive the discovery of U-shaped and inverted-U scaling trends, where an initial trend reverses, suggesting that scaling trends are not always monotonic and that existing scaling laws are less reliable at predicting the behavior of larger-scale models than previously understood. Our results suggest that there are tasks for which increased model scale alone may not lead to improved performance, and that more careful thought needs to go into the data and objectives for training language models.
Ian McKenzie is the main organizer of the Inverse Scaling Prize and the first author of the associated paper.
He is currently a contracting Research Engineer on OpenAI's Dangerous Capability Evaluations project.
### Najoung Kim: Inverse scaling: mitigation strategies and open questions
Abstract: The Inverse Scaling Prize (McKenzie et al. 2023) solicited downstream tasks whose performance inversely correlates with model and training data size, leading to discoveries of various tasks that exhibit this pattern. I will discuss one known inference-time solution to this problem: using task demonstrations, where even one-shot in-context examples often suffice to change the scaling pattern of the task from inverse to U-shaped or flat (Wei et al. 2023). However, this solution does not generalize to inverse scaling problems of broader scope that do not adhere to the specific task formulations adopted by McKenzie et al. (2023). As an example, I will discuss a finding where more pretraining data leads to less effective training of novel token representations in the context of compositional generalization (Kim et al. 2022), as well as other relevant observations in the recent literature pointing to a wider range of open questions.
Dr. Kim is an Assistant Professor at Boston University and a researcher at Google.
She is also one of the authors of the Inverse Scaling Prize paper, as well as of other foundational works in this field.
</section>
<section>
<div class="speakers">
<div class="speaker">
<img src="https://najoung.kim/assets/img/2022_pic.png" alt="Najoung Kim" />
<div>
<h3><a href="https://najoung.kim/">Najoung Kim</a></h3>
<p>Assistant Professor at Boston University</p>
</div>
</div>
<div class="speaker">
<img src="https://far.ai/author/ian-mckenzie/avatar_hua2a26105c18ff52aacb07efd529d5ba7_26729_270x270_fill_q75_lanczos_center.jpg" alt="Ian McKenzie" />
<div>
<h3><a href="https://irmckenzie.co.uk/">Ian McKenzie</a></h3>
<p>Contracting Research Engineer at OpenAI</p>
</div>
</div>
</div>
</section>
<section class="markdown">
</section>
<section><h2 id="schedule">Schedule</h2></section>
<section class="markdown">
Program overview (all times are GMT+1):
- 09:00 - 09:15 Opening Remarks
- 09:15 - 09:45 Invited Talk 1 - Ian McKenzie
- 09:45 - 10:30 Oral presentations
- 10:30 - 14:00 Break
- 14:00 - 14:30 Invited talk 2 - Najoung Kim
- 14:30 - 15:15 Panel discussion
- 15:15 - 15:30 Best paper announcement and closing remarks
- 15:30 - 17:30 Poster session
</section>
<section><h2 id="papers">Accepted papers</h2></section>
<section class="markdown">
### Oral presentations
- [Scaling Behavior of Machine Translation with Large Language Models under Prompt Injection Attacks](pdf/4.pdf) -
Zhifan Sun, Antonio Valerio Miceli-Barone
- [InstructEval: Towards Holistic Evaluation of Instruction-Tuned Large Language Models](pdf/9.pdf) -
Yew Ken Chia, Pengfei Hong, Lidong Bing, Soujanya Poria
- Findings of EACL: [When do Generative Query and Document Expansions Fail? A Comprehensive Study Across Methods, Retrievers, and Datasets](https://aclanthology.org/2024.findings-eacl.134/) -
Orion Weller, Kyle Lo, David Wadden, Dawn Lawrie, Benjamin Van Durme, Arman Cohan, Luca Soldaini
### Posters
- [A Proposal for Scaling the Scaling Laws](pdf/2.pdf) -
Wout Schellaert, Ronan Hamon, Fernando Martínez-Plumed, Jose Hernandez-Orallo
- [Can Large Language Models Reason About Goal-Oriented Tasks?](pdf/5.pdf) -
Filippos Bellos, Yayuan Li, Wuao Liu, Jason J Corso
- [Detecting Mode Collapse in Language Models via Narration](pdf/10.pdf) -
Sil Hamilton
</section>
<section>
<h2 id="organizing-committee">Organizing Committee</h2>
<div class="organizers">
<div class="Organizer">
<img src="https://homepages.inf.ed.ac.uk/amiceli/pic.jpg" alt="Antonio Valerio Miceli-Barone" />
<div>
<h3><a href="https://homepages.inf.ed.ac.uk/amiceli/">Antonio Valerio Miceli-Barone</a></h3>
<p>Research Associate, University of Edinburgh</p>
</div>
</div>
<div class="Organizer">
<img src="img/fazlbarez.jpeg" alt="Fazl Barez" />
<div>
<h3><a href="https://fbarez.github.io/">Fazl Barez</a></h3>
<p>Research fellow, University of Oxford</p>
</div>
</div>
<div class="Organizer">
<img src="img/Shay-newpic.png" alt="Shay Cohen" />
<div>
<h3><a href="https://homepages.inf.ed.ac.uk/scohen/">Shay Cohen</a></h3>
<p>Reader, University of Edinburgh</p>
</div>
</div>
<div class="Organizer">
<img src="https://lena-voita.github.io/img/people/lena_blue.jpeg" alt="Elena Voita" />
<div>
<h3><a href="https://lena-voita.github.io/">Elena Voita</a></h3>
<p>Research Scientist, Meta</p>
</div>
</div>
<div class="Organizer">
<img src="https://www.research.ed.ac.uk/files-asset/25535596/germann1.jpg" alt="Ulrich Germann" />
<div>
<h3><a href="https://www.research.ed.ac.uk/en/persons/ulrich-germann">Ulrich Germann</a></h3>
<p>Senior Computing Officer (Research), University of Edinburgh</p>
</div>
</div>
<div class="Organizer">
<img src="https://mlukasik.github.io/img/nav/i.jpg" alt="Michal Lukasik" />
<div>
<h3><a href="https://mlukasik.github.io/index.html">Michal Lukasik</a></h3>
<p>Researcher, Google Research</p>
</div>
</div>
</div>
</section>
<section>
<h2 id="sponsors">Sponsors</h2>
<div class="sponsors">
<div class="Sponsor">
<img src="img/Google_logo.svg" style="width:500px;" alt="Google" />
<div>
<h3><a href="https://research.google/">Google Research</a></h3>
<p>Platinum sponsor: supports our best paper awards and student scholarship</p>
</div>
</div>
<div class="Sponsor">
<img src="img/Meta_logo.svg" style="width:250px;" alt="Meta" />
<div>
<h3><a href="https://research.facebook.com/">Meta</a></h3>
<p>Silver sponsor</p>
</div>
</div>
<div class="Sponsor">
<img src="img/tas_cropped.png" style="height:64px;" alt="TAS" />
<div>
<h3><a href="https://web.inf.ed.ac.uk/tas">The UKRI Trustworthy<br />
Autonomous Systems Governance Node</a></h3>
<p>Organizers' sponsor</p>
</div>
</div>
<div class="Sponsor">
<img src="img/apart_logo_text_icon.png" style="height:64px;" alt="TAS" />
<div>
<h3><a href="https://apartresearch.com/">Apart Research</a></h3>
<p>Organizers' sponsor</p>
</div>
</div>
</div>
</section>
<section>
<h2 id="correspondence">Correspondence</h2>
<p>Email: <a href="mailto:[email protected]">[email protected]</a></p>
</section>
</main>
<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/9.1.0/marked.min.js" integrity="sha512-4+zFvAejSGVlybiAKYyAz3KMjmbIT7I+wXgx190ZAsT19L2z8S4htBy1scR7CyP9pDKNSaolJMLedCigA/gFVg==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script>
/**
* drawdown.js
* (c) Adam Leggett
*/
function markdown(src) {
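// Pattern table: one regular expression per markdown construct
// (escapes, horizontal rules, blockquotes, lists, inline emphasis
// and code, fenced/indented code blocks, links and images, tables,
// headings, paragraphs), each applied in sequence further below.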
var rx_lt = /</g;
var rx_gt = />/g;
var rx_space = /\t|\r|\uf8ff/g;
var rx_escape = /\\([\\\|`*_{}\[\]()#+\-~])/g;
var rx_hr = /^([*\-=_] *){3,}$/gm;
var rx_blockquote = /\n *> *([^]*?)(?=(\n|$){2})/g;
var rx_list =
/\n( *)(?:[*\-+]|((\d+)|([a-z])|[A-Z])[.)]) +([^]*?)(?=(\n|$){2})/g;
var rx_listjoin = /<\/(ol|ul)>\n\n<\1>/g;
var rx_highlight =
/(^|[^A-Za-z\d\\])(([*_])|(~)|(\^)|(--)|(\+\+)|`)(\2?)([^<]*?)\2\8(?!\2)(?=\W|_|$)/g;
var rx_code = /\n((```|~~~).*\n?([^]*?)\n?\2|((    .*?\n)+))/g;
var rx_link = /((!?)\[(.*?)\]\((.*?)( ".*")?\)|\\([\\`*_{}\[\]()#+\-.!~]))/g;
var rx_table = /\n(( *\|.*?\| *\n)+)/g;
var rx_thead = /^.*\n( *\|( *\:?-+\:?-+\:? *\|)* *\n|)/;
var rx_row = /.*\n/g;
var rx_cell = /\||(.*?[^\\])\|/g;
var rx_heading = /(?=^|>|\n)([>\s]*?)(#{1,6}) (.*?)( #*)? *(?=\n|$)/g;
var rx_para = /(?=^|>|\n)\s*\n+([^<]+?)\n+\s*(?=\n|<|$)/g;
var rx_stash = /-\d+\uf8ff/g;
function replace(rex, fn) {
src = src.replace(rex, fn);
}
function element(tag, content) {
return "<" + tag + ">" + content + "</" + tag + ">";
}
function blockquote(src) {
return src.replace(rx_blockquote, function (all, content) {
return element(
"blockquote",
blockquote(highlight(content.replace(/^ *> */gm, "")))
);
});
}
function list(src) {
return src.replace(rx_list, function (all, ind, ol, num, low, content) {
var entry = element(
"li",
highlight(
content
.split(
RegExp("\n ?" + ind + "(?:(?:\\d+|[a-zA-Z])[.)]|[*\\-+]) +", "g")
)
.map(list)
.join("</li><li>")
)
);
return (
"\n" +
(ol
? '<ol start="' +
(num
? ol + '">'
: parseInt(ol, 36) -
9 +
'" style="list-style-type:' +
(low ? "low" : "upp") +
'er-alpha">') +
entry +
"</ol>"
: element("ul", entry))
);
});
}
function highlight(src) {
return src.replace(
rx_highlight,
function (all, _, p1, emp, sub, sup, small, big, p2, content) {
return (
_ +
element(
emp
? p2
? "strong"
: "em"
: sub
? p2
? "s"
: "sub"
: sup
? "sup"
: small
? "small"
: big
? "big"
: "code",
highlight(content)
)
);
}
);
}
function unesc(str) {
return str.replace(rx_escape, "$1");
}
var stash = [];
var si = 0;
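// Code blocks and links are stashed under negative indices and
// replaced with placeholders ("-1\uf8ff", "-2\uf8ff", ...) so that
// later passes cannot mangle them; rx_stash restores them at the end.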
src = "\n" + src + "\n";
replace(rx_lt, "<");
replace(rx_gt, ">");
replace(rx_space, " ");
// blockquote
src = blockquote(src);
// horizontal rule
replace(rx_hr, "<hr/>");
// list
src = list(src);
replace(rx_listjoin, "");
// code
replace(rx_code, function (all, p1, p2, p3, p4) {
stash[--si] = element(
"pre",
element("code", p3 || p4.replace(/^ /gm, ""))
);
return si + "\uf8ff";
});
// link or image
replace(rx_link, function (all, p1, p2, p3, p4, p5, p6) {
stash[--si] = p4
? p2
? '<img src="' + p4 + '" alt="' + p3 + '"/>'
: '<a href="' + p4 + '">' + unesc(highlight(p3)) + "</a>"
: p6;
return si + "\uf8ff";
});
// table
replace(rx_table, function (all, table) {
var sep = table.match(rx_thead)[1];
return (
"\n" +
element(
"table",
table.replace(rx_row, function (row, ri) {
return row == sep
? ""
: element(
"tr",
row.replace(rx_cell, function (all, cell, ci) {
return ci
? element(
sep && !ri ? "th" : "td",
unesc(highlight(cell || ""))
)
: "";
})
);
})
)
);
});
// heading
replace(rx_heading, function (all, _, p1, p2) {
return _ + element("h" + p1.length, unesc(highlight(p2)));
});
// paragraph
replace(rx_para, function (all, content) {
return element("p", unesc(highlight(content)));
});
// stash
replace(rx_stash, function (all) {
return stash[parseInt(all)];
});
return src.trim();
}
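// Minimal usage sketch (illustrative only; not invoked by this page):
// markdown("# Title\n\nSome *text*") returns markup along the lines of
// "<h1>Title</h1><p>Some <em>text</em></p>".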
</script>
<script>
document.addEventListener("DOMContentLoaded", function() {
// Get all the sections with the class "markdown"
let markdownSections = document.querySelectorAll('section.markdown');
markdownSections.forEach(section => {
// Convert markdown content to HTML
let convertedHTML = markdown(section.textContent);
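// Note: markdown() HTML-escapes "<" and ">" first, so these
// sections must contain plain markdown text, not raw HTML.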
// Replace section content with the converted HTML
section.innerHTML = convertedHTML;
console.log("Changed");
});
});
</script>
</body>
</html>