Merge branch 'keras-team:master' into falcon-causallm
SamanehSaadat authored Mar 15, 2024
2 parents c5322b7 + 4511580 commit bb82633
Showing 27 changed files with 1,249 additions and 204 deletions.
3 changes: 3 additions & 0 deletions .github/dependabot.yml
@@ -21,3 +21,6 @@ updates:
python:
patterns:
- "*"
ignore:
# Ignore all updates for JAX GPU due to a CUDA version issue
- dependency-name: "jax[cuda12_pip]"
21 changes: 21 additions & 0 deletions .github/workflows/auto-assignment.yml
@@ -0,0 +1,21 @@
name: auto-assignment
on:
issues:
types:
- opened

permissions:
contents: read
issues: write
pull-requests: write

jobs:
welcome:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/github-script@v7
with:
script: |
const script = require('./.github/workflows/scripts/auto-assignment.js')
script({github, context})
43 changes: 43 additions & 0 deletions .github/workflows/scripts/auto-assignment.js
@@ -0,0 +1,43 @@
/** Automatically assign issues and PRs to users in the `assigneesList`
* on a rotating basis.
 * @param {!object} github GitHub object that can call GitHub APIs using its built-in library functions.
 * @param {!object} context Context object containing issue and PR details.
 */

module.exports = async ({ github, context }) => {
let issueNumber;
let assigneesList;
// Is this an issue? If so, assign the issue number. Otherwise, assign the PR number.
if (context.payload.issue) {
// Assignee list for issues.
assigneesList = ["SuryanarayanaY", "sachinprasadhs"];
issueNumber = context.payload.issue.number;
} else {
// Assignee list for PRs.
assigneesList = ["mattdangerw"];
issueNumber = context.payload.number;
}
console.log("assignee list", assigneesList);
console.log("entered auto assignment for this issue: ", issueNumber);
if (!assigneesList.length) {
console.log("No assignees found for this repo.");
return;
}
let noOfAssignees = assigneesList.length;
let selection = issueNumber % noOfAssignees;
let assigneeForIssue = assigneesList[selection];

console.log(
"issue Number = ",
issueNumber + " , assigning to: ",
assigneeForIssue
);
return github.rest.issues.addAssignees({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
assignees: [assigneeForIssue],
});
};
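The selection rule above is a simple round robin: the issue or PR number indexes into the assignee list modulo its length. A short Python sketch of the same rule (list contents and numbers are illustrative, not part of this diff):

# Hedged sketch of the rotation in auto-assignment.js; values are illustrative.
assignees = ["SuryanarayanaY", "sachinprasadhs"]

def pick_assignee(issue_number):
    # Mirrors `issueNumber % assigneesList.length` in the script above.
    return assignees[issue_number % len(assignees)]

print(pick_assignee(101))  # sachinprasadhs (101 % 2 == 1)
print(pick_assignee(102))  # SuryanarayanaY (102 % 2 == 0)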
10 changes: 7 additions & 3 deletions .github/workflows/scripts/labeler.js
@@ -23,16 +23,20 @@ You may obtain a copy of the License at

module.exports = async ({ github, context }) => {
const issue_title = context.payload.issue ? context.payload.issue.title : context.payload.pull_request.title
const issue_discription = context.payload.issue ? context.payload.issue.body : context.payload.pull_request.body
let issue_description = context.payload.issue ? context.payload.issue.body : context.payload.pull_request.body
const issue_number = context.payload.issue ? context.payload.issue.number : context.payload.pull_request.number
const keyword_label = {
gemma:'Gemma'
}
const labelsToAdd = []
console.log(issue_title,issue_discription,issue_number)
console.log(issue_title,issue_description,issue_number)
if (issue_description==null)
{
issue_description = ''
}

for(const [keyword, label] of Object.entries(keyword_label)){
if(issue_title.toLowerCase().indexOf(keyword) !=-1 || issue_discription.toLowerCase().indexOf(keyword) !=-1 ){
if(issue_title.toLowerCase().indexOf(keyword) !=-1 || issue_description.toLowerCase().indexOf(keyword) !=-1 ){
console.log(`'${keyword}'keyword is present inside the title or description. Pushing label '${label}' to row.`)
labelsToAdd.push(label)
}
50 changes: 50 additions & 0 deletions .github/workflows/stale-issue-pr.yml
@@ -0,0 +1,50 @@
name: Close inactive issues
on:
schedule:
- cron: "30 1 * * *"
jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- name: Awaiting response issues
uses: actions/stale@v9
with:
days-before-issue-stale: 14
days-before-issue-close: 14
stale-issue-label: "stale"
# Reason for closing the issue; the default value is not_planned.
close-issue-reason: completed
only-labels: "stat:awaiting response from contributor"
stale-issue-message: >
This issue is stale because it has been open for 14 days with no activity.
It will be closed if no further activity occurs. Thank you.
# Labels to remove when an issue/PR is no longer stale.
labels-to-remove-when-unstale: "stat:awaiting response from contributor"
close-issue-message: >
This issue was closed because it has been inactive for 28 days.
Please reopen if you'd like to work on this further.
days-before-pr-stale: 14
days-before-pr-close: 14
stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you."
close-pr-message: "This PR was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further."
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Contribution issues
uses: actions/stale@v9
with:
days-before-issue-stale: 180
days-before-issue-close: 365
stale-issue-label: "stale"
# Reason for closing the issue; the default value is not_planned.
close-issue-reason: not_planned
any-of-labels: "stat:contributions welcome,good first issue"
# Labels to remove when an issue/PR is no longer stale.
labels-to-remove-when-unstale: "stat:contributions welcome,good first issue"
stale-issue-message: >
This issue is stale because it has been open for 180 days with no activity.
It will be closed if no further activity occurs. Thank you.
close-issue-message: >
This issue was closed because it has been inactive for more than 1 year.
repo-token: ${{ secrets.GITHUB_TOKEN }}
3 changes: 0 additions & 3 deletions .kokoro/github/ubuntu/gpu/build.sh
@@ -14,11 +14,8 @@ if [[ -z "${KAGGLE_USERNAME}" ]]; then
fi

set -x

cd "${KOKORO_ROOT}/"

sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1

PYTHON_BINARY="/usr/bin/python3.9"

"${PYTHON_BINARY}" -m venv venv
47 changes: 29 additions & 18 deletions keras_nlp/layers/modeling/rotary_embedding.py
@@ -85,30 +85,42 @@ def __init__(
self.built = True

def call(self, inputs, start_index=0):
inputs = ops.moveaxis(
inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
)
cos_emb, sin_emb = self._compute_cos_sin_embedding(inputs, start_index)
return self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
output = self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
return ops.moveaxis(
output, (-1, 1), (self.feature_axis, self.sequence_axis)
)

def _apply_rotary_pos_emb(self, tensor, cos_emb, sin_emb):
x1, x2 = ops.split(tensor, 2, axis=self.feature_axis)
half_rot_tensor = ops.concatenate((-x2, x1), axis=self.feature_axis)
x1, x2 = ops.split(tensor, 2, axis=-1)
# Avoid `ops.concatenate` for now, to avoid an obscure bug with XLA
# compilation on jax. We should be able to remove this once the
# following PR is in all jax releases we care about:
# https://github.com/openxla/xla/pull/7875
half_rot_tensor = ops.stack((-x2, x1), axis=-2)
half_rot_tensor = ops.reshape(half_rot_tensor, ops.shape(tensor))
return (tensor * cos_emb) + (half_rot_tensor * sin_emb)

def _compute_cos_sin_embedding(self, inputs, start_index=0):
def get_axis(axis):
return axis if axis > 0 else len(inputs.shape) + axis
start_index = ops.cast(start_index, dtype="float32")

feature_axis = get_axis(self.feature_axis)
sequence_axis = get_axis(self.sequence_axis)
feature_axis = len(inputs.shape) - 1
sequence_axis = 1

rotary_dim = ops.shape(inputs)[feature_axis]
inverse_freq = self._get_inverse_freq(rotary_dim)

seq_len = ops.shape(inputs)[self.sequence_axis]
tensor = ops.cast(ops.arange(seq_len), self.compute_dtype) + start_index
seq_len = ops.shape(inputs)[sequence_axis]
tensor = ops.arange(seq_len, dtype="float32") + start_index

tensor = ops.cast(tensor, dtype=inverse_freq.dtype)
freq = ops.einsum("i,j->ij", tensor, inverse_freq)
embedding = ops.concatenate((freq, freq), axis=-1)
embedding = ops.stack((freq, freq), axis=-2)
embedding = ops.reshape(
embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
)

# Reshape the embedding to be broadcastable with input shape.
if feature_axis < sequence_axis:
@@ -117,17 +129,16 @@ def get_axis(axis):
if axis != sequence_axis and axis != feature_axis:
embedding = ops.expand_dims(embedding, axis)

return ops.cos(embedding), ops.sin(embedding)
cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
sin_emb = ops.cast(ops.sin(embedding), self.compute_dtype)
return cos_emb, sin_emb

def _get_inverse_freq(self, rotary_dim):
freq_range = ops.arange(0, rotary_dim, 2)
freq_range = ops.cast(freq_range, self.compute_dtype)
freq_range = freq_range / ops.cast(
self.scaling_factor, self.compute_dtype
)
freq_range = ops.arange(0, rotary_dim, 2, dtype="float32")
freq_range = freq_range / ops.cast(self.scaling_factor, "float32")
inverse_freq = 1.0 / (
self.max_wavelength
** (freq_range / ops.cast(rotary_dim, self.compute_dtype))
** (freq_range / ops.cast(rotary_dim, "float32"))
)
return inverse_freq
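
The `ops.stack` + `ops.reshape` pattern introduced above is a drop-in replacement for the previous last-axis `ops.concatenate` (avoided here because of the XLA-on-JAX issue noted in the comment). A minimal NumPy sketch, used only as a stand-in for the keras ops, to illustrate the equivalence under row-major reshaping:

import numpy as np

# x plays the role of the (batch, sequence, feature) tensor inside call().
x = np.arange(24, dtype="float32").reshape(2, 3, 4)
x1, x2 = np.split(x, 2, axis=-1)                 # each half is (2, 3, 2)

old_style = np.concatenate((-x2, x1), axis=-1)   # previous formulation
stacked = np.stack((-x2, x1), axis=-2)           # (2, 3, 2, 2)
new_style = stacked.reshape(x.shape)             # row-major flatten of the last two axes

assert np.array_equal(old_style, new_style)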

2 changes: 2 additions & 0 deletions keras_nlp/models/__init__.py
@@ -20,6 +20,7 @@
)
from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
from keras_nlp.models.albert.albert_tokenizer import AlbertTokenizer
from keras_nlp.models.backbone import Backbone
from keras_nlp.models.bart.bart_backbone import BartBackbone
from keras_nlp.models.bart.bart_preprocessor import BartPreprocessor
from keras_nlp.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
@@ -130,6 +131,7 @@
from keras_nlp.models.roberta.roberta_tokenizer import RobertaTokenizer
from keras_nlp.models.t5.t5_backbone import T5Backbone
from keras_nlp.models.t5.t5_tokenizer import T5Tokenizer
from keras_nlp.models.task import Task
from keras_nlp.models.whisper.whisper_audio_feature_extractor import (
WhisperAudioFeatureExtractor,
)
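With `Backbone` and `Task` re-exported here, both base classes become part of the public `keras_nlp.models` namespace. A hedged usage sketch (the preset name is only an example and is assumed to be available):

import keras_nlp

# "bert_base_en" is an illustrative preset name; any registered preset
# should resolve through the base class.
backbone = keras_nlp.models.Backbone.from_preset("bert_base_en")
print(isinstance(backbone, keras_nlp.models.Backbone))  # True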
3 changes: 2 additions & 1 deletion keras_nlp/models/backbone.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import config
from keras_nlp.backend import keras
from keras_nlp.utils.preset_utils import check_preset_class
@@ -20,7 +21,7 @@
from keras_nlp.utils.python_utils import format_docstring


@keras.saving.register_keras_serializable(package="keras_nlp")
@keras_nlp_export("keras_nlp.models.Backbone")
class Backbone(keras.Model):
def __init__(self, *args, dtype=None, **kwargs):
super().__init__(*args, **kwargs)
99 changes: 95 additions & 4 deletions keras_nlp/models/bloom/bloom_presets.py
@@ -17,14 +17,105 @@
"bloom_560m_multi": {
"metadata": {
"description": (
"24-layer Bloom model. trained on 45 natural languages and "
"12 programming languages."
"24-layer Bloom model with hidden dimension of 1024. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 816115712,
"params": 559214592,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom",
"model_card": "https://huggingface.co/bigscience/bloom-560m",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_560m_multi/3",
},
"bloom_1.1b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1536. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 1065314304,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-1b1",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_1.1b_multi/1",
},
"bloom_1.7b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 2048. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 1722408960,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-1b7",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_1.7b_multi/1",
},
"bloom_3b_multi": {
"metadata": {
"description": (
"30-layer Bloom model with hidden dimension of 2560. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 3002557440,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-3b",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_3b_multi/1",
},
"bloomz_560m_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1024. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 559214592,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-560m",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_560m_multi/1",
},
"bloomz_1.1b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1536. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 1065314304,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-1b1",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_1.1b_multi/1",
},
"bloomz_1.7b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 2048. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 1722408960,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-1b7",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_1.7b_multi/1",
},
"bloomz_3b_multi": {
"metadata": {
"description": (
"30-layer Bloom model with hidden dimension of 2560. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 3002557440,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-3b",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_3b_multi/1",
},
}
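A hedged sketch of loading one of the presets registered above; the entry point (`BloomBackbone.from_preset`) and the download step follow the usual keras_nlp preset workflow and are assumptions here, not part of this diff:

import keras_nlp

# Resolves the kaggle handle registered above and downloads the weights.
backbone = keras_nlp.models.BloomBackbone.from_preset("bloom_1.1b_multi")
print(backbone.count_params())  # ~1,065,314,304 per the metadata above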