keras-team · skon7 · Sep 13, 2023 · Sep 21, 2023 · Sep 21, 2023 · Sep 29, 2023
diff --git a/examples/nlp/ipynb/masked_language_modeling.ipynb b/examples/nlp/ipynb/masked_language_modeling.ipynb
@@ -62,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
@@ -91,13 +91,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
    "outputs": [],
    "source": [
-    "\n",
     "@dataclass\n",
     "class Config:\n",
     "    MAX_LEN = 256\n",
@@ -126,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
@@ -138,13 +137,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
    "outputs": [],
    "source": [
-    "\n",
     "def get_text_list_from_files(files):\n",
     "    text_list = []\n",
     "    for name in files:\n",
@@ -155,7 +153,6 @@
     "\n",
     "\n",
     "def get_data_from_text_files(folder_name):\n",
-    "\n",
     "    pos_files = glob.glob(\"aclImdb/\" + folder_name + \"/pos/*.txt\")\n",
     "    pos_texts = get_text_list_from_files(pos_files)\n",
     "    neg_files = glob.glob(\"aclImdb/\" + folder_name + \"/neg/*.txt\")\n",
@@ -173,7 +170,8 @@
     "train_df = get_data_from_text_files(\"train\")\n",
     "test_df = get_data_from_text_files(\"test\")\n",
     "\n",
-    "all_data = train_df.append(test_df)"
+    "# all_data = train_df.append(test_df)\n",
+    "all_data = pd.concat([train_df, test_df], ignore_index=True)"
    ]
   },
   {
@@ -199,13 +197,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
    "outputs": [],
    "source": [
-    "\n",
     "def custom_standardization(input_data):\n",
     "    lowercase = tf.strings.lower(input_data)\n",
     "    stripped_html = tf.strings.regex_replace(lowercase, \"<br />\", \" \")\n",
@@ -341,13 +338,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
    "outputs": [],
    "source": [
-    "\n",
     "def bert_module(query, key, value, i):\n",
     "    # Multi headed self-attention\n",
     "    attention_output = layers.MultiHeadAttention(\n",
@@ -520,7 +516,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
@@ -545,7 +541,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
@@ -617,13 +613,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": null,
    "metadata": {
     "colab_type": "code"
    },
    "outputs": [],
    "source": [
-    "\n",
     "def get_end_to_end(model):\n",
     "    inputs_string = keras.Input(shape=(1,), dtype=\"string\")\n",
     "    indices = vectorize_layer(inputs_string)\n",
@@ -670,4 +665,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
diff --git a/examples/nlp/masked_language_modeling.py b/examples/nlp/masked_language_modeling.py
@@ -111,7 +111,8 @@ def get_data_from_text_files(folder_name):
 train_df = get_data_from_text_files("train")
 test_df = get_data_from_text_files("test")
 
-all_data = train_df.append(test_df)
+# all_data = train_df.append(test_df)
+all_data = pd.concat([train_df, test_df], ignore_index=True)
 
 """
 ## Dataset preparation

diff --git a/examples/nlp/md/masked_language_modeling.md b/examples/nlp/md/masked_language_modeling.md
@@ -120,7 +120,8 @@ def get_data_from_text_files(folder_name):
 train_df = get_data_from_text_files("train")
 test_df = get_data_from_text_files("test")
 
-all_data = train_df.append(test_df)
+# all_data = train_df.append(test_df)
+all_data = pd.concat([train_df, test_df], ignore_index=True)
 ```
 <div class="k-default-codeblock">
 ```