You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Problem:
I receive the error below when running sdne_wiki.py:
Epoch 1/40
2023-06-06 09:11:55.533650: E tensorflow/stream_executor/cuda/cuda_blas.cc:636] failed to run cuBLAS routine cublasSgemm_v2: CUBLAS_STATUS_EXECUTION_FAILED
Traceback (most recent call last):
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_call
return fn(*args)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1329, in _run_fn
status, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in exit
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 146, in
sdne_model.train(batch_size=1024, epochs=40, verbose=2)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 111, in train
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1678, in fit
validation_steps=validation_steps)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1223, in _fit_loop
outs = f(ins_batch)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 2553, in call
fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1344, in _do_run
options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Caused by op 'loss/1st_loss/MatMul', defined at:
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 145, in
sdne_model = SDNE(G, hidden_size=[256, 50], )
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 92, in init
self.reset_model()
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 100, in reset_model
self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 849, in compile
output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 454, in weighted
score_array = fn(y_true, y_pred)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 50, in loss_1st
return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 2022, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2516, in _mat_mul
name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in init
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
running:
python 3.6
tf-gpu 1.5
Problem:
I receive the error below when running sdne_wiki.py:
Epoch 1/40
2023-06-06 09:11:55.533650: E tensorflow/stream_executor/cuda/cuda_blas.cc:636] failed to run cuBLAS routine cublasSgemm_v2: CUBLAS_STATUS_EXECUTION_FAILED
Traceback (most recent call last):
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_call
return fn(*args)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1329, in _run_fn
status, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in exit
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 146, in
sdne_model.train(batch_size=1024, epochs=40, verbose=2)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 111, in train
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1678, in fit
validation_steps=validation_steps)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1223, in _fit_loop
outs = f(ins_batch)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 2553, in call
fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1344, in _do_run
options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Caused by op 'loss/1st_loss/MatMul', defined at:
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 145, in
sdne_model = SDNE(G, hidden_size=[256, 50], )
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 92, in init
self.reset_model()
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 100, in reset_model
self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 849, in compile
output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 454, in weighted
score_array = fn(y_true, y_pred)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 50, in loss_1st
return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 2022, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2516, in _mat_mul
name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in init
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Does anyone get the same problem as me? How can I fix it? HELP
The text was updated successfully, but these errors were encountered: