Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging reports and code #99

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added CS410 - Final Project.docx
Binary file not shown.
Binary file added CS410 Project Proposal - Spiderman.docx
Binary file not shown.
Binary file added Dockerise a Python Program.docx
Binary file not shown.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# CourseProject
# CS410 Spiderman

Please fork this repository and paste the github link of your fork on Microsoft CMT. Detailed instructions are on Coursera under Week 1: Course Project Overview/Week 9 Activities.
Please click here for the video presentation:
Part I: https://youtu.be/0utoLsXVr9Y
Part II: https://youtu.be/o_V7v0G5xS8
27 changes: 27 additions & 0 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Container image for the sarcasm-headline classifier (sarcasm_final.py).
FROM python:3.8

# Labels as key-value pairs.
LABEL Maintainer="[email protected]"

# Working directory inside the container.
# NOTE(review): the original used a host-specific path
# (/Users/sumababu1/sarcasm_config), which only exists on the author's
# machine; the comments already referred to /usr/app/src, so use that.
WORKDIR /usr/app/src

# Install dependencies first so this layer is cached across code changes.
# requirements.txt already lists tensorflow, requests, and numpy, so the
# separate per-package pip installs were redundant and have been removed.
COPY requirements.txt requirements.txt
RUN pip3 install --upgrade pip
RUN pip3 install -r requirements.txt

# Copy the application source into the working directory.
# (A second "COPY sarcasm_final.py ./" was redundant: "COPY . ." already
# includes it.)
COPY . .

# Run the sarcasm classifier when the container starts.
CMD [ "python", "./sarcasm_final.py" ]

Binary file added progressreport.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
tensorflow
requests
numpy
115 changes: 115 additions & 0 deletions sarcasm_final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import json
import tensorflow as tf
import numpy as np
import urllib
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import numpy as np
import requests


def solution_model(url='http://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json'):
    """Download the sarcasm-headlines dataset, train a classifier, return it.

    Fetches a JSON list of records (each with ``'headline'`` and
    ``'is_sarcastic'`` keys), tokenizes the headlines, and trains a small
    embedding + pooling binary classifier.

    Parameters
    ----------
    url : str, optional
        Location of the sarcasm JSON dataset.  Defaults to the TensorFlow
        sample-data URL used by the course; previously this was hard-coded
        (and a dead ``url`` local shadowed it).

    Returns
    -------
    tf.keras.Model
        The trained sequential model.
    """
    # DO NOT CHANGE THIS CODE OR THE TESTS MAY NOT WORK
    vocab_size = 1000
    embedding_dim = 16
    max_length = 120
    trunc_type = 'post'
    padding_type = 'post'
    oov_tok = "<OOV>"
    training_size = 20000

    # Fetch and parse the dataset over HTTP.
    datastore = requests.get(url)
    sentences = []
    labels = []
    for item in datastore.json():
        sentences.append(item['headline'])
        labels.append(item['is_sarcastic'])

    # Fixed-index train/test split.
    training_sentences = sentences[0:training_size]
    testing_sentences = sentences[training_size:]
    training_labels = labels[0:training_size]
    testing_labels = labels[training_size:]

    # Fit the tokenizer on training text only (no leakage from test data).
    tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
    tokenizer.fit_on_texts(training_sentences)

    training_sequences = tokenizer.texts_to_sequences(training_sentences)
    training_padded = pad_sequences(training_sequences, maxlen=max_length,
                                    padding=padding_type, truncating=trunc_type)
    testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
    testing_padded = pad_sequences(testing_sequences, maxlen=max_length,
                                   padding=padding_type, truncating=trunc_type)

    # model.fit expects numpy arrays.
    training_padded = np.array(training_padded)
    training_labels = np.array(training_labels)
    testing_padded = np.array(testing_padded)
    testing_labels = np.array(testing_labels)

    # Embedding -> average pooling -> small dense classifier head.
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(24, activation='relu'),
        # KEEP THIS OUTPUT LAYER INTACT OR TESTS MAY FAIL
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    num_epochs = 10
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(training_padded, training_labels, epochs=num_epochs,
              validation_data=(testing_padded, testing_labels), verbose=2)

    # Dead code removed: reverse_word_index / decode_sentence / the embedding
    # weight inspection never influenced the return value.
    return model


# Note that you'll need to save your model as a .h5 like this.
# When you press the Submit and Test button, your saved .h5 model will
# be sent to the testing infrastructure for scoring
# and the score will be returned to you.
if __name__ == '__main__':
    # Hyperparameters mirroring those fixed inside solution_model().
    vocab_size = 1000
    embedding_dim = 16
    max_length = 120
    trunc_type = 'post'
    padding_type = 'post'
    oov_tok = "<OOV>"
    training_size = 20000

    # Train the model (downloads the dataset over HTTP).
    model = solution_model()

    # Sample headlines to score for sarcasm.
    sentence = ["obama visits arlington national cemetery to honor veterans",
                "why writers must plan to be surprise",
                "gillian jacobs on what it's like to kiss adam brody",
                "rescuers heroically help beached garbage back into ocean",
                "christian bale visits sikh temple victims",
                "brita-unveils-new-in-throat-water-filters"]

    # NOTE(review): this fits a brand-new Tokenizer on the six sample
    # headlines instead of reusing the tokenizer that was fitted on the
    # training corpus inside solution_model().  The resulting word indices
    # bear no relation to the embedding the model was trained with, so the
    # predictions below are essentially meaningless.  Fixing this requires
    # solution_model() to also return (or expose) its tokenizer; kept as-is
    # here to preserve the existing interface and behavior.
    tokenizer = Tokenizer(oov_token="<OOV>")
    tokenizer.fit_on_texts(sentence)
    sequences = tokenizer.texts_to_sequences(sentence)
    padded = pad_sequences(sequences, maxlen=max_length,
                           padding=padding_type, truncating=trunc_type)

    print(model.predict(padded))
    # model.save("sarcasm.h5")  # required when submitting for scoring